From 4ded097bed1663b307f353a0dd6ad931e345834e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 20 Oct 2017 11:41:17 +1100 Subject: constify more dcache.h inlined helpers. Many inlined helpers in dcache.h were changed to accept const struct pointers in commit f0d3b3ded999 ("constify dcache.c inlined helpers where possible"). This patch allows 'const' in a couple that were added since then. Signed-off-by: NeilBrown Signed-off-by: Al Viro --- include/linux/dcache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index ed1a7cf6923a..4cc3d891ea03 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -358,7 +358,7 @@ static inline void dont_mount(struct dentry *dentry) extern void __d_lookup_done(struct dentry *); -static inline int d_in_lookup(struct dentry *dentry) +static inline int d_in_lookup(const struct dentry *dentry) { return dentry->d_flags & DCACHE_PAR_LOOKUP; } @@ -486,7 +486,7 @@ static inline bool d_really_is_positive(const struct dentry *dentry) return dentry->d_inode != NULL; } -static inline int simple_positive(struct dentry *dentry) +static inline int simple_positive(const struct dentry *dentry) { return d_really_is_positive(dentry) && !d_unhashed(dentry); } -- cgit v1.2.3 From 6909e29fdefbb7aa643021279daef6ed10c81528 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 12 Oct 2017 16:06:11 +0200 Subject: kdb: use __ktime_get_real_seconds instead of __current_kernel_time kdb is the only user of the __current_kernel_time() interface, which is not y2038 safe and should be removed at some point. The kdb code also goes to great lengths to print the time in a human-readable format from 'struct timespec', again using a non-y2038-safe re-implementation of the generic time_to_tm() code. Using __current_kernel_time() here is necessary since the regular accessors that require a sequence lock might hang when called during the xtime update. 
However, this is safe in the particular case since kdb is only interested in the tv_sec field that is updated atomically. In order to make this y2038-safe, I'm converting the code to the generic time64_to_tm helper, but that introduces the problem that we have no interface like __current_kernel_time() that provides a 64-bit timestamp in a lockless, safe and architecture-independent way. I have multiple ideas for how to solve that: - __ktime_get_real_seconds() is lockless, but can return incorrect results on 32-bit architectures in the special case that we are in the process of changing the time across the epoch, either during the timer tick that overflows the seconds in 2038, or while calling settimeofday. - ktime_get_real_fast_ns() would work in this context, but does require a call into the clocksource driver to return a high-resolution timestamp. This may have undesired side-effects in the debugger, since we want to limit the interactions with the rest of the kernel. - Adding a ktime_get_real_fast_seconds() based on tk_fast_mono plus tkr->base_real without the tk_clock_read() delta. Not sure about the value of adding yet another interface here. - Changing the existing ktime_get_real_seconds() to use tk_fast_mono on 32-bit architectures rather than xtime_sec. I think this could work, but am not entirely sure if this is an improvement. I picked the first of those for simplicity here. It's technically not correct but probably good enough as the time is only used for the debugging output and the race will likely never be hit in practice. Another downside is having to move the declaration into a public header file. Let me know if anyone has a different preference. 
Cc: Andy Shevchenko Link: https://patchwork.kernel.org/patch/9775309/ Signed-off-by: Arnd Bergmann Signed-off-by: Jason Wessel --- include/linux/timekeeping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index b17bcce58bc4..588a0e4b1ab9 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -31,6 +31,7 @@ struct timespec64 get_monotonic_coarse64(void); extern void getrawmonotonic64(struct timespec64 *ts); extern void ktime_get_ts64(struct timespec64 *ts); extern time64_t ktime_get_seconds(void); +extern time64_t __ktime_get_real_seconds(void); extern time64_t ktime_get_real_seconds(void); extern int __getnstimeofday64(struct timespec64 *tv); -- cgit v1.2.3 From 5b698be0497d8be986e2050e9b1c145b2e0603c2 Mon Sep 17 00:00:00 2001 From: Meghana Madhyastha Date: Wed, 24 Jan 2018 16:34:07 +0000 Subject: video: backlight: Add helpers to enable and disable backlight MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add helper functions backlight_enable and backlight_disable to enable/disable a backlight device. These helper functions can then be used by different drm and tinydrm drivers to avoid repetition of code and also to enforce a uniform and consistent way to enable/disable a backlight device. 
Acked-by: Daniel Thompson Reviewed-by: Noralf Trønnes Reviewed-by: Sean Paul Signed-off-by: Meghana Madhyastha Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/39b5bf0a02008a8072d910bdf8231c431e9ef504.1516810725.git.meghana.madhyastha@gmail.com --- include/linux/backlight.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backlight.h b/include/linux/backlight.h index af7003548593..ace825e2ca2d 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -130,6 +130,38 @@ static inline int backlight_update_status(struct backlight_device *bd) return ret; } +/** + * backlight_enable - Enable backlight + * @bd: the backlight device to enable + */ +static inline int backlight_enable(struct backlight_device *bd) +{ + if (!bd) + return 0; + + bd->props.power = FB_BLANK_UNBLANK; + bd->props.fb_blank = FB_BLANK_UNBLANK; + bd->props.state &= ~BL_CORE_FBBLANK; + + return backlight_update_status(bd); +} + +/** + * backlight_disable - Disable backlight + * @bd: the backlight device to disable + */ +static inline int backlight_disable(struct backlight_device *bd) +{ + if (!bd) + return 0; + + bd->props.power = FB_BLANK_POWERDOWN; + bd->props.fb_blank = FB_BLANK_POWERDOWN; + bd->props.state |= BL_CORE_FBBLANK; + + return backlight_update_status(bd); +} + extern struct backlight_device *backlight_device_register(const char *name, struct device *dev, void *devdata, const struct backlight_ops *ops, const struct backlight_properties *props); -- cgit v1.2.3 From c2adda27d202fa8f70a5d6e8b0c12b449c8868b8 Mon Sep 17 00:00:00 2001 From: Meghana Madhyastha Date: Wed, 24 Jan 2018 16:35:30 +0000 Subject: video: backlight: Add of_find_backlight helper in backlight.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add of_find_backlight, a helper function which is a generic version of tinydrm_of_find_backlight that can be used by other 
drivers to avoid repetition of code and simplify things. Acked-by: Daniel Thompson Reviewed-by: Noralf Trønnes Reviewed-by: Sean Paul Reviewed-by: Thierry Reding Signed-off-by: Meghana Madhyastha Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/116d160ba78be2e6dcbdcb6855622bce67da9472.1516810725.git.meghana.madhyastha@gmail.com --- include/linux/backlight.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backlight.h b/include/linux/backlight.h index ace825e2ca2d..ddc9bade4fb2 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -162,6 +162,16 @@ static inline int backlight_disable(struct backlight_device *bd) return backlight_update_status(bd); } +/** + * backlight_put - Drop backlight reference + * @bd: the backlight device to put + */ +static inline void backlight_put(struct backlight_device *bd) +{ + if (bd) + put_device(&bd->dev); +} + extern struct backlight_device *backlight_device_register(const char *name, struct device *dev, void *devdata, const struct backlight_ops *ops, const struct backlight_properties *props); @@ -205,4 +215,13 @@ of_find_backlight_by_node(struct device_node *node) } #endif +#if IS_ENABLED(CONFIG_BACKLIGHT_CLASS_DEVICE) +struct backlight_device *of_find_backlight(struct device *dev); +#else +static inline struct backlight_device *of_find_backlight(struct device *dev) +{ + return NULL; +} +#endif + #endif -- cgit v1.2.3 From 2e4ef3347b4a4eb65c4fd950d94bbd75fed4d798 Mon Sep 17 00:00:00 2001 From: Meghana Madhyastha Date: Wed, 24 Jan 2018 16:37:23 +0000 Subject: video: backlight: Add devres versions of of_find_backlight MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add devm_of_find_backlight and the corresponding release function because some drivers use devres versions of functions for acquiring device resources. 
Acked-by: Daniel Thompson Reviewed-by: Noralf Trønnes Reviewed-by: Sean Paul Signed-off-by: Meghana Madhyastha Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/021f8fecfa3f374dc5dcb70fb07a6f6b019bea7b.1516810725.git.meghana.madhyastha@gmail.com --- include/linux/backlight.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backlight.h b/include/linux/backlight.h index ddc9bade4fb2..2baab6f3861d 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -217,11 +217,18 @@ of_find_backlight_by_node(struct device_node *node) #if IS_ENABLED(CONFIG_BACKLIGHT_CLASS_DEVICE) struct backlight_device *of_find_backlight(struct device *dev); +struct backlight_device *devm_of_find_backlight(struct device *dev); #else static inline struct backlight_device *of_find_backlight(struct device *dev) { return NULL; } + +static inline struct backlight_device * +devm_of_find_backlight(struct device *dev) +{ + return NULL; +} #endif #endif -- cgit v1.2.3 From 1a3f6755649dd419d9e01cbc38e116e2c70acb73 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Mon, 15 Jan 2018 11:33:41 +0100 Subject: iio: adc: axp20x_adc: add support for AXP813 ADC The X-Powers AXP813 PMIC is really close to what is already done for AXP20X/AXP22X. There are two pairs of bits to set the rate (one for Voltage and Current measurements and one for TS/GPIO0 voltage measurements) instead of one. The register to set the ADC rates is different from the one for AXP20X/AXP22X. GPIO0 can be used as an ADC (measuring Volts) unlike for AXP22X. The scales to apply to the different inputs are unlike the ones from AXP20X and AXP22X. 
Signed-off-by: Quentin Schulz Acked-by: Jonathan Cameron Signed-off-by: Jonathan Cameron --- include/linux/mfd/axp20x.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 78dc85365c4f..ff95414c8316 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -266,6 +266,8 @@ enum axp20x_variants { #define AXP288_RT_BATT_V_H 0xa0 #define AXP288_RT_BATT_V_L 0xa1 +#define AXP813_ADC_RATE 0x85 + /* Fuel Gauge */ #define AXP288_FG_RDC1_REG 0xba #define AXP288_FG_RDC0_REG 0xbb -- cgit v1.2.3 From b28bad65c1fec47076ebee88b51b0dafa31f5065 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 4 Jan 2018 10:58:48 -0800 Subject: Input: libps2 - use u8 for byte data Instead of using unsigned char for the byte data switch to using u8. Also use unsigned int for the command codes and timeouts, and have ps2_handle_ack() and ps2_handle_response() return bool instead of int, as they do not return error codes but rather signal whether a byte was handled or not handled. ps2_is_keyboard_id() now returns bool as well. Signed-off-by: Dmitry Torokhov --- include/linux/libps2.h | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libps2.h b/include/linux/libps2.h index 4ad06e824f76..04a5750f1e4e 100644 --- a/include/linux/libps2.h +++ b/include/linux/libps2.h @@ -10,6 +10,9 @@ * the Free Software Foundation. 
*/ +#include <linux/mutex.h> +#include <linux/types.h> +#include <linux/wait.h> #define PS2_CMD_GETID 0x02f2 #define PS2_CMD_RESET_BAT 0x02ff @@ -36,21 +39,21 @@ struct ps2dev { wait_queue_head_t wait; unsigned long flags; - unsigned char cmdbuf[8]; - unsigned char cmdcnt; - unsigned char nak; + u8 cmdbuf[8]; + u8 cmdcnt; + u8 nak; }; void ps2_init(struct ps2dev *ps2dev, struct serio *serio); -int ps2_sendbyte(struct ps2dev *ps2dev, unsigned char byte, int timeout); -void ps2_drain(struct ps2dev *ps2dev, int maxbytes, int timeout); +int ps2_sendbyte(struct ps2dev *ps2dev, u8 byte, unsigned int timeout); +void ps2_drain(struct ps2dev *ps2dev, size_t maxbytes, unsigned int timeout); void ps2_begin_command(struct ps2dev *ps2dev); void ps2_end_command(struct ps2dev *ps2dev); -int __ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command); -int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command); -int ps2_handle_ack(struct ps2dev *ps2dev, unsigned char data); -int ps2_handle_response(struct ps2dev *ps2dev, unsigned char data); +int __ps2_command(struct ps2dev *ps2dev, u8 *param, unsigned int command); +int ps2_command(struct ps2dev *ps2dev, u8 *param, unsigned int command); +bool ps2_handle_ack(struct ps2dev *ps2dev, u8 data); +bool ps2_handle_response(struct ps2dev *ps2dev, u8 data); void ps2_cmd_aborted(struct ps2dev *ps2dev); -int ps2_is_keyboard_id(char id); +bool ps2_is_keyboard_id(u8 id); #endif /* _LIBPS2_H */ -- cgit v1.2.3 From 3a92dd331f90a33e0f0962b981577eb5078419c4 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 4 Jan 2018 11:27:05 -0800 Subject: Input: libps2 - use BIT() for bitmask constants Let's explicitly document bit numbers with BIT() macro. 
Signed-off-by: Dmitry Torokhov --- include/linux/libps2.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libps2.h b/include/linux/libps2.h index 04a5750f1e4e..646b581fea56 100644 --- a/include/linux/libps2.h +++ b/include/linux/libps2.h @@ -10,6 +10,7 @@ * the Free Software Foundation. */ +#include <linux/bitops.h> #include <linux/mutex.h> #include <linux/types.h> #include <linux/wait.h> @@ -23,11 +24,11 @@ #define PS2_RET_NAK 0xfe #define PS2_RET_ERR 0xfc -#define PS2_FLAG_ACK 1 /* Waiting for ACK/NAK */ -#define PS2_FLAG_CMD 2 /* Waiting for command to finish */ -#define PS2_FLAG_CMD1 4 /* Waiting for the first byte of command response */ -#define PS2_FLAG_WAITID 8 /* Command execiting is GET ID */ -#define PS2_FLAG_NAK 16 /* Last transmission was NAKed */ +#define PS2_FLAG_ACK BIT(0) /* Waiting for ACK/NAK */ +#define PS2_FLAG_CMD BIT(1) /* Waiting for a command to finish */ +#define PS2_FLAG_CMD1 BIT(2) /* Waiting for the first byte of command response */ +#define PS2_FLAG_WAITID BIT(3) /* Command executing is GET ID */ +#define PS2_FLAG_NAK BIT(4) /* Last transmission was NAKed */ struct ps2dev { struct serio *serio; -- cgit v1.2.3 From 08be954b7a7de6742d3d47e4dc20e3b086410761 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 2 Jan 2018 12:03:02 -0800 Subject: Input: psmouse - move sliced command implementation to libps2 In preparation to adding some debugging statements to PS/2 control sequences let's move psmouse_sliced_command() into libps2 and rename it to ps2_sliced_command(). 
Signed-off-by: Dmitry Torokhov --- include/linux/libps2.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libps2.h b/include/linux/libps2.h index 646b581fea56..3c69cd796f48 100644 --- a/include/linux/libps2.h +++ b/include/linux/libps2.h @@ -15,6 +15,8 @@ #include <linux/types.h> #include <linux/wait.h> +#define PS2_CMD_SETSCALE11 0x00e6 +#define PS2_CMD_SETRES 0x10e8 #define PS2_CMD_GETID 0x02f2 #define PS2_CMD_RESET_BAT 0x02ff @@ -52,6 +54,7 @@ void ps2_begin_command(struct ps2dev *ps2dev); void ps2_end_command(struct ps2dev *ps2dev); int __ps2_command(struct ps2dev *ps2dev, u8 *param, unsigned int command); int ps2_command(struct ps2dev *ps2dev, u8 *param, unsigned int command); +int ps2_sliced_command(struct ps2dev *ps2dev, u8 command); bool ps2_handle_ack(struct ps2dev *ps2dev, u8 data); bool ps2_handle_response(struct ps2dev *ps2dev, u8 data); void ps2_cmd_aborted(struct ps2dev *ps2dev); -- cgit v1.2.3 From 29acc42e8e10a4721757af9ed8aec569d30ce39b Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 17 Jan 2018 12:00:24 -0800 Subject: Input: libps2 - relax command byte ACK handling When we probe PS/2 devices we first issue "Get ID" command and only if we receive what we consider a valid keyboard or mouse ID we disable the device and continue with protocol detection. That means that the device may be transmitting motion or keystroke data, while we expect ACK response. Instead of signaling failure if we see anything but ACK/NAK let's ignore "garbage" response until we see ACK for the command byte (first byte). The checks for subsequent ACKs of command parameters will continue to be strict. 
Signed-off-by: Dmitry Torokhov --- include/linux/libps2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libps2.h b/include/linux/libps2.h index 3c69cd796f48..5f18fe02ae37 100644 --- a/include/linux/libps2.h +++ b/include/linux/libps2.h @@ -31,6 +31,7 @@ #define PS2_FLAG_CMD1 BIT(2) /* Waiting for the first byte of command response */ #define PS2_FLAG_WAITID BIT(3) /* Command executing is GET ID */ #define PS2_FLAG_NAK BIT(4) /* Last transmission was NAKed */ +#define PS2_FLAG_ACK_CMD BIT(5) /* Waiting to ACK the command (first) byte */ struct ps2dev { struct serio *serio; -- cgit v1.2.3 From 47a361634821dc66cefbfa70b9d10a91269d7f7d Mon Sep 17 00:00:00 2001 From: Crt Mori Date: Thu, 11 Jan 2018 11:19:57 +0100 Subject: lib: Add strongly typed 64bit int_sqrt There is no option to perform 64bit integer sqrt on 32bit platform. Added stronger typed int_sqrt64 enables the 64bit calculations to be performed on 32bit platforms. Using same algorithm as int_sqrt() with strong typing provides enough precision also on 32bit platforms, but it sacrifices some performance. In case values are smaller than ULONG_MAX the standard int_sqrt is used for calculation to maximize the performance due to more native calculations. 
Signed-off-by: Crt Mori Acked-by: Joe Perches Signed-off-by: Jonathan Cameron --- include/linux/kernel.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index ce51455e2adf..2da80e079d56 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -479,6 +479,15 @@ extern int func_ptr_is_kernel_text(void *ptr); unsigned long int_sqrt(unsigned long); +#if BITS_PER_LONG < 64 +u32 int_sqrt64(u64 x); +#else +static inline u32 int_sqrt64(u64 x) +{ + return (u32)int_sqrt(x); +} +#endif + extern void bust_spinlocks(int yes); extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ extern int panic_timeout; -- cgit v1.2.3 From e12f03d7031a977356e3d7b75a68c2185ff8d155 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 6 Dec 2017 14:45:15 -0800 Subject: perf/core: Implement the 'perf_kprobe' PMU A new PMU type, perf_kprobe is added. Based on attr from perf_event_open(), perf_kprobe creates a kprobe (or kretprobe) for the perf_event. This kprobe is private to this perf_event, and thus not added to global lists, and not available in tracefs. Two functions, create_local_trace_kprobe() and destroy_local_trace_kprobe() are added to create and destroy these local trace_kprobe. 
Signed-off-by: Song Liu Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Yonghong Song Reviewed-by: Josef Bacik Cc: Cc: Cc: Cc: Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171206224518.3598254-6-songliubraving@fb.com Signed-off-by: Ingo Molnar --- include/linux/trace_events.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index af44e7c2d577..21c5d43a21af 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -533,6 +533,10 @@ extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); extern int perf_trace_add(struct perf_event *event, int flags); extern void perf_trace_del(struct perf_event *event, int flags); +#ifdef CONFIG_KPROBE_EVENTS +extern int perf_kprobe_init(struct perf_event *event, bool is_retprobe); +extern void perf_kprobe_destroy(struct perf_event *event); +#endif extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); -- cgit v1.2.3 From 33ea4b24277b06dbc55d7f5772a46f029600255e Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 6 Dec 2017 14:45:16 -0800 Subject: perf/core: Implement the 'perf_uprobe' PMU This patch adds perf_uprobe support with similar pattern as previous patch (for kprobe). Two functions, create_local_trace_uprobe() and destroy_local_trace_uprobe(), are created so a uprobe can be created and attached to the file descriptor created by perf_event_open(). 
Signed-off-by: Song Liu Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Yonghong Song Reviewed-by: Josef Bacik Cc: Cc: Cc: Cc: Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171206224518.3598254-7-songliubraving@fb.com Signed-off-by: Ingo Molnar --- include/linux/trace_events.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 21c5d43a21af..0d9d6cb454b1 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -537,6 +537,10 @@ extern void perf_trace_del(struct perf_event *event, int flags); extern int perf_kprobe_init(struct perf_event *event, bool is_retprobe); extern void perf_kprobe_destroy(struct perf_event *event); #endif +#ifdef CONFIG_UPROBE_EVENTS +extern int perf_uprobe_init(struct perf_event *event, bool is_retprobe); +extern void perf_uprobe_destroy(struct perf_event *event); +#endif extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); -- cgit v1.2.3 From 0c0eb4caf03bb6d3d92c70560e0530c8fdf62284 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Mon, 8 Jan 2018 10:50:50 -0500 Subject: dmaengine: avoid map_cnt overflow with CONFIG_DMA_ENGINE_RAID When CONFIG_DMA_ENGINE_RAID is enabled, unmap pool size can reach to 256. But in struct dmaengine_unmap_data, map_cnt is only u8, wrapping to 0, if the unmap pool is maximally used. This triggers BUG() when struct dmaengine_unmap_data is freed. Use u16 to fix the problem. 
Signed-off-by: Zi Yan Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index f838764993eb..861be5cab1df 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -470,7 +470,11 @@ typedef void (*dma_async_tx_callback_result)(void *dma_async_param, const struct dmaengine_result *result); struct dmaengine_unmap_data { +#if IS_ENABLED(CONFIG_DMA_ENGINE_RAID) + u16 map_cnt; +#else u8 map_cnt; +#endif u8 to_cnt; u8 from_cnt; u8 bidi_cnt; -- cgit v1.2.3 From bd6f2fd5a1d52198468c5cdc3c2472362dff5aaa Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 30 Jan 2018 18:36:16 -0800 Subject: of: Support parsing phandle argument lists through a nexus node Platforms like 96boards have a standardized connector/expansion slot that exposes signals like GPIOs to expansion boards in an SoC agnostic way. We'd like the DT overlays for the expansion boards to be written once without knowledge of the SoC on the other side of the connector. This avoids the unscalable combinatorial explosion of a different DT overlay for each expansion board and SoC pair. We need a way to describe the GPIOs routed through the connector in an SoC agnostic way. Let's introduce nexus property parsing into the OF core to do this. This is largely based on the interrupt nexus support we already have. This allows us to remap a phandle list in a consumer node (e.g. reset-gpios) through a connector in a generic way (e.g. via gpio-map). Do this in a generic routine so that we can remap any sort of variable length phandle list. Taking GPIOs as an example, the connector would be a GPIO nexus, supporting the remapping of a GPIO specifier space to multiple GPIO providers on the SoC. 
DT would look as shown below, where 'soc_gpio1' and 'soc_gpio2' are inside the SoC, 'connector' is an expansion port where boards can be plugged in, and 'expansion_device' is a device on the expansion board. soc { soc_gpio1: gpio-controller1 { #gpio-cells = <2>; }; soc_gpio2: gpio-controller2 { #gpio-cells = <2>; }; }; connector: connector { #gpio-cells = <2>; gpio-map = <0 0 &soc_gpio1 1 0>, <1 0 &soc_gpio2 4 0>, <2 0 &soc_gpio1 3 0>, <3 0 &soc_gpio2 2 0>; gpio-map-mask = <0xf 0x0>; gpio-map-pass-thru = <0x0 0x1> }; expansion_device { reset-gpios = <&connector 2 GPIO_ACTIVE_LOW>; }; The GPIO core would use of_parse_phandle_with_args_map() instead of of_parse_phandle_with_args() and arrive at the same type of result, a phandle and argument list. The difference is that the phandle and arguments will be remapped through the nexus node to the underlying SoC GPIO controller node. In the example above, we would remap 'reset-gpios' from <&connector 2 GPIO_ACTIVE_LOW> to <&soc_gpio1 3 GPIO_ACTIVE_LOW>. 
Cc: Pantelis Antoniou Cc: Linus Walleij Cc: Mark Brown Signed-off-by: Stephen Boyd Signed-off-by: Rob Herring --- include/linux/of.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index da1ee95241c1..7258bbc85e4e 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -363,6 +363,9 @@ extern struct device_node *of_parse_phandle(const struct device_node *np, extern int of_parse_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name, int index, struct of_phandle_args *out_args); +extern int of_parse_phandle_with_args_map(const struct device_node *np, + const char *list_name, const char *stem_name, int index, + struct of_phandle_args *out_args); extern int of_parse_phandle_with_fixed_args(const struct device_node *np, const char *list_name, int cells_count, int index, struct of_phandle_args *out_args); @@ -815,6 +818,15 @@ static inline int of_parse_phandle_with_args(const struct device_node *np, return -ENOSYS; } +static inline int of_parse_phandle_with_args_map(const struct device_node *np, + const char *list_name, + const char *stem_name, + int index, + struct of_phandle_args *out_args) +{ + return -ENOSYS; +} + static inline int of_parse_phandle_with_fixed_args(const struct device_node *np, const char *list_name, int cells_count, int index, struct of_phandle_args *out_args) -- cgit v1.2.3 From 34e81f7a720d8a638f46b18b35678712dbafb42d Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 24 Jan 2018 09:07:58 +0100 Subject: scsi: raid_class: Add 'JBOD' RAID level Not a real RAID level, but some HBAs support JBOD in addition to the 'classical' RAID levels. Signed-off-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- include/linux/raid_class.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/raid_class.h b/include/linux/raid_class.h index 31e1ff69efc8..ec8655514283 100644 --- a/include/linux/raid_class.h +++ b/include/linux/raid_class.h @@ -38,6 +38,7 @@ enum raid_level { RAID_LEVEL_5, RAID_LEVEL_50, RAID_LEVEL_6, + RAID_LEVEL_JBOD, }; struct raid_data { -- cgit v1.2.3 From f947153f92afcd957476b765dc4ac75d2680b17b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 9 Jan 2018 19:29:54 +0100 Subject: ARM: EXYNOS: Add SPDX license identifiers Replace GPL license statements with SPDX GPL-2.0 and GPL-2.0+ license identifiers. Signed-off-by: Krzysztof Kozlowski --- include/linux/serial_s3c.h | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_s3c.h b/include/linux/serial_s3c.h index a7f004a3c177..463ed28d2b27 100644 --- a/include/linux/serial_s3c.h +++ b/include/linux/serial_s3c.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * Internal header file for Samsung S3C2410 serial ports (UART0-2) * @@ -10,21 +11,7 @@ * Internal header file for MX1ADS serial ports (UART1 & 2) * * Copyright (C) 2002 Shane Nay (shane@minirl.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ + */ #ifndef __ASM_ARM_REGS_SERIAL_H #define __ASM_ARM_REGS_SERIAL_H -- cgit v1.2.3 From bcb41a53b0b075600cb821302e7177ca5ab62efd Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 9 Jan 2018 19:29:56 +0100 Subject: soc: samsung: Add SPDX license identifiers to headers Replace GPL license statements with SPDX GPL-2.0 license identifiers. Signed-off-by: Krzysztof Kozlowski --- include/linux/soc/samsung/exynos-pmu.h | 5 +---- include/linux/soc/samsung/exynos-regs-pmu.h | 6 +----- 2 files changed, 2 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/samsung/exynos-pmu.h b/include/linux/soc/samsung/exynos-pmu.h index e57eb4b6cc5a..fc0b445bb36b 100644 --- a/include/linux/soc/samsung/exynos-pmu.h +++ b/include/linux/soc/samsung/exynos-pmu.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2014 Samsung Electronics Co., Ltd. * http://www.samsung.com * * Header for EXYNOS PMU Driver support - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __LINUX_SOC_EXYNOS_PMU_H diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index bebdde5dccd6..66dcb9ec273a 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -1,14 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2010-2015 Samsung Electronics Co., Ltd. 
* http://www.samsung.com * * EXYNOS - Power management unit definition * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * * Notice: * This is not a list of all Exynos Power Management Unit SFRs. * There are too many of them, not mentioning subtle differences -- cgit v1.2.3 From 2666ca9197e3d352f43b02d7dfb7c6dd72e7c614 Mon Sep 17 00:00:00 2001 From: Sarangdhar Joshi Date: Fri, 5 Jan 2018 16:04:17 -0800 Subject: remoteproc: Add remote processor coredump support As the remoteproc framework restarts the remote processor after a fatal event, it's useful to be able to acquire a coredump of the remote processor's state, for post mortem debugging. This patch introduces a mechanism for extracting the memory contents after the remote has stopped and before the restart sequence has begun in the recovery path. The remoteproc framework builds the core dump in memory and uses devcoredump to expose this to user space. 
Signed-off-by: Sarangdhar Joshi [bjorn: Use vmalloc instead of composing the ELF on the fly] Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 728d421fffe9..b60c3a31b75d 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -394,6 +394,21 @@ enum rproc_crash_type { RPROC_FATAL_ERROR, }; +/** + * struct rproc_dump_segment - segment info from ELF header + * @node: list node related to the rproc segment list + * @da: device address of the segment + * @size: size of the segment + */ +struct rproc_dump_segment { + struct list_head node; + + dma_addr_t da; + size_t size; + + loff_t offset; +}; + /** * struct rproc - represents a physical remote processor device * @node: list node of this rproc object @@ -424,6 +439,7 @@ enum rproc_crash_type { * @cached_table: copy of the resource table * @table_sz: size of @cached_table * @has_iommu: flag to indicate if remote processor is behind an MMU + * @dump_segments: list of segments in the firmware */ struct rproc { struct list_head node; @@ -455,6 +471,7 @@ struct rproc { size_t table_sz; bool has_iommu; bool auto_boot; + struct list_head dump_segments; }; /** @@ -534,6 +551,7 @@ void rproc_free(struct rproc *rproc); int rproc_boot(struct rproc *rproc); void rproc_shutdown(struct rproc *rproc); void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type); +int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size); static inline struct rproc_vdev *vdev_to_rvdev(struct virtio_device *vdev) { -- cgit v1.2.3 From c1d35c1ab4242464a0e5953ae69de8aa78156c6c Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 5 Jan 2018 16:04:18 -0800 Subject: remoteproc: Rename "load_rsc_table" to "parse_fw" The resource table is just one possible source of information that can be extracted from the firmware file. 
Generalize this interface to allow drivers to override this with parsers of other types of information. Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index b60c3a31b75d..f16864acedad 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -344,7 +344,7 @@ struct rproc_ops { int (*stop)(struct rproc *rproc); void (*kick)(struct rproc *rproc, int vqid); void * (*da_to_va)(struct rproc *rproc, u64 da, int len); - int (*load_rsc_table)(struct rproc *rproc, const struct firmware *fw); + int (*parse_fw)(struct rproc *rproc, const struct firmware *fw); struct resource_table *(*find_loaded_rsc_table)( struct rproc *rproc, const struct firmware *fw); int (*load)(struct rproc *rproc, const struct firmware *fw); -- cgit v1.2.3 From 4dd27f544c84c4d079049dd716beee192fcc7e03 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 5 Jan 2018 16:04:19 -0800 Subject: soc: qcom: mdt-loader: Return relocation base In order to implement support for grabbing core dumps in remoteproc it's necessary to know the relocated base of the image, as the offsets from the virtual memory base might not be based on the physical address. Return the adjusted physical base address to the caller. 
Acked-by: Andy Gross Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/mdt_loader.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/mdt_loader.h b/include/linux/soc/qcom/mdt_loader.h index bd8e0864b059..5b98bbdabc25 100644 --- a/include/linux/soc/qcom/mdt_loader.h +++ b/include/linux/soc/qcom/mdt_loader.h @@ -14,6 +14,7 @@ struct firmware; ssize_t qcom_mdt_get_size(const struct firmware *fw); int qcom_mdt_load(struct device *dev, const struct firmware *fw, const char *fw_name, int pas_id, void *mem_region, - phys_addr_t mem_phys, size_t mem_size); + phys_addr_t mem_phys, size_t mem_size, + phys_addr_t *reloc_base); #endif -- cgit v1.2.3 From 9b2c45d479d0fb8647c9e83359df69162b5fbe5f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 12 Feb 2018 20:00:20 +0100 Subject: net: make getname() functions return length rather than use int* parameter Changes since v1: Added changes in these files: drivers/infiniband/hw/usnic/usnic_transport.c drivers/staging/lustre/lnet/lnet/lib-socket.c drivers/target/iscsi/iscsi_target_login.c drivers/vhost/net.c fs/dlm/lowcomms.c fs/ocfs2/cluster/tcp.c security/tomoyo/network.c Before: All these functions either return a negative error indicator, or store length of sockaddr into "int *socklen" parameter and return zero on success. "int *socklen" parameter is awkward. For example, if caller does not care, it still needs to provide on-stack storage for the value it does not need. None of the many FOO_getname() functions of various protocols ever used old value of *socklen. They always just overwrite it. This change drops this parameter, and makes all these functions, on success, return length of sockaddr. It's always >= 0 and can be differentiated from an error. Tests in callers are changed from "if (err)" to "if (err < 0)", where needed. 
rpc_sockname() lost "int buflen" parameter, since its only use was to be passed to kernel_getsockname() as &buflen and subsequently not used in any way. Userspace API is not changed. text data bss dec hex filename 30108430 2633624 873672 33615726 200ef6e vmlinux.before.o 30108109 2633612 873672 33615393 200ee21 vmlinux.o Signed-off-by: Denys Vlasenko CC: David S. Miller CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org CC: linux-bluetooth@vger.kernel.org CC: linux-decnet-user@lists.sourceforge.net CC: linux-wireless@vger.kernel.org CC: linux-rdma@vger.kernel.org CC: linux-sctp@vger.kernel.org CC: linux-nfs@vger.kernel.org CC: linux-x25@vger.kernel.org Signed-off-by: David S. Miller --- include/linux/net.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 91216b16feb7..000d1aada74f 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -146,7 +146,7 @@ struct proto_ops { struct socket *newsock, int flags, bool kern); int (*getname) (struct socket *sock, struct sockaddr *addr, - int *sockaddr_len, int peer); + int peer); __poll_t (*poll) (struct file *file, struct socket *sock, struct poll_table_struct *wait); int (*ioctl) (struct socket *sock, unsigned int cmd, @@ -294,10 +294,8 @@ int kernel_listen(struct socket *sock, int backlog); int kernel_accept(struct socket *sock, struct socket **newsock, int flags); int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, int flags); -int kernel_getsockname(struct socket *sock, struct sockaddr *addr, - int *addrlen); -int kernel_getpeername(struct socket *sock, struct sockaddr *addr, - int *addrlen); +int kernel_getsockname(struct socket *sock, struct sockaddr *addr); +int kernel_getpeername(struct socket *sock, struct sockaddr *addr); int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen); int kernel_setsockopt(struct socket *sock, int level, int 
optname, char *optval, -- cgit v1.2.3 From 880f5b388252fedb26c70bb80ad1d7c8abbc0607 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 30 Oct 2017 23:11:14 -0700 Subject: remoteproc: Pass type of shutdown to subdev remove remoteproc instances can be stopped either by invoking shutdown or by an attempt to recover from a crash. For some subdev types it's expected to clean up gracefully during a shutdown, but are unable to do so during a crash - so pass this information to the subdev remove functions. Acked-By: Chris Lew Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index f16864acedad..d09a9c7af109 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -478,13 +478,14 @@ struct rproc { * struct rproc_subdev - subdevice tied to a remoteproc * @node: list node related to the rproc subdevs list * @probe: probe function, called as the rproc is started - * @remove: remove function, called as the rproc is stopped + * @remove: remove function, called as the rproc is being stopped, the @crashed + * parameter indicates if this originates from the a recovery */ struct rproc_subdev { struct list_head node; int (*probe)(struct rproc_subdev *subdev); - void (*remove)(struct rproc_subdev *subdev); + void (*remove)(struct rproc_subdev *subdev, bool crashed); }; /* we currently support only two vrings per rvdev */ @@ -568,7 +569,7 @@ static inline struct rproc *vdev_to_rproc(struct virtio_device *vdev) void rproc_add_subdev(struct rproc *rproc, struct rproc_subdev *subdev, int (*probe)(struct rproc_subdev *subdev), - void (*remove)(struct rproc_subdev *subdev)); + void (*remove)(struct rproc_subdev *subdev, bool graceful)); void rproc_remove_subdev(struct rproc *rproc, struct rproc_subdev *subdev); -- cgit v1.2.3 From 1a57feb847c56d6193f67d0e892c24e71f9e3ab1 Mon Sep 17 00:00:00 2001 
From: Kirill Tkhai Date: Tue, 13 Feb 2018 12:26:23 +0300 Subject: net: Introduce net_sem for protection of pernet_list Currently, the mutex is mostly used to protect the pernet operations list. It orders setup_net() and cleanup_net() with parallel {un,}register_pernet_operations() calls, so ->exit{,batch} methods of the same pernet operations are executed for a dying net, as were used to call ->init methods, even after the net namespace is unlinked from net_namespace_list in cleanup_net(). But there are several problems with scalability. The first one is that more than one net can't be created or destroyed at the same moment on the node. For big machines with many cpus running many containers it's very sensitive. The second one is that it needs to call synchronize_rcu() after net is removed from net_namespace_list(): Destroy net_ns: cleanup_net() mutex_lock(&net_mutex) list_del_rcu(&net->list) synchronize_rcu() <--- Sleep there for ages list_for_each_entry_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list) list_for_each_entry_reverse(ops, &pernet_list, list) ops_free_list(ops, &net_exit_list) mutex_unlock(&net_mutex) This primitive is not fast, especially on the systems with many processors and/or when preemptible RCU is enabled in config. So, all the time, while cleanup_net() is waiting for the RCU grace period, creation of new net namespaces is not possible; the tasks that attempt it are sleeping on the same mutex: Create net_ns: copy_net_ns() mutex_lock_killable(&net_mutex) <--- Sleep there for ages I observed 20-30 second hangs of "unshare -n" on an ordinary 8-cpu laptop with preemptible RCU enabled after a CRIU tests round is finished. The solution is to convert net_mutex to the rw_semaphore and add fine grain locks to the really small number of pernet_operations that really need them.
Then, pernet_operations::init/::exit methods, modifying the net-related data, will require down_read() locking only, while down_write() will be used for changing pernet_list (i.e., when modules are being loaded and unloaded). This gives a significant performance increase after the whole patch set is applied, as you can see here: %for i in {1..10000}; do unshare -n bash -c exit; done *before* real 1m40,377s user 0m9,672s sys 0m19,928s *after* real 0m17,007s user 0m5,311s sys 0m11,779 (5.8 times faster) This patch starts replacing net_mutex with net_sem. It adds the rw_semaphore, describes the variables it protects, and makes use of it where appropriate. net_mutex is still present, and next patches will kick it out step-by-step. Signed-off-by: Kirill Tkhai Acked-by: Andrei Vagin Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 1fdcde96eb65..e9ee9ad0a681 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -36,6 +36,7 @@ extern int rtnl_is_locked(void); extern wait_queue_head_t netdev_unregistering_wq; extern struct mutex net_mutex; +extern struct rw_semaphore net_sem; #ifdef CONFIG_PROVE_LOCKING extern bool lockdep_rtnl_is_held(void); -- cgit v1.2.3 From b1d03c1d12abbfa7de127772f281b309cf1650c3 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 12 Feb 2018 16:48:21 +0000 Subject: iommu/vt-d: Clean/document fault status flags So one could decode them without opening the specification.
Signed-off-by: Dmitry Safonov Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 8dad3dd26eae..ef169d67df92 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -209,12 +209,12 @@ #define DMA_FECTL_IM (((u32)1) << 31) /* FSTS_REG */ -#define DMA_FSTS_PPF ((u32)2) -#define DMA_FSTS_PFO ((u32)1) -#define DMA_FSTS_IQE (1 << 4) -#define DMA_FSTS_ICE (1 << 5) -#define DMA_FSTS_ITE (1 << 6) -#define DMA_FSTS_PRO (1 << 7) +#define DMA_FSTS_PFO (1 << 0) /* Primary Fault Overflow */ +#define DMA_FSTS_PPF (1 << 1) /* Primary Pending Fault */ +#define DMA_FSTS_IQE (1 << 4) /* Invalidation Queue Error */ +#define DMA_FSTS_ICE (1 << 5) /* Invalidation Completion Error */ +#define DMA_FSTS_ITE (1 << 6) /* Invalidation Time-out Error */ +#define DMA_FSTS_PRO (1 << 7) /* Page Request Overflow */ #define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) /* FRCD_REG, 32 bits access */ -- cgit v1.2.3 From c5611a8751e67595e4e7d3feaff3c900b92094b9 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 5 Feb 2018 05:45:53 -0500 Subject: iommu: Do not return error code for APIs with size_t return type Currently, iommu_unmap, iommu_unmap_fast and iommu_map_sg return size_t. However, some of the return values are error codes (< 0), which can be misinterpreted as large size. Therefore, returning size 0 instead to signify failure to map/unmap. 
Cc: Joerg Roedel Cc: Alex Williamson Signed-off-by: Suravee Suthikulpanit Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 41b8c5757859..19938ee6eb31 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -465,23 +465,23 @@ static inline int iommu_map(struct iommu_domain *domain, unsigned long iova, return -ENODEV; } -static inline int iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size) +static inline size_t iommu_unmap(struct iommu_domain *domain, + unsigned long iova, size_t size) { - return -ENODEV; + return 0; } -static inline int iommu_unmap_fast(struct iommu_domain *domain, unsigned long iova, - int gfp_order) +static inline size_t iommu_unmap_fast(struct iommu_domain *domain, + unsigned long iova, int gfp_order) { - return -ENODEV; + return 0; } static inline size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot) { - return -ENODEV; + return 0; } static inline void iommu_flush_tlb_all(struct iommu_domain *domain) -- cgit v1.2.3 From 297dd12cb104151797fd649433a2157b585f1718 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 13 Feb 2018 14:15:36 +0100 Subject: net: avoid including xdp.h in filter.h It is sufficient to have a forward declaration of struct xdp_rxq_info in linux/filter.h, which avoids including net/xdp.h. This was originally suggested by John Fastabend during the review phase, but wasn't included in the final patchset revision. Thus, this followup.
Suggested-by: John Fastabend Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 276932d75975..fdb691b520c0 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -20,7 +20,6 @@ #include #include -#include #include #include @@ -30,6 +29,7 @@ struct sk_buff; struct sock; struct seccomp_data; struct bpf_prog_aux; +struct xdp_rxq_info; /* ArgX, context and stack frame pointer register positions. Note, * Arg1, Arg2, Arg3, etc are used as argument mappings of function -- cgit v1.2.3 From c65e774fb3f6af212641538694b9778ff9ab4300 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:53 +0300 Subject: x86/mm: Make PGDIR_SHIFT and PTRS_PER_P4D variable For boot-time switching between 4- and 5-level paging we need to be able to fold p4d page table level at runtime. It requires variable PGDIR_SHIFT and PTRS_PER_P4D. The change doesn't affect the kernel image size much: text data bss dec hex filename 8628091 4734304 1368064 14730459 e0c4db vmlinux.before 8628393 4734340 1368064 14730797 e0c62d vmlinux.after Signed-off-by: Kirill A. 
Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-7-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/kasan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index adc13474a53b..d6459bd1376d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -18,7 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE]; extern pte_t kasan_zero_pte[PTRS_PER_PTE]; extern pmd_t kasan_zero_pmd[PTRS_PER_PMD]; extern pud_t kasan_zero_pud[PTRS_PER_PUD]; -extern p4d_t kasan_zero_p4d[PTRS_PER_P4D]; +extern p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D]; void kasan_populate_zero_shadow(const void *shadow_start, const void *shadow_end); -- cgit v1.2.3 From 8b4282e6b8e239d8ce68ab884c89335cc6fdd7c7 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Tue, 13 Feb 2018 15:20:50 +0000 Subject: ACPI/IORT: Add msi address regions reservation helper On some platforms msi parent address regions have to be excluded from normal IOVA allocation in that they are detected and decoded in a HW specific way by system components and so they cannot be considered normal IOVA address space. Add a helper function that retrieves ITS address regions - the msi parent - through IORT device <-> ITS mappings and reserves it so that these regions will not be translated by IOMMU and will be excluded from IOVA allocations. The function checks for the smmu model number and only applies the msi reservation if the platform requires it. 
Signed-off-by: Shameer Kolothum Reviewed-by: Lorenzo Pieralisi [For the ITS part] Reviewed-by: Marc Zyngier Signed-off-by: Joerg Roedel --- include/linux/acpi_iort.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 2f7a29242b87..38cd77b39a64 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -26,7 +26,8 @@ #define IORT_IRQ_MASK(irq) (irq & 0xffffffffULL) #define IORT_IRQ_TRIGGER_MASK(irq) ((irq >> 32) & 0xffffffffULL) -int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node); +int iort_register_domain_token(int trans_id, phys_addr_t base, + struct fwnode_handle *fw_node); void iort_deregister_domain_token(int trans_id); struct fwnode_handle *iort_find_domain_token(int trans_id); #ifdef CONFIG_ACPI_IORT @@ -38,6 +39,7 @@ int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id); /* IOMMU interface */ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size); const struct iommu_ops *iort_iommu_configure(struct device *dev); +int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head); #else static inline void acpi_iort_init(void) { } static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id) @@ -52,6 +54,9 @@ static inline void iort_dma_setup(struct device *dev, u64 *dma_addr, static inline const struct iommu_ops *iort_iommu_configure( struct device *dev) { return NULL; } +static inline +int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) +{ return 0; } #endif #endif /* __ACPI_IORT_H__ */ -- cgit v1.2.3 From 9b00bc7b901ff672a9252002d3810fdf9489bc64 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 12 Feb 2018 13:45:30 +0100 Subject: spi: spi-gpio: Rewrite to use GPIO descriptors This converts the bit-banged GPIO SPI driver to looking up and using GPIO descriptors to get a handle on GPIO lines for SCK, MOSI, MISO and all CS lines. 
All existing board files are converted in one go to keep it all consistent. With these conversions I rarely find any interim steps that make any sense. Device tree probing and GPIO handling should work like before also after this patch. For board files, we stop using controller data to pass the GPIO line for chip select, instead we pass this as a GPIO descriptor lookup like everything else. In some s3c24xx machines the names of the SPI devices were set to "spi-gpio" rather than "spi_gpio" which can never have worked, I fixed it to work (I guess) as part of this patch set. Sometimes I wonder how this code got upstream in the first place, it obviously is not tested. mach-s3c64xx/mach-smartq.c has the same problem and additionally defines the *same* GPIO line for MOSI and MISO which is not going to be accepted by gpiolib. As the lines were numbered 1,2,2 I assumed it was a typo and use lines 1,2,3. A comment gives away that line 0 is chip select though no actual SPI device is provided for the LCD supposed to be on this bit-banged SPI bus. I left it intact instead of just deleting the bus though. Kill off board file code that tries to initialize the SPI lines to the same values that they will later be set by the spi_gpio driver anyways. Given the huge number of weird things in these board files I do not think this code is very tested or put in with much afterthought anyways.
In order to assert that we do not get performance regressions on this crucial bit-banged driver, I ran a script like this dumping the Ilitek ILI9322 regmap 10000 times (it has no caching obviously) on an otherwise idle system in two iterations before and after the patches: #!/bin/sh for run in `seq 10000` do cat /debug/regmap/spi0.0/registers > /dev/null done Before the patch: time test.sh real 3m 41.03s user 0m 29.41s sys 3m 7.22s time test.sh real 3m 44.24s user 0m 32.31s sys 3m 7.60s After the patch: time test.sh real 3m 41.32s user 0m 28.92s sys 3m 8.08s time test.sh real 3m 39.92s user 0m 30.20s sys 3m 5.56s So any performance differences seem to be in the error margin. Signed-off-by: Linus Walleij Acked-by: Olof Johansson Reviewed-by: Andy Shevchenko Signed-off-by: Mark Brown --- include/linux/spi/spi_gpio.h | 49 +------------------------------------------- 1 file changed, 1 insertion(+), 48 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi_gpio.h b/include/linux/spi/spi_gpio.h index e7bd89a59cd1..9e7e83d8645b 100644 --- a/include/linux/spi/spi_gpio.h +++ b/include/linux/spi/spi_gpio.h @@ -8,64 +8,17 @@ * - id the same as the SPI bus number it implements * - dev.platform data pointing to a struct spi_gpio_platform_data * - * Or, see the driver code for information about speedups that are - * possible on platforms that support inlined access for GPIOs (no - * spi_gpio_platform_data is used). - * - * Use spi_board_info with these busses in the usual way, being sure - * that the controller_data being the GPIO used for each device's - * chipselect: - * - * static struct spi_board_info ... [] = { - * ... - * // this slave uses GPIO 42 for its chipselect - * .controller_data = (void *) 42, - * ... - * // this one uses GPIO 86 for its chipselect - * .controller_data = (void *) 86, - * ...
- * }; - * - * If chipselect is not used (there's only one device on the bus), assign - * SPI_GPIO_NO_CHIPSELECT to the controller_data: - * .controller_data = (void *) SPI_GPIO_NO_CHIPSELECT; - * - * If the MISO or MOSI pin is not available then it should be set to - * SPI_GPIO_NO_MISO or SPI_GPIO_NO_MOSI. + * Use spi_board_info with these busses in the usual way. * * If the bitbanged bus is later switched to a "native" controller, * that platform_device and controller_data should be removed. */ -#define SPI_GPIO_NO_CHIPSELECT ((unsigned long)-1l) -#define SPI_GPIO_NO_MISO ((unsigned long)-1l) -#define SPI_GPIO_NO_MOSI ((unsigned long)-1l) - /** * struct spi_gpio_platform_data - parameter for bitbanged SPI master - * @sck: number of the GPIO used for clock output - * @mosi: number of the GPIO used for Master Output, Slave In (MOSI) data - * @miso: number of the GPIO used for Master Input, Slave Output (MISO) data * @num_chipselect: how many slaves to allow - * - * All GPIO signals used with the SPI bus managed through this driver - * (chipselects, MOSI, MISO, SCK) must be configured as GPIOs, instead - * of some alternate function. - * - * It can be convenient to use this driver with pins that have alternate - * functions associated with a "native" SPI controller if a driver for that - * controller is not available, or is missing important functionality. - * - * On platforms which can do so, configure MISO with a weak pullup unless - * there's an external pullup on that signal. That saves power by avoiding - * floating signals. (A weak pulldown would save power too, but many - * drivers expect to see all-ones data as the no slave "response".) 
*/ struct spi_gpio_platform_data { - unsigned sck; - unsigned long mosi; - unsigned long miso; - u16 num_chipselect; }; -- cgit v1.2.3 From 330c7272c40e965b8ab510d1022acd6e6a32e9c8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 13 Feb 2018 08:52:00 -0800 Subject: net: Make dn_ptr depend on CONFIG_DECNET Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5eef6c8e2741..d2ef35e00626 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1800,7 +1800,9 @@ struct net_device { #endif void *atalk_ptr; struct in_device __rcu *ip_ptr; +#if IS_ENABLED(CONFIG_DECNET) struct dn_dev __rcu *dn_ptr; +#endif struct inet6_dev __rcu *ip6_ptr; void *ax25_ptr; struct wireless_dev *ieee80211_ptr; -- cgit v1.2.3 From 19ff13f2a411d99af67d8e51867d54b86e1bf017 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 13 Feb 2018 08:52:01 -0800 Subject: net: Make ax25_ptr depend on CONFIG_AX25 Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d2ef35e00626..936dc2c9dca1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1804,7 +1804,9 @@ struct net_device { struct dn_dev __rcu *dn_ptr; #endif struct inet6_dev __rcu *ip6_ptr; +#if IS_ENABLED(CONFIG_AX25) void *ax25_ptr; +#endif struct wireless_dev *ieee80211_ptr; struct wpan_dev *ieee802154_ptr; #if IS_ENABLED(CONFIG_MPLS_ROUTING) -- cgit v1.2.3 From 89e58148fbf2e4cbd84006e263061c19a2b47adf Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 13 Feb 2018 08:52:02 -0800 Subject: net: Make atalk_ptr depend on ATALK or IRDA Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/atalk.h | 2 ++ include/linux/netdevice.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/atalk.h b/include/linux/atalk.h index 4d356e168692..40373920ea58 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -113,10 +113,12 @@ extern void aarp_proto_init(void); /* Inter module exports */ /* Give a device find its atif control structure */ +#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK) static inline struct atalk_iface *atalk_find_dev(struct net_device *dev) { return dev->atalk_ptr; } +#endif extern struct atalk_addr *atalk_find_dev_addr(struct net_device *dev); extern struct net_device *atrtr_get_dev(struct atalk_addr *sa); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 936dc2c9dca1..dbe6344b727a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1798,7 +1798,9 @@ struct net_device { #if IS_ENABLED(CONFIG_TIPC) struct tipc_bearer __rcu *tipc_ptr; #endif +#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK) void *atalk_ptr; +#endif struct in_device __rcu *ip_ptr; #if IS_ENABLED(CONFIG_DECNET) struct dn_dev __rcu *dn_ptr; -- cgit v1.2.3 From eb09f1feb8e5999390a6f149307cb88354232680 Mon Sep 17 00:00:00 2001 From: Harshitha Ramamurthy Date: Tue, 23 Jan 2018 08:50:56 -0800 Subject: virtchnl: Add virtchl structures to support queue channels This patch defines new structs in support of the virtchannel message that the VF sends to the PF to create a queue channel specified by the user via tc tool. 
Signed-off-by: Harshitha Ramamurthy Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 3ce61342fa31..1f652ceecf35 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -136,6 +136,8 @@ enum virtchnl_ops { VIRTCHNL_OP_ENABLE_VLAN_STRIPPING = 27, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING = 28, VIRTCHNL_OP_REQUEST_QUEUES = 29, + VIRTCHNL_OP_ENABLE_CHANNELS = 30, + VIRTCHNL_OP_DISABLE_CHANNELS = 31, }; /* This macro is used to generate a compilation error if a structure @@ -244,6 +246,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_ENCAP 0X00100000 #define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00200000 #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM 0X00400000 +#define VIRTCHNL_VF_OFFLOAD_ADQ 0X00800000 #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \ VIRTCHNL_VF_OFFLOAD_VLAN | \ @@ -496,6 +499,30 @@ struct virtchnl_rss_hena { VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hena); +/* VIRTCHNL_OP_ENABLE_CHANNELS + * VIRTCHNL_OP_DISABLE_CHANNELS + * VF sends these messages to enable or disable channels based on + * the user specified queue count and queue offset for each traffic class. + * This struct encompasses all the information that the PF needs from + * VF to create a channel. + */ +struct virtchnl_channel_info { + u16 count; /* number of queues in a channel */ + u16 offset; /* queues in a channel start from 'offset' */ + u32 pad; + u64 max_tx_rate; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_channel_info); + +struct virtchnl_tc_info { + u32 num_tc; + u32 pad; + struct virtchnl_channel_info list[1]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_tc_info); + /* VIRTCHNL_OP_EVENT * PF sends this message to inform the VF driver of events that may affect it. 
* No direct response is expected from the VF, though it may generate other @@ -711,6 +738,19 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_REQUEST_QUEUES: valid_len = sizeof(struct virtchnl_vf_res_request); break; + case VIRTCHNL_OP_ENABLE_CHANNELS: + valid_len = sizeof(struct virtchnl_tc_info); + if (msglen >= valid_len) { + struct virtchnl_tc_info *vti = + (struct virtchnl_tc_info *)msg; + valid_len += vti->num_tc * + sizeof(struct virtchnl_channel_info); + if (vti->num_tc == 0) + err_msg_format = true; + } + break; + case VIRTCHNL_OP_DISABLE_CHANNELS: + break; /* These are always errors coming from the VF. */ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: -- cgit v1.2.3 From 0718e560a330599d15fddc37651d693c7a09e49e Mon Sep 17 00:00:00 2001 From: Harshitha Ramamurthy Date: Tue, 23 Jan 2018 08:51:03 -0800 Subject: virtchnl: Add a macro to check the size of a union This patch adds a macro to check if the size of a union is correct. It throws a divide by zero error if the union is not of the correct size. Signed-off-by: Harshitha Ramamurthy Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 1f652ceecf35..6fe630ebbf23 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -140,13 +140,15 @@ enum virtchnl_ops { VIRTCHNL_OP_DISABLE_CHANNELS = 31, }; -/* This macro is used to generate a compilation error if a structure +/* These macros are used to generate compilation errors if a structure/union * is not exactly the correct length. It gives a divide by zero error if the - * structure is not of the correct size, otherwise it creates an enum that is - * never used. + * structure/union is not of the correct size, otherwise it creates an enum + * that is never used. 
*/ #define VIRTCHNL_CHECK_STRUCT_LEN(n, X) enum virtchnl_static_assert_enum_##X \ { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) } +#define VIRTCHNL_CHECK_UNION_LEN(n, X) enum virtchnl_static_asset_enum_##X \ + { virtchnl_static_assert_##X = (n)/((sizeof(union X) == (n)) ? 1 : 0) } /* Virtual channel message descriptor. This overlays the admin queue * descriptor. All other data is passed in external buffers. -- cgit v1.2.3 From 3872c8d44c2e489bcce0c743e808a4135e8da228 Mon Sep 17 00:00:00 2001 From: Harshitha Ramamurthy Date: Tue, 23 Jan 2018 08:51:04 -0800 Subject: virtchnl: Add filter data structures This patch adds infrastructure to send virtchnl messages to the PF to configure filters on the VF. The patch adds a struct called virtchnl_filter which contains information about the fields in the user-specified tc filter. Signed-off-by: Harshitha Ramamurthy Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 59 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 6fe630ebbf23..b0a7f315bfbe 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -138,6 +138,8 @@ enum virtchnl_ops { VIRTCHNL_OP_REQUEST_QUEUES = 29, VIRTCHNL_OP_ENABLE_CHANNELS = 30, VIRTCHNL_OP_DISABLE_CHANNELS = 31, + VIRTCHNL_OP_ADD_CLOUD_FILTER = 32, + VIRTCHNL_OP_DEL_CLOUD_FILTER = 33, }; /* These macros are used to generate compilation errors if a structure/union @@ -525,6 +527,57 @@ struct virtchnl_tc_info { VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_tc_info); +/* VIRTCHNL_ADD_CLOUD_FILTER + * VIRTCHNL_DEL_CLOUD_FILTER + * VF sends these messages to add or delete a cloud filter based on the + * user specified match and action filters. These structures encompass + * all the information that the PF needs from the VF to add/delete a + * cloud filter. 
+ */ + +struct virtchnl_l4_spec { + u8 src_mac[ETH_ALEN]; + u8 dst_mac[ETH_ALEN]; + __be16 vlan_id; + __be16 pad; /* reserved for future use */ + __be32 src_ip[4]; + __be32 dst_ip[4]; + __be16 src_port; + __be16 dst_port; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(52, virtchnl_l4_spec); + +union virtchnl_flow_spec { + struct virtchnl_l4_spec tcp_spec; + u8 buffer[128]; /* reserved for future use */ +}; + +VIRTCHNL_CHECK_UNION_LEN(128, virtchnl_flow_spec); + +enum virtchnl_action { + /* action types */ + VIRTCHNL_ACTION_DROP = 0, + VIRTCHNL_ACTION_TC_REDIRECT, +}; + +enum virtchnl_flow_type { + /* flow types */ + VIRTCHNL_TCP_V4_FLOW = 0, + VIRTCHNL_TCP_V6_FLOW, +}; + +struct virtchnl_filter { + union virtchnl_flow_spec data; + union virtchnl_flow_spec mask; + enum virtchnl_flow_type flow_type; + enum virtchnl_action action; + u32 action_meta; + __u8 field_flags; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter); + /* VIRTCHNL_OP_EVENT * PF sends this message to inform the VF driver of events that may affect it. * No direct response is expected from the VF, though it may generate other @@ -753,6 +806,12 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, break; case VIRTCHNL_OP_DISABLE_CHANNELS: break; + case VIRTCHNL_OP_ADD_CLOUD_FILTER: + valid_len = sizeof(struct virtchnl_filter); + break; + case VIRTCHNL_OP_DEL_CLOUD_FILTER: + valid_len = sizeof(struct virtchnl_filter); + break; /* These are always errors coming from the VF. */ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: -- cgit v1.2.3 From a0e37da2a542acb6069b9e10d8aba3be4e5204d7 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Mon, 12 Feb 2018 19:32:37 -0600 Subject: ARM: OMAP2+: Cleanup omap_gpio_dev_attr usage The omap_gpio_dev_attr data was used to supply instance-specific data for legacy non-DT devices. The GPIO legacy device support has been cleaned up in commit 14944934f8ac ("ARM: OMAP2+: Remove legacy gpio code") a while ago and this data is therefore no longer needed. 
So, cleanup the structure and all the associated data in various hwmod data files. Signed-off-by: Suman Anna Signed-off-by: Tony Lindgren --- include/linux/platform_data/gpio-omap.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h index cb2618147c34..8612855691b2 100644 --- a/include/linux/platform_data/gpio-omap.h +++ b/include/linux/platform_data/gpio-omap.h @@ -157,11 +157,6 @@ #define OMAP_MPUIO(nr) (OMAP_MAX_GPIO_LINES + (nr)) #define OMAP_GPIO_IS_MPUIO(nr) ((nr) >= OMAP_MAX_GPIO_LINES) -struct omap_gpio_dev_attr { - int bank_width; /* GPIO bank width */ - bool dbck_flag; /* dbck required or not - True for OMAP3&4 */ -}; - struct omap_gpio_reg_offs { u16 revision; u16 direction; -- cgit v1.2.3 From 1cddc364584e76c16354d34326c671aac2a23e4f Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Mon, 12 Feb 2018 19:32:40 -0600 Subject: ARM: OMAP2+: Cleanup omap2_spi_dev_attr and other legacy data The omap2_spi_dev_attr data was used to supply instance-specific data for legacy non-DT devices. The SPI legacy device support including the usage of the hwmod class revision data has been dropped in commit 6f3ab009a178 ("ARM: OMAP2+: Remove unused legacy code for device init") and this data is therefore no longer needed. So, cleanup the structure and all the associated data in various hwmod data files. 
Signed-off-by: Suman Anna Signed-off-by: Tony Lindgren --- include/linux/platform_data/spi-omap2-mcspi.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/spi-omap2-mcspi.h b/include/linux/platform_data/spi-omap2-mcspi.h index 13c83a25958a..0bf9fddb8306 100644 --- a/include/linux/platform_data/spi-omap2-mcspi.h +++ b/include/linux/platform_data/spi-omap2-mcspi.h @@ -2,10 +2,6 @@ #ifndef _OMAP2_MCSPI_H #define _OMAP2_MCSPI_H -#define OMAP2_MCSPI_REV 0 -#define OMAP3_MCSPI_REV 1 -#define OMAP4_MCSPI_REV 2 - #define OMAP4_MCSPI_REG_OFFSET 0x100 #define MCSPI_PINDIR_D0_IN_D1_OUT 0 @@ -17,10 +13,6 @@ struct omap2_mcspi_platform_config { unsigned int pin_dir:1; }; -struct omap2_mcspi_dev_attr { - unsigned short num_chipselect; -}; - struct omap2_mcspi_device_config { unsigned turbo_mode:1; -- cgit v1.2.3 From 0693036ca800ab471e8f28caeb3a9ac4d77af810 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Mon, 12 Feb 2018 19:32:41 -0600 Subject: ARM: OMAP2+: Cleanup omap_mcbsp_dev_attr and other legacy data The omap_mcbsp_dev_attr data was used to supply instance-specific data for legacy non-DT devices. The legacy McBSP device support including the usage of the hwmod class revision data has been dropped in commit 48f6693790aa ("ARM: OMAP2+: Remove unused legacy code for McBSP") and this data is therefore no longer needed. So, cleanup the structure and all the associated data in various hwmod data files. 
Cc: Peter Ujfalusi Signed-off-by: Suman Anna Signed-off-by: Tony Lindgren --- include/linux/platform_data/asoc-ti-mcbsp.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/asoc-ti-mcbsp.h b/include/linux/platform_data/asoc-ti-mcbsp.h index e684543254f3..e319d0a2ec82 100644 --- a/include/linux/platform_data/asoc-ti-mcbsp.h +++ b/include/linux/platform_data/asoc-ti-mcbsp.h @@ -25,10 +25,6 @@ #include #include -#define MCBSP_CONFIG_TYPE2 0x2 -#define MCBSP_CONFIG_TYPE3 0x3 -#define MCBSP_CONFIG_TYPE4 0x4 - /* Platform specific configuration */ struct omap_mcbsp_ops { void (*request)(unsigned int); @@ -47,14 +43,6 @@ struct omap_mcbsp_platform_data { int (*force_ick_on)(struct clk *clk, bool force_on); }; -/** - * omap_mcbsp_dev_attr - OMAP McBSP device attributes for omap_hwmod - * @sidetone: name of the sidetone device - */ -struct omap_mcbsp_dev_attr { - const char *sidetone; -}; - void omap3_mcbsp_init_pdata_callback(struct omap_mcbsp_platform_data *pdata); #endif -- cgit v1.2.3 From 052fddfb3c4e5f3d413d3f6b8dffe1b7192026af Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 14 Feb 2018 01:07:42 +0100 Subject: net: ptp: Add stub for ptp_classify_raw() When NET_PTP_CLASSIFY is disabled, a stub function is required in order that the drivers compile. Signed-off-by: Andrew Lunn Acked-by: Richard Cochran Signed-off-by: David S. 
Miller --- include/linux/ptp_classify.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index a079656b614c..059242030631 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -75,5 +75,9 @@ void __init ptp_classifier_init(void); static inline void ptp_classifier_init(void) { } +static inline unsigned int ptp_classify_raw(struct sk_buff *skb) +{ + return PTP_CLASS_NONE; +} #endif #endif /* _PTP_CLASSIFY_H_ */ -- cgit v1.2.3 From 02d92f7903647119e125b24f5470f96cee0d4b4b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 19 Jan 2018 16:13:01 -0800 Subject: net/mlx5: CQ Database per EQ Before this patch the driver had one CQ database protected via one spinlock, this spinlock is meant to synchronize between CQ adding/removing and CQ IRQ interrupt handling. On a system with large number of CPUs and on a work load that requires lots of interrupts, this global spinlock becomes a very nasty hotspot and introduces a contention between the active cores, which will significantly hurt performance and becomes a bottleneck that prevents seamless cpu scaling. To solve this we simply move the CQ database and its spinlock to be per EQ (IRQ), thus per core. 
Tested with: system: 2 sockets, 14 cores per socket, hyperthreading, 2x14x2=56 cores netperf command: ./super_netperf 200 -P 0 -t TCP_RR -H -l 30 -- -r 300,300 -o -s 1M,1M -S 1M,1M WITHOUT THIS PATCH: Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle Average: all 4.32 0.00 36.15 0.09 0.00 34.02 0.00 0.00 0.00 25.41 Samples: 2M of event 'cycles:pp', Event count (approx.): 1554616897271 Overhead Command Shared Object Symbol + 14.28% swapper [kernel.vmlinux] [k] intel_idle + 12.25% swapper [kernel.vmlinux] [k] queued_spin_lock_slowpath + 10.29% netserver [kernel.vmlinux] [k] queued_spin_lock_slowpath + 1.32% netserver [kernel.vmlinux] [k] mlx5e_xmit WITH THIS PATCH: Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle Average: all 4.27 0.00 34.31 0.01 0.00 18.71 0.00 0.00 0.00 42.69 Samples: 2M of event 'cycles:pp', Event count (approx.): 1498132937483 Overhead Command Shared Object Symbol + 23.33% swapper [kernel.vmlinux] [k] intel_idle + 1.69% netserver [kernel.vmlinux] [k] mlx5e_xmit Tested-by: Song Liu Signed-off-by: Saeed Mahameed Reviewed-by: Gal Pressman --- include/linux/mlx5/cq.h | 3 +-- include/linux/mlx5/driver.h | 22 +++++++++------------- 2 files changed, 10 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 48c181a2acc9..06ba425a6ad7 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -60,6 +60,7 @@ struct mlx5_core_cq { } tasklet_ctx; int reset_notify_added; struct list_head reset_notify; + struct mlx5_eq *eq; }; @@ -171,8 +172,6 @@ static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd, mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, NULL); } -int mlx5_init_cq_table(struct mlx5_core_dev *dev); -void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev); int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen); int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, 
struct mlx5_core_cq *cq); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6ed79a8a8318..96e003db2bcd 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -375,8 +375,15 @@ struct mlx5_eq_pagefault { mempool_t *pool; }; +struct mlx5_cq_table { + /* protect radix tree */ + spinlock_t lock; + struct radix_tree_root tree; +}; + struct mlx5_eq { struct mlx5_core_dev *dev; + struct mlx5_cq_table cq_table; __be32 __iomem *doorbell; u32 cons_index; struct mlx5_buf buf; @@ -526,13 +533,6 @@ struct mlx5_core_health { struct delayed_work recover_work; }; -struct mlx5_cq_table { - /* protect radix tree - */ - spinlock_t lock; - struct radix_tree_root tree; -}; - struct mlx5_qp_table { /* protect radix tree */ @@ -654,10 +654,6 @@ struct mlx5_priv { struct dentry *cmdif_debugfs; /* end: qp staff */ - /* start: cq staff */ - struct mlx5_cq_table cq_table; - /* end: cq staff */ - /* start: mkey staff */ struct mlx5_mkey_table mkey_table; /* end: mkey staff */ @@ -1053,12 +1049,12 @@ int mlx5_eq_init(struct mlx5_core_dev *dev); void mlx5_eq_cleanup(struct mlx5_core_dev *dev); void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); -void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn); +void mlx5_cq_completion(struct mlx5_eq *eq, u32 cqn); void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); -void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type); +void mlx5_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type); int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int nent, u64 mask, const char *name, enum mlx5_eq_type type); -- cgit v1.2.3 From 
f105b45bf77ced96e516e1cd771c41bb7e8c830b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 1 Feb 2018 03:32:00 -0800 Subject: net/mlx5: CQ hold/put API Now as the CQ table is per EQ, add an API to hold/put CQ to be used from eq.c in downstream patch. Signed-off-by: Saeed Mahameed Reviewed-by: Gal Pressman --- include/linux/mlx5/cq.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 06ba425a6ad7..445ad194e0fe 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -172,6 +172,17 @@ static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd, mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, NULL); } +static inline void mlx5_cq_hold(struct mlx5_core_cq *cq) +{ + refcount_inc(&cq->refcount); +} + +static inline void mlx5_cq_put(struct mlx5_core_cq *cq) +{ + if (refcount_dec_and_test(&cq->refcount)) + complete(&cq->free); +} + int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen); int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); -- cgit v1.2.3 From 3ac7afdbcf243d6c79c1569d9e29aef0096e4743 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 1 Feb 2018 04:37:07 -0800 Subject: net/mlx5: Move CQ completion and event forwarding logic to eq.c Since CQ tree is now per EQ, CQ completion and event forwarding became specific implementation of EQ logic, this patch moves that logic to eq.c and makes those functions static. 
Signed-off-by: Saeed Mahameed Reviewed-by: Gal Pressman --- include/linux/mlx5/driver.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 96e003db2bcd..09e2f3e8753c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1049,12 +1049,10 @@ int mlx5_eq_init(struct mlx5_core_dev *dev); void mlx5_eq_cleanup(struct mlx5_core_dev *dev); void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); -void mlx5_cq_completion(struct mlx5_eq *eq, u32 cqn); void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); -void mlx5_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type); int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int nent, u64 mask, const char *name, enum mlx5_eq_type type); -- cgit v1.2.3 From 3ec5693b17314b58977ba3c8d720d1f9cfef39f8 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 1 Feb 2018 05:42:06 -0800 Subject: net/mlx5: Remove redundant EQ API exports EQ structure and API is private to mlx5_core driver only, external drivers should not have access or the means to manipulate EQ objects. Remove redundant exports and move API functions out of the linux/mlx5 include directory into the driver's mlx5_core.h private include file. 
Signed-off-by: Saeed Mahameed Reviewed-by: Gal Pressman --- include/linux/mlx5/driver.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 09e2f3e8753c..2860a253275b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1045,20 +1045,11 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); void mlx5_unregister_debugfs(void); -int mlx5_eq_init(struct mlx5_core_dev *dev); -void mlx5_eq_cleanup(struct mlx5_core_dev *dev); void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); -int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int nent, u64 mask, const char *name, - enum mlx5_eq_type type); -int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -int mlx5_start_eqs(struct mlx5_core_dev *dev); -void mlx5_stop_eqs(struct mlx5_core_dev *dev); int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, unsigned int *irqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); @@ -1070,14 +1061,6 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, void *data_out, int size_out, u16 reg_num, int arg, int write); -int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, - u32 *out, 
int outlen); -int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); -void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); -int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); -void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db); int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db, int node); -- cgit v1.2.3 From 388ca8be00370db132464e27f745b8a0add19fcb Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Tue, 2 Jan 2018 16:08:06 +0200 Subject: IB/mlx5: Implement fragmented completion queue (CQ) The current implementation of create CQ requires contiguous memory, such requirement is problematic once the memory is fragmented or the system is low in memory, it causes for failures in dma_zalloc_coherent(). This patch implements new scheme of fragmented CQ to overcome this issue by introducing new type: 'struct mlx5_frag_buf_ctrl' to allocate fragmented buffers, rather than contiguous ones. Base the Completion Queues (CQs) on this new fragmented buffer. It fixes following crashes: kworker/29:0: page allocation failure: order:6, mode:0x80d0 CPU: 29 PID: 8374 Comm: kworker/29:0 Tainted: G OE 3.10.0 Workqueue: ib_cm cm_work_handler [ib_cm] Call Trace: [<>] dump_stack+0x19/0x1b [<>] warn_alloc_failed+0x110/0x180 [<>] __alloc_pages_slowpath+0x6b7/0x725 [<>] __alloc_pages_nodemask+0x405/0x420 [<>] dma_generic_alloc_coherent+0x8f/0x140 [<>] x86_swiotlb_alloc_coherent+0x21/0x50 [<>] mlx5_dma_zalloc_coherent_node+0xad/0x110 [mlx5_core] [<>] ? 
mlx5_db_alloc_node+0x69/0x1b0 [mlx5_core] [<>] mlx5_buf_alloc_node+0x3e/0xa0 [mlx5_core] [<>] mlx5_buf_alloc+0x14/0x20 [mlx5_core] [<>] create_cq_kernel+0x90/0x1f0 [mlx5_ib] [<>] mlx5_ib_create_cq+0x3b0/0x4e0 [mlx5_ib] Signed-off-by: Yonatan Cohen Reviewed-by: Tariq Toukan Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 51 ++++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2860a253275b..bfea26af6de5 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -345,13 +345,6 @@ struct mlx5_buf_list { dma_addr_t map; }; -struct mlx5_buf { - struct mlx5_buf_list direct; - int npages; - int size; - u8 page_shift; -}; - struct mlx5_frag_buf { struct mlx5_buf_list *frags; int npages; @@ -359,6 +352,15 @@ struct mlx5_frag_buf { u8 page_shift; }; +struct mlx5_frag_buf_ctrl { + struct mlx5_frag_buf frag_buf; + u32 sz_m1; + u32 frag_sz_m1; + u8 log_sz; + u8 log_stride; + u8 log_frag_strides; +}; + struct mlx5_eq_tasklet { struct list_head list; struct list_head process_list; @@ -386,7 +388,7 @@ struct mlx5_eq { struct mlx5_cq_table cq_table; __be32 __iomem *doorbell; u32 cons_index; - struct mlx5_buf buf; + struct mlx5_frag_buf buf; int size; unsigned int irqn; u8 eqn; @@ -932,9 +934,9 @@ struct mlx5_hca_vport_context { bool grh_required; }; -static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset) +static inline void *mlx5_buf_offset(struct mlx5_frag_buf *buf, int offset) { - return buf->direct.buf + offset; + return buf->frags->buf + offset; } #define STRUCT_FIELD(header, field) \ @@ -973,6 +975,25 @@ static inline u32 mlx5_base_mkey(const u32 key) return key & 0xffffff00u; } +static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc, + void *cqc) +{ + fbc->log_stride = 6 + MLX5_GET(cqc, cqc, cqe_sz); + fbc->log_sz = MLX5_GET(cqc, 
cqc, log_cq_size); + fbc->sz_m1 = (1 << fbc->log_sz) - 1; + fbc->log_frag_strides = PAGE_SHIFT - fbc->log_stride; + fbc->frag_sz_m1 = (1 << fbc->log_frag_strides) - 1; +} + +static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc, + u32 ix) +{ + unsigned int frag = (ix >> fbc->log_frag_strides); + + return fbc->frag_buf.frags[frag].buf + + ((fbc->frag_sz_m1 & ix) << fbc->log_stride); +} + int mlx5_cmd_init(struct mlx5_core_dev *dev); void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); @@ -998,9 +1019,10 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev); void mlx5_trigger_health_work(struct mlx5_core_dev *dev); void mlx5_drain_health_recovery(struct mlx5_core_dev *dev); int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, - struct mlx5_buf *buf, int node); -int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf); -void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf); + struct mlx5_frag_buf *buf, int node); +int mlx5_buf_alloc(struct mlx5_core_dev *dev, + int size, struct mlx5_frag_buf *buf); +void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf); int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, struct mlx5_frag_buf *buf, int node); void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf); @@ -1045,7 +1067,8 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); void mlx5_unregister_debugfs(void); -void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas); + +void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); -- cgit v1.2.3 From 
d8d211a2a0c37755a8660dc69f97b7c70bf210b1 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Wed, 14 Feb 2018 16:39:56 +0300 Subject: net: Make extern and export get_net_ns() This function will be used to obtain net of tun device. Signed-off-by: Kirill Tkhai Signed-off-by: David S. Miller --- include/linux/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 9286a5a8c60c..1ce1f768a58c 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -353,4 +353,6 @@ extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen unsigned int flags, struct timespec *timeout); extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags); + +extern struct ns_common *get_net_ns(struct ns_common *ns); #endif /* _LINUX_SOCKET_H */ -- cgit v1.2.3 From 562c45d635ecd5c0648ceb4d4aff9bdc1ad91252 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Feb 2018 16:49:45 -0800 Subject: headers: Drop two #included headers from <linux/interrupt.h> It seems that <linux/interrupt.h> does not need <linux/linkage.h> nor <linux/preempt.h>. 8 kernels builds are successful without these 2 headers (allmodconfig, allyesconfig, allnoconfig, and tinyconfig on both i386 and x86_64). <linux/interrupt.h> is #included 3875 times in 4.16-rc1, so this reduces #include processing of these 2 files by a total of 7750 times. Since I only tested x86 builds, this needs to be tested on other $ARCHes as well.
Signed-off-by: Randy Dunlap Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Link: http://lkml.kernel.org/r/b24b9ec8-4970-65f5-759a-911d4ba2fcf0@infradead.org Signed-off-by: Ingo Molnar --- include/linux/interrupt.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 69c238210325..5426627f9c55 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -4,9 +4,7 @@ #define _LINUX_INTERRUPT_H #include <linux/kernel.h> -#include <linux/linkage.h> #include <linux/bitops.h> -#include <linux/preempt.h> #include <linux/cpumask.h> #include <linux/irqreturn.h> #include <linux/irqnr.h> -- cgit v1.2.3 From 43a0a45abc4ab386f3ba978c877a2b68a0cad448 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 5 Feb 2018 23:01:59 +0100 Subject: mtd: nand: Get rid of comments giving the file path inside the file itself Some files add a comment giving the path of the file inside the Linux tree, which is pretty useless since the reader had to find the file to open it. Getting rid of these comments will also allow us to easily move these files around when needed. Signed-off-by: Boris Brezillon --- include/linux/mtd/bbm.h | 2 -- include/linux/mtd/nand_ecc.h | 2 -- include/linux/mtd/ndfc.h | 2 -- 3 files changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/bbm.h b/include/linux/mtd/bbm.h index 3bf8f954b642..3102bd754d18 100644 --- a/include/linux/mtd/bbm.h +++ b/include/linux/mtd/bbm.h @@ -1,6 +1,4 @@ /* - * linux/include/linux/mtd/bbm.h - * * NAND family Bad Block Management (BBM) header file * - Bad Block Table (BBT) implementation * diff --git a/include/linux/mtd/nand_ecc.h b/include/linux/mtd/nand_ecc.h index 4d8406c81652..8a2decf7462c 100644 --- a/include/linux/mtd/nand_ecc.h +++ b/include/linux/mtd/nand_ecc.h @@ -1,6 +1,4 @@ /* - * drivers/mtd/nand_ecc.h - * * Copyright (C) 2000-2010 Steven J.
Hill * David Woodhouse * Thomas Gleixner diff --git a/include/linux/mtd/ndfc.h b/include/linux/mtd/ndfc.h index d0558a982628..357e88b3263a 100644 --- a/include/linux/mtd/ndfc.h +++ b/include/linux/mtd/ndfc.h @@ -1,6 +1,4 @@ /* - * linux/include/linux/mtd/ndfc.h - * * Copyright (c) 2006 Thomas Gleixner * * This program is free software; you can redistribute it and/or modify -- cgit v1.2.3 From 9c3736a3de21d916a6af0594418b85a112f4bef6 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 5 Feb 2018 23:02:05 +0100 Subject: mtd: nand: Add core infrastructure to deal with NAND devices Add an intermediate layer to abstract NAND device interface so that some logic can be shared between SPI NANDs, parallel/raw NANDs, OneNANDs, ... Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 731 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 731 insertions(+) create mode 100644 include/linux/mtd/nand.h (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h new file mode 100644 index 000000000000..792ea5c26329 --- /dev/null +++ b/include/linux/mtd/nand.h @@ -0,0 +1,731 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2017 - Free Electrons + * + * Authors: + * Boris Brezillon + * Peter Pan + */ + +#ifndef __LINUX_MTD_NAND_H +#define __LINUX_MTD_NAND_H + +#include <linux/mtd/mtd.h> + +/** + * struct nand_memory_organization - Memory organization structure + * @bits_per_cell: number of bits per NAND cell + * @pagesize: page size + * @oobsize: OOB area size + * @pages_per_eraseblock: number of pages per eraseblock + * @eraseblocks_per_lun: number of eraseblocks per LUN (Logical Unit Number) + * @planes_per_lun: number of planes per LUN + * @luns_per_target: number of LUN per target (target is a synonym for die) + * @ntargets: total number of targets exposed by the NAND device + */ +struct nand_memory_organization { + unsigned int bits_per_cell; + unsigned int pagesize; + unsigned int oobsize; + unsigned int
pages_per_eraseblock; + unsigned int eraseblocks_per_lun; + unsigned int planes_per_lun; + unsigned int luns_per_target; + unsigned int ntargets; +}; + +#define NAND_MEMORG(bpc, ps, os, ppe, epl, ppl, lpt, nt) \ + { \ + .bits_per_cell = (bpc), \ + .pagesize = (ps), \ + .oobsize = (os), \ + .pages_per_eraseblock = (ppe), \ + .eraseblocks_per_lun = (epl), \ + .planes_per_lun = (ppl), \ + .luns_per_target = (lpt), \ + .ntargets = (nt), \ + } + +/** + * struct nand_row_converter - Information needed to convert an absolute offset + * into a row address + * @lun_addr_shift: position of the LUN identifier in the row address + * @eraseblock_addr_shift: position of the eraseblock identifier in the row + * address + */ +struct nand_row_converter { + unsigned int lun_addr_shift; + unsigned int eraseblock_addr_shift; +}; + +/** + * struct nand_pos - NAND position object + * @target: the NAND target/die + * @lun: the LUN identifier + * @plane: the plane within the LUN + * @eraseblock: the eraseblock within the LUN + * @page: the page within the LUN + * + * These information are usually used by specific sub-layers to select the + * appropriate target/die and generate a row address to pass to the device. + */ +struct nand_pos { + unsigned int target; + unsigned int lun; + unsigned int plane; + unsigned int eraseblock; + unsigned int page; +}; + +/** + * struct nand_page_io_req - NAND I/O request object + * @pos: the position this I/O request is targeting + * @dataoffs: the offset within the page + * @datalen: number of data bytes to read from/write to this page + * @databuf: buffer to store data in or get data from + * @ooboffs: the OOB offset within the page + * @ooblen: the number of OOB bytes to read from/write to this page + * @oobbuf: buffer to store OOB data in or get OOB data from + * + * This object is used to pass per-page I/O requests to NAND sub-layers. 
This + * way all useful information are already formatted in a useful way and + * specific NAND layers can focus on translating these information into + * specific commands/operations. + */ +struct nand_page_io_req { + struct nand_pos pos; + unsigned int dataoffs; + unsigned int datalen; + union { + const void *out; + void *in; + } databuf; + unsigned int ooboffs; + unsigned int ooblen; + union { + const void *out; + void *in; + } oobbuf; +}; + +/** + * struct nand_ecc_req - NAND ECC requirements + * @strength: ECC strength + * @step_size: ECC step/block size + */ +struct nand_ecc_req { + unsigned int strength; + unsigned int step_size; +}; + +#define NAND_ECCREQ(str, stp) { .strength = (str), .step_size = (stp) } + +/** + * struct nand_bbt - bad block table object + * @cache: in memory BBT cache + */ +struct nand_bbt { + unsigned long *cache; +}; + +struct nand_device; + +/** + * struct nand_ops - NAND operations + * @erase: erase a specific block. No need to check if the block is bad before + * erasing, this has been taken care of by the generic NAND layer + * @markbad: mark a specific block bad. No need to check if the block is + * already marked bad, this has been taken care of by the generic + * NAND layer. This method should just write the BBM (Bad Block + * Marker) so that future call to struct_nand_ops->isbad() return + * true + * @isbad: check whether a block is bad or not. This method should just read + * the BBM and return whether the block is bad or not based on what it + * reads + * + * These are all low level operations that should be implemented by specialized + * NAND layers (SPI NAND, raw NAND, ...). 
+ */ +struct nand_ops { + int (*erase)(struct nand_device *nand, const struct nand_pos *pos); + int (*markbad)(struct nand_device *nand, const struct nand_pos *pos); + bool (*isbad)(struct nand_device *nand, const struct nand_pos *pos); +}; + +/** + * struct nand_device - NAND device + * @mtd: MTD instance attached to the NAND device + * @memorg: memory layout + * @eccreq: ECC requirements + * @rowconv: position to row address converter + * @bbt: bad block table info + * @ops: NAND operations attached to the NAND device + * + * Generic NAND object. Specialized NAND layers (raw NAND, SPI NAND, OneNAND) + * should declare their own NAND object embedding a nand_device struct (that's + * how inheritance is done). + * struct_nand_device->memorg and struct_nand_device->eccreq should be filled + * at device detection time to reflect the NAND device + * capabilities/requirements. Once this is done nanddev_init() can be called. + * It will take care of converting NAND information into MTD ones, which means + * the specialized NAND layers should never manually tweak + * struct_nand_device->mtd except for the ->_read/write() hooks. + */ +struct nand_device { + struct mtd_info mtd; + struct nand_memory_organization memorg; + struct nand_ecc_req eccreq; + struct nand_row_converter rowconv; + struct nand_bbt bbt; + const struct nand_ops *ops; +}; + +/** + * struct nand_io_iter - NAND I/O iterator + * @req: current I/O request + * @oobbytes_per_page: maximum number of OOB bytes per page + * @dataleft: remaining number of data bytes to read/write + * @oobleft: remaining number of OOB bytes to read/write + * + * Can be used by specialized NAND layers to iterate over all pages covered + * by an MTD I/O request, which should greatly simplify the boiler-plate + * code needed to read/write data from/to a NAND device.
+ */ +struct nand_io_iter { + struct nand_page_io_req req; + unsigned int oobbytes_per_page; + unsigned int dataleft; + unsigned int oobleft; +}; + +/** + * mtd_to_nanddev() - Get the NAND device attached to the MTD instance + * @mtd: MTD instance + * + * Return: the NAND device embedding @mtd. + */ +static inline struct nand_device *mtd_to_nanddev(struct mtd_info *mtd) +{ + return container_of(mtd, struct nand_device, mtd); +} + +/** + * nanddev_to_mtd() - Get the MTD device attached to a NAND device + * @nand: NAND device + * + * Return: the MTD device embedded in @nand. + */ +static inline struct mtd_info *nanddev_to_mtd(struct nand_device *nand) +{ + return &nand->mtd; +} + +/** + * nanddev_bits_per_cell() - Get the number of bits per cell + * @nand: NAND device + * + * Return: the number of bits per cell. + */ +static inline unsigned int nanddev_bits_per_cell(const struct nand_device *nand) +{ + return nand->memorg.bits_per_cell; +} + +/** + * nanddev_page_size() - Get NAND page size + * @nand: NAND device + * + * Return: the page size. + */ +static inline size_t nanddev_page_size(const struct nand_device *nand) +{ + return nand->memorg.pagesize; +} + +/** + * nanddev_per_page_oobsize() - Get NAND OOB size + * @nand: NAND device + * + * Return: the OOB size. + */ +static inline unsigned int +nanddev_per_page_oobsize(const struct nand_device *nand) +{ + return nand->memorg.oobsize; +} + +/** + * nanddev_pages_per_eraseblock() - Get the number of pages per eraseblock + * @nand: NAND device + * + * Return: the number of pages per eraseblock. + */ +static inline unsigned int +nanddev_pages_per_eraseblock(const struct nand_device *nand) +{ + return nand->memorg.pages_per_eraseblock; +} + +/** + * nanddev_eraseblock_size() - Get NAND erase block size + * @nand: NAND device + * + * Return: the eraseblock size.
+ */ +static inline size_t nanddev_eraseblock_size(const struct nand_device *nand) +{ + return nand->memorg.pagesize * nand->memorg.pages_per_eraseblock; +} + +/** + * nanddev_eraseblocks_per_lun() - Get the number of eraseblocks per LUN + * @nand: NAND device + * + * Return: the number of eraseblocks per LUN. + */ +static inline unsigned int +nanddev_eraseblocks_per_lun(const struct nand_device *nand) +{ + return nand->memorg.eraseblocks_per_lun; +} + +/** + * nanddev_target_size() - Get the total size provided by a single target/die + * @nand: NAND device + * + * Return: the total size exposed by a single target/die in bytes. + */ +static inline u64 nanddev_target_size(const struct nand_device *nand) +{ + return (u64)nand->memorg.luns_per_target * + nand->memorg.eraseblocks_per_lun * + nand->memorg.pages_per_eraseblock * + nand->memorg.pagesize; +} + +/** + * nanddev_ntargets() - Get the total number of targets + * @nand: NAND device + * + * Return: the number of targets/dies exposed by @nand. + */ +static inline unsigned int nanddev_ntargets(const struct nand_device *nand) +{ + return nand->memorg.ntargets; +} + +/** + * nanddev_neraseblocks() - Get the total number of eraseblocks + * @nand: NAND device + * + * Return: the total number of eraseblocks exposed by @nand. + */ +static inline unsigned int nanddev_neraseblocks(const struct nand_device *nand) +{ + return (u64)nand->memorg.luns_per_target * + nand->memorg.eraseblocks_per_lun * + nand->memorg.pages_per_eraseblock; +} + +/** + * nanddev_size() - Get NAND size + * @nand: NAND device + * + * Return: the total size (in bytes) exposed by @nand. + */ +static inline u64 nanddev_size(const struct nand_device *nand) +{ + return nanddev_target_size(nand) * nanddev_ntargets(nand); +} + +/** + * nanddev_get_memorg() - Extract memory organization info from a NAND device + * @nand: NAND device + * + * This can be used by the upper layer to fill the memorg info before calling + * nanddev_init().
+ * + * Return: the memorg object embedded in the NAND device. + */ +static inline struct nand_memory_organization * +nanddev_get_memorg(struct nand_device *nand) +{ + return &nand->memorg; +} + +int nanddev_init(struct nand_device *nand, const struct nand_ops *ops, + struct module *owner); +void nanddev_cleanup(struct nand_device *nand); + +/** + * nanddev_register() - Register a NAND device + * @nand: NAND device + * + * Register a NAND device. + * This function is just a wrapper around mtd_device_register() + * registering the MTD device embedded in @nand. + * + * Return: 0 in case of success, a negative error code otherwise. + */ +static inline int nanddev_register(struct nand_device *nand) +{ + return mtd_device_register(&nand->mtd, NULL, 0); +} + +/** + * nanddev_unregister() - Unregister a NAND device + * @nand: NAND device + * + * Unregister a NAND device. + * This function is just a wrapper around mtd_device_unregister() + * unregistering the MTD device embedded in @nand. + * + * Return: 0 in case of success, a negative error code otherwise. + */ +static inline int nanddev_unregister(struct nand_device *nand) +{ + return mtd_device_unregister(&nand->mtd); +} + +/** + * nanddev_set_of_node() - Attach a DT node to a NAND device + * @nand: NAND device + * @np: DT node + * + * Attach a DT node to a NAND device. + */ +static inline void nanddev_set_of_node(struct nand_device *nand, + struct device_node *np) +{ + mtd_set_of_node(&nand->mtd, np); +} + +/** + * nanddev_get_of_node() - Retrieve the DT node attached to a NAND device + * @nand: NAND device + * + * Return: the DT node attached to @nand. 
+ */ +static inline struct device_node *nanddev_get_of_node(struct nand_device *nand) +{ + return mtd_get_of_node(&nand->mtd); +} + +/** + * nanddev_offs_to_pos() - Convert an absolute NAND offset into a NAND position + * @nand: NAND device + * @offs: absolute NAND offset (usually passed by the MTD layer) + * @pos: a NAND position object to fill in + * + * Converts @offs into a nand_pos representation. + * + * Return: the offset within the NAND page pointed by @pos. + */ +static inline unsigned int nanddev_offs_to_pos(struct nand_device *nand, + loff_t offs, + struct nand_pos *pos) +{ + unsigned int pageoffs; + u64 tmp = offs; + + pageoffs = do_div(tmp, nand->memorg.pagesize); + pos->page = do_div(tmp, nand->memorg.pages_per_eraseblock); + pos->eraseblock = do_div(tmp, nand->memorg.eraseblocks_per_lun); + pos->plane = pos->eraseblock % nand->memorg.planes_per_lun; + pos->lun = do_div(tmp, nand->memorg.luns_per_target); + pos->target = tmp; + + return pageoffs; +} + +/** + * nanddev_pos_cmp() - Compare two NAND positions + * @a: First NAND position + * @b: Second NAND position + * + * Compares two NAND positions. + * + * Return: -1 if @a < @b, 0 if @a == @b and 1 if @a > @b. + */ +static inline int nanddev_pos_cmp(const struct nand_pos *a, + const struct nand_pos *b) +{ + if (a->target != b->target) + return a->target < b->target ? -1 : 1; + + if (a->lun != b->lun) + return a->lun < b->lun ? -1 : 1; + + if (a->eraseblock != b->eraseblock) + return a->eraseblock < b->eraseblock ? -1 : 1; + + if (a->page != b->page) + return a->page < b->page ? -1 : 1; + + return 0; +} + +/** + * nanddev_pos_to_offs() - Convert a NAND position into an absolute offset + * @nand: NAND device + * @pos: the NAND position to convert + * + * Converts @pos NAND position into an absolute offset. + * + * Return: the absolute offset. 
Note that @pos points to the beginning of a + * page, if one wants to point to a specific offset within this page + * the returned offset has to be adjusted manually. + */ +static inline loff_t nanddev_pos_to_offs(struct nand_device *nand, + const struct nand_pos *pos) +{ + unsigned int npages; + + npages = pos->page + + ((pos->eraseblock + + (pos->lun + + (pos->target * nand->memorg.luns_per_target)) * + nand->memorg.eraseblocks_per_lun) * + nand->memorg.pages_per_eraseblock); + + return (loff_t)npages * nand->memorg.pagesize; +} + +/** + * nanddev_pos_to_row() - Extract a row address from a NAND position + * @nand: NAND device + * @pos: the position to convert + * + * Converts a NAND position into a row address that can then be passed to the + * device. + * + * Return: the row address extracted from @pos. + */ +static inline unsigned int nanddev_pos_to_row(struct nand_device *nand, + const struct nand_pos *pos) +{ + return (pos->lun << nand->rowconv.lun_addr_shift) | + (pos->eraseblock << nand->rowconv.eraseblock_addr_shift) | + pos->page; +} + +/** + * nanddev_pos_next_target() - Move a position to the next target/die + * @nand: NAND device + * @pos: the position to update + * + * Updates @pos to point to the start of the next target/die. Useful when you + * want to iterate over all targets/dies of a NAND device. + */ +static inline void nanddev_pos_next_target(struct nand_device *nand, + struct nand_pos *pos) +{ + pos->page = 0; + pos->plane = 0; + pos->eraseblock = 0; + pos->lun = 0; + pos->target++; +} + +/** + * nanddev_pos_next_lun() - Move a position to the next LUN + * @nand: NAND device + * @pos: the position to update + * + * Updates @pos to point to the start of the next LUN. Useful when you want to + * iterate over all LUNs of a NAND device. 
+ */ +static inline void nanddev_pos_next_lun(struct nand_device *nand, + struct nand_pos *pos) +{ + if (pos->lun >= nand->memorg.luns_per_target - 1) + return nanddev_pos_next_target(nand, pos); + + pos->lun++; + pos->page = 0; + pos->plane = 0; + pos->eraseblock = 0; +} + +/** + * nanddev_pos_next_eraseblock() - Move a position to the next eraseblock + * @nand: NAND device + * @pos: the position to update + * + * Updates @pos to point to the start of the next eraseblock. Useful when you + * want to iterate over all eraseblocks of a NAND device. + */ +static inline void nanddev_pos_next_eraseblock(struct nand_device *nand, + struct nand_pos *pos) +{ + if (pos->eraseblock >= nand->memorg.eraseblocks_per_lun - 1) + return nanddev_pos_next_lun(nand, pos); + + pos->eraseblock++; + pos->page = 0; + pos->plane = pos->eraseblock % nand->memorg.planes_per_lun; +} + +/** + * nanddev_pos_next_page() - Move a position to the next page + * @nand: NAND device + * @pos: the position to update + * + * Updates @pos to point to the start of the next page. Useful when you want to + * iterate over all pages of a NAND device. + */ +static inline void nanddev_pos_next_page(struct nand_device *nand, + struct nand_pos *pos) +{ + if (pos->page >= nand->memorg.pages_per_eraseblock - 1) + return nanddev_pos_next_eraseblock(nand, pos); + + pos->page++; +} + +/** + * nanddev_io_iter_init() - Initialize a NAND I/O iterator + * @nand: NAND device + * @offs: absolute offset + * @req: MTD request + * @iter: NAND I/O iterator + * + * Initializes a NAND iterator based on the information passed by the MTD + * layer.
+ */ +static inline void nanddev_io_iter_init(struct nand_device *nand, + loff_t offs, struct mtd_oob_ops *req, + struct nand_io_iter *iter) +{ + struct mtd_info *mtd = nanddev_to_mtd(nand); + + iter->req.dataoffs = nanddev_offs_to_pos(nand, offs, &iter->req.pos); + iter->req.ooboffs = req->ooboffs; + iter->oobbytes_per_page = mtd_oobavail(mtd, req); + iter->dataleft = req->len; + iter->oobleft = req->ooblen; + iter->req.databuf.in = req->datbuf; + iter->req.datalen = min_t(unsigned int, + nand->memorg.pagesize - iter->req.dataoffs, + iter->dataleft); + iter->req.oobbuf.in = req->oobbuf; + iter->req.ooblen = min_t(unsigned int, + iter->oobbytes_per_page - iter->req.ooboffs, + iter->oobleft); +} + +/** + * nanddev_io_iter_next_page() - Move to the next page + * @nand: NAND device + * @iter: NAND I/O iterator + * + * Updates the @iter to point to the next page. + */ +static inline void nanddev_io_iter_next_page(struct nand_device *nand, + struct nand_io_iter *iter) +{ + nanddev_pos_next_page(nand, &iter->req.pos); + iter->dataleft -= iter->req.datalen; + iter->req.databuf.in += iter->req.datalen; + iter->oobleft -= iter->req.ooblen; + iter->req.oobbuf.in += iter->req.ooblen; + iter->req.dataoffs = 0; + iter->req.ooboffs = 0; + iter->req.datalen = min_t(unsigned int, nand->memorg.pagesize, + iter->dataleft); + iter->req.ooblen = min_t(unsigned int, iter->oobbytes_per_page, + iter->oobleft); +} + +/** + * nanddev_io_iter_end() - Should end iteration or not + * @nand: NAND device + * @iter: NAND I/O iterator + * + * Check whether @iter has reached the end of the NAND portion it was asked to + * iterate on or not. + * + * Return: true if @iter has reached the end of the iteration request, false + * otherwise.
+ */ +static inline bool nanddev_io_iter_end(struct nand_device *nand, + const struct nand_io_iter *iter) +{ + if (iter->dataleft || iter->oobleft) + return false; + + return true; +} + +/** + * nanddev_io_for_each_page() - Iterate over all NAND pages contained in an MTD I/O + * request + * @nand: NAND device + * @start: start address to read/write from + * @req: MTD I/O request + * @iter: NAND I/O iterator + * + * Should be used to iterate over pages that are contained in an MTD request. + */ +#define nanddev_io_for_each_page(nand, start, req, iter) \ + for (nanddev_io_iter_init(nand, start, req, iter); \ + !nanddev_io_iter_end(nand, iter); \ + nanddev_io_iter_next_page(nand, iter)) + +bool nanddev_isbad(struct nand_device *nand, const struct nand_pos *pos); +bool nanddev_isreserved(struct nand_device *nand, const struct nand_pos *pos); +int nanddev_erase(struct nand_device *nand, const struct nand_pos *pos); +int nanddev_markbad(struct nand_device *nand, const struct nand_pos *pos); + +/* BBT related functions */ +enum nand_bbt_block_status { + NAND_BBT_BLOCK_STATUS_UNKNOWN, + NAND_BBT_BLOCK_GOOD, + NAND_BBT_BLOCK_WORN, + NAND_BBT_BLOCK_RESERVED, + NAND_BBT_BLOCK_FACTORY_BAD, + NAND_BBT_BLOCK_NUM_STATUS, +}; + +int nanddev_bbt_init(struct nand_device *nand); +void nanddev_bbt_cleanup(struct nand_device *nand); +int nanddev_bbt_update(struct nand_device *nand); +int nanddev_bbt_get_block_status(const struct nand_device *nand, + unsigned int entry); +int nanddev_bbt_set_block_status(struct nand_device *nand, unsigned int entry, + enum nand_bbt_block_status status); +int nanddev_bbt_markbad(struct nand_device *nand, unsigned int block); + +/** + * nanddev_bbt_pos_to_entry() - Convert a NAND position into a BBT entry + * @nand: NAND device + * @pos: the NAND position we want to get BBT entry for + * + * Return the BBT entry used to store information about the eraseblock pointed + * by @pos.
+ * + * Return: the BBT entry storing information about eraseblock pointed by @pos. + */ +static inline unsigned int nanddev_bbt_pos_to_entry(struct nand_device *nand, + const struct nand_pos *pos) +{ + return pos->eraseblock + + ((pos->lun + (pos->target * nand->memorg.luns_per_target)) * + nand->memorg.eraseblocks_per_lun); +} + +/** + * nanddev_bbt_is_initialized() - Check if the BBT has been initialized + * @nand: NAND device + * + * Return: true if the BBT has been initialized, false otherwise. + */ +static inline bool nanddev_bbt_is_initialized(struct nand_device *nand) +{ + return !!nand->bbt.cache; +} + +/* MTD -> NAND helper functions. */ +int nanddev_mtd_erase(struct mtd_info *mtd, struct erase_info *einfo); + +#endif /* __LINUX_MTD_NAND_H */ -- cgit v1.2.3 From 6de0b13cc0b4ba10e98a9263d7a83b940720b77a Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Mon, 8 Jan 2018 10:41:41 +0800 Subject: HID: core: Fix size as type u32 When size is negative, calling memset will make segment fault. Declare the size as type u32 to keep memset safe. size in struct hid_report is unsigned, fix return type of hid_report_len to u32. 
Cc: stable@vger.kernel.org Signed-off-by: Aaron Ma Signed-off-by: Jiri Kosina --- include/linux/hid.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 091a81cf330f..0efe80b59156 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -851,7 +851,7 @@ extern int hidinput_connect(struct hid_device *hid, unsigned int force); extern void hidinput_disconnect(struct hid_device *); int hid_set_field(struct hid_field *, unsigned, __s32); -int hid_input_report(struct hid_device *, int type, u8 *, int, int); +int hid_input_report(struct hid_device *, int type, u8 *, u32, int); int hidinput_find_field(struct hid_device *hid, unsigned int type, unsigned int code, struct hid_field **field); struct hid_field *hidinput_get_led_field(struct hid_device *hid); unsigned int hidinput_count_leds(struct hid_device *hid); @@ -1102,13 +1102,13 @@ static inline void hid_hw_wait(struct hid_device *hdev) * * @report: the report we want to know the length */ -static inline int hid_report_len(struct hid_report *report) +static inline u32 hid_report_len(struct hid_report *report) { /* equivalent to DIV_ROUND_UP(report->size, 8) + !!(report->id > 0) */ return ((report->size - 1) >> 3) + 1 + (report->id > 0); } -int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size, +int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size, int interrupt); /* HID quirks API */ -- cgit v1.2.3 From 0957a2c1d97586893d5ba7ce864b1d7e0b82b162 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 13 Feb 2018 08:22:36 +1100 Subject: sched/wait: add wait_event_idle() functions. The new TASK_IDLE state (TASK_UNINTERRUPTIBLE | __TASK_NOLOAD) is not much used. One way to make it easier to use is to add wait_event*() family functions that make use of it. 
This patch adds: wait_event_idle() wait_event_idle_timeout() wait_event_idle_exclusive() wait_event_idle_exclusive_timeout() This set was chosen because lustre needs them before it can discard its own l_wait_event() macro. Acked-by: Peter Zijlstra (Intel) Reviewed-by: James Simmons Signed-off-by: NeilBrown Reviewed-by: Patrick Farrell Signed-off-by: Greg Kroah-Hartman --- include/linux/wait.h | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 55a611486bac..d9f131ecf708 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -599,6 +599,120 @@ do { \ __ret; \ }) +/** + * wait_event_idle - wait for a condition without contributing to system load + * @wq_head: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * + * The process is put to sleep (TASK_IDLE) until the + * @condition evaluates to true. + * The @condition is checked each time the waitqueue @wq_head is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + */ +#define wait_event_idle(wq_head, condition) \ +do { \ + might_sleep(); \ + if (!(condition)) \ + ___wait_event(wq_head, condition, TASK_IDLE, 0, 0, schedule()); \ +} while (0) + +/** + * wait_event_idle_exclusive - wait for a condition without contributing to system load + * @wq_head: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * + * The process is put to sleep (TASK_IDLE) until the + * @condition evaluates to true. + * The @condition is checked each time the waitqueue @wq_head is woken up. + * + * The process is put on the wait queue with an WQ_FLAG_EXCLUSIVE flag + * set thus if other processes wait on the same list, when this + * process is woken further processes are not considered.
+ * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + */ +#define wait_event_idle_exclusive(wq_head, condition) \ +do { \ + might_sleep(); \ + if (!(condition)) \ + ___wait_event(wq_head, condition, TASK_IDLE, 1, 0, schedule()); \ +} while (0) + +#define __wait_event_idle_timeout(wq_head, condition, timeout) \ + ___wait_event(wq_head, ___wait_cond_timeout(condition), \ + TASK_IDLE, 0, timeout, \ + __ret = schedule_timeout(__ret)) + +/** + * wait_event_idle_timeout - sleep without load until a condition becomes true or a timeout elapses + * @wq_head: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @timeout: timeout, in jiffies + * + * The process is put to sleep (TASK_IDLE) until the + * @condition evaluates to true. The @condition is checked each time + * the waitqueue @wq_head is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * Returns: + * 0 if the @condition evaluated to %false after the @timeout elapsed, + * 1 if the @condition evaluated to %true after the @timeout elapsed, + * or the remaining jiffies (at least 1) if the @condition evaluated + * to %true before the @timeout elapsed. 
+ */ +#define wait_event_idle_timeout(wq_head, condition, timeout) \ +({ \ + long __ret = timeout; \ + might_sleep(); \ + if (!___wait_cond_timeout(condition)) \ + __ret = __wait_event_idle_timeout(wq_head, condition, timeout); \ + __ret; \ +}) + +#define __wait_event_idle_exclusive_timeout(wq_head, condition, timeout) \ + ___wait_event(wq_head, ___wait_cond_timeout(condition), \ + TASK_IDLE, 1, timeout, \ + __ret = schedule_timeout(__ret)) + +/** + * wait_event_idle_exclusive_timeout - sleep without load until a condition becomes true or a timeout elapses + * @wq_head: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @timeout: timeout, in jiffies + * + * The process is put to sleep (TASK_IDLE) until the + * @condition evaluates to true. The @condition is checked each time + * the waitqueue @wq_head is woken up. + * + * The process is put on the wait queue with an WQ_FLAG_EXCLUSIVE flag + * set thus if other processes wait on the same list, when this + * process is woken further processes are not considered. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * Returns: + * 0 if the @condition evaluated to %false after the @timeout elapsed, + * 1 if the @condition evaluated to %true after the @timeout elapsed, + * or the remaining jiffies (at least 1) if the @condition evaluated + * to %true before the @timeout elapsed. 
+ */ +#define wait_event_idle_exclusive_timeout(wq_head, condition, timeout) \ +({ \ + long __ret = timeout; \ + might_sleep(); \ + if (!___wait_cond_timeout(condition)) \ + __ret = __wait_event_idle_exclusive_timeout(wq_head, condition, timeout);\ + __ret; \ +}) + extern int do_wait_intr(wait_queue_head_t *, wait_queue_entry_t *); extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); -- cgit v1.2.3 From 5cf0c37a71da0f3a4802806c597b21d99c33ca60 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Tue, 19 Dec 2017 00:38:02 -0500 Subject: PCI: Remove pci_get_bus_and_slot() function pci_get_bus_and_slot() is restrictive such that it assumes domain=0 as where a PCI device is present. This restricts the device drivers to be reused for other domain numbers. Now that all users of pci_get_bus_and_slot() switched to pci_get_domain_bus_and_slot(), it is now safe to remove this function. Signed-off-by: Sinan Kaya Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 024a1beda008..25b7a3535d26 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -949,11 +949,6 @@ struct pci_dev *pci_get_subsys(unsigned int vendor, unsigned int device, struct pci_dev *pci_get_slot(struct pci_bus *bus, unsigned int devfn); struct pci_dev *pci_get_domain_bus_and_slot(int domain, unsigned int bus, unsigned int devfn); -static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus, - unsigned int devfn) -{ - return pci_get_domain_bus_and_slot(0, bus, devfn); -} struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from); int pci_dev_present(const struct pci_device_id *ids); @@ -1661,9 +1656,6 @@ static inline struct pci_bus *pci_find_next_bus(const struct pci_bus *from) static inline struct pci_dev *pci_get_slot(struct pci_bus *bus, unsigned int devfn) { return NULL; } -static inline struct pci_dev *pci_get_bus_and_slot(unsigned 
int bus, - unsigned int devfn) -{ return NULL; } static inline struct pci_dev *pci_get_domain_bus_and_slot(int domain, unsigned int bus, unsigned int devfn) { return NULL; } -- cgit v1.2.3 From e45e290a882e2c0dc8ebb7dd21c66a8209d8e3a5 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 12 Feb 2018 14:16:57 +0100 Subject: regulator: core: Support passing an initialized GPIO enable descriptor We are currently passing a GPIO number from the global GPIO numberspace into the regulator core for handling enable GPIOs. This is not good since it ties into the global GPIO numberspace and uses gpio_to_desc() to overcome this. Start supporting passing an already initialized GPIO descriptor to the core instead: leaf drivers pick their descriptors, associated directly with the device node (or from ACPI or from a board descriptor table) and use that directly without any roundtrip over the global GPIO numberspace. This looks messy since it adds a bunch of extra code in the core, but at the end of the patch series we will delete the handling of the GPIO number and only deal with descriptors so things end up neat. Signed-off-by: Linus Walleij Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 4c00486b7a78..4fc96cb8e5d7 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -19,6 +19,7 @@ #include #include +struct gpio_desc; struct regmap; struct regulator_dev; struct regulator_config; @@ -387,6 +388,7 @@ struct regulator_desc { * initialized, meaning that >= 0 is a valid gpio * identifier and < 0 is a non existent gpio. * @ena_gpio: GPIO controlling regulator enable. + * @ena_gpiod: GPIO descriptor controlling regulator enable. * @ena_gpio_invert: Sense for GPIO enable control. 
* @ena_gpio_flags: Flags to use when calling gpio_request_one() */ @@ -399,6 +401,7 @@ struct regulator_config { bool ena_gpio_initialized; int ena_gpio; + struct gpio_desc *ena_gpiod; unsigned int ena_gpio_invert:1; unsigned int ena_gpio_flags; }; -- cgit v1.2.3 From 8d05560d1d011e5a842556efdbd70cc8a21499bb Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 12 Feb 2018 14:17:00 +0100 Subject: regulator: da9055: Pass descriptor instead of GPIO number When setting up a fixed regulator on the DA9055, pass a descriptor instead of a global GPIO number. This facility is not used in the kernel so we can easily just say that this should be a descriptor if/when put to use. Signed-off-by: Linus Walleij Acked-by: Lee Jones Signed-off-by: Mark Brown --- include/linux/mfd/da9055/pdata.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/da9055/pdata.h b/include/linux/mfd/da9055/pdata.h index 04e092be4b07..1a94fa2ac309 100644 --- a/include/linux/mfd/da9055/pdata.h +++ b/include/linux/mfd/da9055/pdata.h @@ -12,6 +12,7 @@ #define DA9055_MAX_REGULATORS 8 struct da9055; +struct gpio_desc; enum gpio_select { NO_GPIO = 0, @@ -47,7 +48,7 @@ struct da9055_pdata { * controls the regulator set A/B, 0 if not available. */ enum gpio_select *reg_rsel; - /* GPIOs to enable regulator, 0 if not available */ - int *ena_gpio; + /* GPIO descriptors to enable regulator, NULL if not available */ + struct gpio_desc **ena_gpiods; }; #endif /* __DA9055_PDATA_H */ -- cgit v1.2.3 From 11da04af0d3b4c24ab057dd17f54dbc854d735de Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 12 Feb 2018 14:17:02 +0100 Subject: regulator: da9211: Pass descriptors instead of GPIO numbers This augments the DA9211 regulator driver to fetch its GPIO descriptors directly from the device tree using the newly exported devm_get_gpiod_from_child(). 
Signed-off-by: Linus Walleij Signed-off-by: Mark Brown --- include/linux/regulator/da9211.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/da9211.h b/include/linux/regulator/da9211.h index f2fd2d3bf58f..d1f2073e4d5f 100644 --- a/include/linux/regulator/da9211.h +++ b/include/linux/regulator/da9211.h @@ -21,6 +21,8 @@ #define DA9211_MAX_REGULATORS 2 +struct gpio_desc; + enum da9211_chip_id { DA9211, DA9212, @@ -39,7 +41,7 @@ struct da9211_pdata { * 2 : 2 phase 2 buck */ int num_buck; - int gpio_ren[DA9211_MAX_REGULATORS]; + struct gpio_desc *gpiod_ren[DA9211_MAX_REGULATORS]; struct device_node *reg_node[DA9211_MAX_REGULATORS]; struct regulator_init_data *init_data[DA9211_MAX_REGULATORS]; }; -- cgit v1.2.3 From 6f89dbce8e1134458de8a8e376acaaca4eee602e Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 15 Feb 2018 10:49:32 -0800 Subject: skbuff: export mm_[un]account_pinned_pages for other modules RDS would like to use the helper functions for managing pinned pages added by Commit a91dbff551a6 ("sock: ulimit on MSG_ZEROCOPY pages") Signed-off-by: Sowmini Varadhan Acked-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5ebc0f869720..b1cc38af53e1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -466,6 +466,9 @@ struct ubuf_info { #define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg)) +int mm_account_pinned_pages(struct mmpin *mmp, size_t size); +void mm_unaccount_pinned_pages(struct mmpin *mmp); + struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size); struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size, struct ubuf_info *uarg); -- cgit v1.2.3 From bdec5a6b57896da81bc47262868468717a06bb69 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Sat, 17 Feb 2018 21:22:24 -0600 Subject: ARM: da8xx: use platform data for CFGCHIP syscon regmap This converts from using a platform device for the CFGCHIP syscon regmap to using platform data to pass the regmap to consumers. A lazy getter function is used so that the regmap will only be created if it is actually used. This function will also be used in the clock init when we convert to the common clock framework. The USB PHY driver is currently the only consumer. This driver is updated to use platform data to get the CFGCHIP regmap instead of syscon_regmap_lookup_by_pdevname(). 
Signed-off-by: David Lechner Acked-by: Kishon Vijay Abraham I Signed-off-by: Sekhar Nori --- include/linux/platform_data/phy-da8xx-usb.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 include/linux/platform_data/phy-da8xx-usb.h (limited to 'include/linux') diff --git a/include/linux/platform_data/phy-da8xx-usb.h b/include/linux/platform_data/phy-da8xx-usb.h new file mode 100644 index 000000000000..85c2b99381b2 --- /dev/null +++ b/include/linux/platform_data/phy-da8xx-usb.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * phy-da8xx-usb - TI DaVinci DA8xx USB PHY driver + * + * Copyright (C) 2018 David Lechner + */ + +#ifndef __LINUX_PLATFORM_DATA_PHY_DA8XX_USB_H__ +#define __LINUX_PLATFORM_DATA_PHY_DA8XX_USB_H__ + +#include <linux/regmap.h> + +/** + * da8xx_usb_phy_platform_data + * @cfgchip: CFGCHIP syscon regmap + */ +struct da8xx_usb_phy_platform_data { + struct regmap *cfgchip; +}; + +#endif /* __LINUX_PLATFORM_DATA_PHY_DA8XX_USB_H__ */ -- cgit v1.2.3 From 22e76844c566e474a9a3e0c2206e45766b5941b7 Mon Sep 17 00:00:00 2001 From: Srinivas Dasari Date: Tue, 6 Feb 2018 19:49:35 +0530 Subject: ieee80211: Increase PMK maximum length to 64 bytes Increase the PMK maximum length to 64 bytes to accommodate the key length used in DPP with the NIST P-521 and Brainpool 512 curves.
Signed-off-by: Srinivas Dasari Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index ee6657a0ed69..e4cba332b705 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2111,7 +2111,7 @@ enum ieee80211_key_len { #define FILS_ERP_MAX_REALM_LEN 253 #define FILS_ERP_MAX_RRK_LEN 64 -#define PMK_MAX_LEN 48 +#define PMK_MAX_LEN 64 /* Public action codes (IEEE Std 802.11-2016, 9.6.8.1, Table 9-307) */ enum ieee80211_pub_actioncode { -- cgit v1.2.3 From e3f9f41757f5ce1e95ef3bc3bfb72bbcdb23ece2 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Fri, 16 Feb 2018 12:06:13 +0100 Subject: ptr_ring: Remove now-redundant smp_read_barrier_depends() Because READ_ONCE() now implies smp_read_barrier_depends(), the smp_read_barrier_depends() in __ptr_ring_consume() is redundant; this commit removes it and updates the comments. Signed-off-by: Andrea Parri Cc: "David S. Miller" Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: John Fastabend Cc: Eric Dumazet Cc: Cc: Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index b884b7794187..ddfed1dce936 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -296,13 +296,14 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r) { void *ptr; + /* The READ_ONCE in __ptr_ring_peek guarantees that anyone + * accessing data through the pointer is up to date. Pairs + * with smp_wmb in __ptr_ring_produce. + */ ptr = __ptr_ring_peek(r); if (ptr) __ptr_ring_discard_one(r); - /* Make sure anyone accessing data through the pointer is up to date. */ - /* Pairs with smp_wmb in __ptr_ring_produce. 
*/ - smp_read_barrier_depends(); return ptr; } -- cgit v1.2.3 From 19efbd93e6fb05eab81856b4fc8d64211dd37088 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 19 Feb 2018 12:58:38 +0300 Subject: net: Kill net_mutex We take net_mutex when there are !async pernet_operations registered, and read locking of net_sem is not enough. But we may get rid of taking the mutex, and just change the logic to write lock net_sem in such cases. This obviously reduces the number of lock operations we do. Signed-off-by: Kirill Tkhai Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index e9ee9ad0a681..3573b4bf2fdf 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -35,7 +35,6 @@ extern int rtnl_trylock(void); extern int rtnl_is_locked(void); extern wait_queue_head_t netdev_unregistering_wq; -extern struct mutex net_mutex; extern struct rw_semaphore net_sem; #ifdef CONFIG_PROVE_LOCKING -- cgit v1.2.3 From 30a050defbca46f60a04f22dc4306612eeaaf04b Mon Sep 17 00:00:00 2001 From: Lihao Liang Date: Thu, 21 Dec 2017 16:16:10 +0800 Subject: doc: Fix typo in rcu_head comments Signed-off-by: Lihao Liang Signed-off-by: Paul E. McKenney --- include/linux/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/types.h b/include/linux/types.h index c94d59ef96cc..ec13d02b3481 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -217,7 +217,7 @@ struct ustat { * * This guarantee is important for few reasons: * - future call_rcu_lazy() will make use of lower bits in the pointer; - * - the structure shares storage spacer in struct page with @compound_head, + * - the structure shares storage space in struct page with @compound_head, * which encode PageTail() in bit 0. The guarantee is needed to avoid * false-positive PageTail().
*/ -- cgit v1.2.3 From b5482a06593c851028b5dc061f9c8882bcc20008 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 23 Jan 2018 14:48:33 -0800 Subject: rcu: Fix init_rcu_head() comment. The current (and implicit) comment header for init_rcu_head() and destroy_rcu_head() incorrectly says that they are not needed for statically allocated rcu_head structures. This commit therefore fixes this comment. Reported-by: Bart Van Assche Signed-off-by: Paul E. McKenney Reviewed-by: Bart Van Assche Reviewed-by: Leon Romanovsky --- include/linux/rcupdate.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 043d04784675..36360d07f25b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -214,10 +214,12 @@ do { \ #endif /* - * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic - * initialization and destruction of rcu_head on the stack. rcu_head structures - * allocated dynamically in the heap or defined statically don't need any - * initialization. + * The init_rcu_head_on_stack() and destroy_rcu_head_on_stack() calls + * are needed for dynamic initialization and destruction of rcu_head + * on the stack, and init_rcu_head()/destroy_rcu_head() are needed for + * dynamic initialization and destruction of statically allocated rcu_head + * structures. However, rcu_head structures allocated dynamically in the + * heap don't need any initialization. */ #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD void init_rcu_head(struct rcu_head *head); -- cgit v1.2.3 From a364298359e74a414857bbbf3b725564feb22d09 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 21 Feb 2018 05:17:24 +0100 Subject: nohz: Convert tick_nohz_tick_stopped() to bool It makes this function more self-explanatory about what it does and how to use it. 
Reported-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Reviewed-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1519186649-3242-3-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/tick.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 7cc35921218e..86576d9d2311 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -113,7 +113,7 @@ enum tick_dep_bits { #ifdef CONFIG_NO_HZ_COMMON extern bool tick_nohz_enabled; -extern int tick_nohz_tick_stopped(void); +extern bool tick_nohz_tick_stopped(void); extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); -- cgit v1.2.3 From 22ab8bc02a5f6e8ffc418759894f7a6b0b632331 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 21 Feb 2018 05:17:25 +0100 Subject: nohz: Allow to check if remote CPU tick is stopped This check is racy but provides a good heuristic to determine whether a CPU may need a remote tick or not. Signed-off-by: Frederic Weisbecker Reviewed-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. 
McKenney Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1519186649-3242-4-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/tick.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 86576d9d2311..7f8c9a127f5a 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -114,6 +114,7 @@ enum tick_dep_bits { #ifdef CONFIG_NO_HZ_COMMON extern bool tick_nohz_enabled; extern bool tick_nohz_tick_stopped(void); +extern bool tick_nohz_tick_stopped_cpu(int cpu); extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); @@ -125,6 +126,7 @@ extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); #else /* !CONFIG_NO_HZ_COMMON */ #define tick_nohz_enabled (0) static inline int tick_nohz_tick_stopped(void) { return 0; } +static inline int tick_nohz_tick_stopped_cpu(int cpu) { return 0; } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } -- cgit v1.2.3 From 1bda3f8087fce9063da0b8aef87f17a3fe541aca Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 21 Feb 2018 05:17:26 +0100 Subject: sched/isolation: Isolate workqueues when "nohz_full=" is set As we prepare for offloading the residual 1hz scheduler ticks to workqueue, let's affine those to housekeepers so that they don't interrupt the CPUs that don't want to be disturbed. Signed-off-by: Frederic Weisbecker Reviewed-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. 
McKenney Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1519186649-3242-5-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index d849431c8060..4a6582c27dea 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -12,6 +12,7 @@ enum hk_flags { HK_FLAG_SCHED = (1 << 3), HK_FLAG_TICK = (1 << 4), HK_FLAG_DOMAIN = (1 << 5), + HK_FLAG_WQ = (1 << 6), }; #ifdef CONFIG_CPU_ISOLATION -- cgit v1.2.3 From dcdedb24159be3487e3dbbe1faa79ae7d00c92ac Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 21 Feb 2018 05:17:28 +0100 Subject: sched/nohz: Remove the 1 Hz tick code Now that the 1Hz tick is offloaded to workqueues, we can safely remove the residual code that used to handle it locally. Signed-off-by: Frederic Weisbecker Reviewed-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. 
McKenney Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1519186649-3242-7-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/nohz.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h index 3d3a97d9399d..094217273ff9 100644 --- a/include/linux/sched/nohz.h +++ b/include/linux/sched/nohz.h @@ -37,8 +37,4 @@ extern void wake_up_nohz_cpu(int cpu); static inline void wake_up_nohz_cpu(int cpu) { } #endif -#ifdef CONFIG_NO_HZ_FULL -extern u64 scheduler_tick_max_deferment(void); -#endif - #endif /* _LINUX_SCHED_NOHZ_H */ -- cgit v1.2.3 From 285995d15d3b1725d021a8a274e55f2ce30ccfa0 Mon Sep 17 00:00:00 2001 From: Ognjen Galic Date: Wed, 7 Feb 2018 15:58:27 +0100 Subject: power: add to_power_supply macro to the API This patch adds the to_power_supply macro to upcast a device to a power_supply struct. This is needed because the same piece of code using container_of is used in various other places, so we abstract away such low-level operations via a macro. Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Signed-off-by: Ognjen Galic Reviewed-by: Sebastian Reichel Signed-off-by: Rafael J. Wysocki --- include/linux/power_supply.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 79e90b3d3288..f0139b460a72 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -371,6 +371,8 @@ devm_power_supply_register_no_ws(struct device *parent, extern void power_supply_unregister(struct power_supply *psy); extern int power_supply_powers(struct power_supply *psy, struct device *dev); +#define to_power_supply(device) container_of(device, struct power_supply, dev) + extern void *power_supply_get_drvdata(struct power_supply *psy); /* For APM emulation, think legacy userspace. 
*/ extern struct class *power_supply_class; -- cgit v1.2.3 From 6bd067c48efed50ac0200c4a83a415bd524254e0 Mon Sep 17 00:00:00 2001 From: Bogdan Purcareata Date: Mon, 5 Feb 2018 08:07:42 -0600 Subject: staging: fsl-mc: Move core bus out of staging Move the source files out of staging into their final locations: -mc.h include file in drivers/staging/fsl-mc/include go to include/linux/fsl -source files in drivers/staging/fsl-mc/bus go to drivers/bus/fsl-mc -overview.rst, providing an overview of DPAA2, goes to Documentation/networking/dpaa2/overview.rst Update or delete other remaining staging files -- Makefile, Kconfig, TODO. Update dpaa2_eth and dpio staging drivers. Add integration bits for the documentation build system. Signed-off-by: Stuart Yoder [rebased, add dpaa2_eth and dpio #include updates] Signed-off-by: Laurentiu Tudor [rebased, split irqchip to separate patch] Signed-off-by: Bogdan Purcareata Cc: Thomas Gleixner Cc: Jason Cooper Cc: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl/mc.h | 454 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 454 insertions(+) create mode 100644 include/linux/fsl/mc.h (limited to 'include/linux') diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h new file mode 100644 index 000000000000..765ba41f5987 --- /dev/null +++ b/include/linux/fsl/mc.h @@ -0,0 +1,454 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Freescale Management Complex (MC) bus public interface + * + * Copyright (C) 2014-2016 Freescale Semiconductor, Inc. 
+ * Author: German Rivera + * + */ +#ifndef _FSL_MC_H_ +#define _FSL_MC_H_ + +#include +#include +#include + +#define FSL_MC_VENDOR_FREESCALE 0x1957 + +struct irq_domain; +struct msi_domain_info; + +struct fsl_mc_device; +struct fsl_mc_io; + +/** + * struct fsl_mc_driver - MC object device driver object + * @driver: Generic device driver + * @match_id_table: table of supported device matching Ids + * @probe: Function called when a device is added + * @remove: Function called when a device is removed + * @shutdown: Function called at shutdown time to quiesce the device + * @suspend: Function called when a device is stopped + * @resume: Function called when a device is resumed + * + * Generic DPAA device driver object for device drivers that are registered + * with a DPRC bus. This structure is to be embedded in each device-specific + * driver structure. + */ +struct fsl_mc_driver { + struct device_driver driver; + const struct fsl_mc_device_id *match_id_table; + int (*probe)(struct fsl_mc_device *dev); + int (*remove)(struct fsl_mc_device *dev); + void (*shutdown)(struct fsl_mc_device *dev); + int (*suspend)(struct fsl_mc_device *dev, pm_message_t state); + int (*resume)(struct fsl_mc_device *dev); +}; + +#define to_fsl_mc_driver(_drv) \ + container_of(_drv, struct fsl_mc_driver, driver) + +/** + * enum fsl_mc_pool_type - Types of allocatable MC bus resources + * + * Entries in these enum are used as indices in the array of resource + * pools of an fsl_mc_bus object. 
+ */ +enum fsl_mc_pool_type { + FSL_MC_POOL_DPMCP = 0x0, /* corresponds to "dpmcp" in the MC */ + FSL_MC_POOL_DPBP, /* corresponds to "dpbp" in the MC */ + FSL_MC_POOL_DPCON, /* corresponds to "dpcon" in the MC */ + FSL_MC_POOL_IRQ, + + /* + * NOTE: New resource pool types must be added before this entry + */ + FSL_MC_NUM_POOL_TYPES +}; + +/** + * struct fsl_mc_resource - MC generic resource + * @type: type of resource + * @id: unique MC resource Id within the resources of the same type + * @data: pointer to resource-specific data if the resource is currently + * allocated, or NULL if the resource is not currently allocated. + * @parent_pool: pointer to the parent resource pool from which this + * resource is allocated from. + * @node: Node in the free list of the corresponding resource pool + * + * NOTE: This structure is to be embedded as a field of specific + * MC resource structures. + */ +struct fsl_mc_resource { + enum fsl_mc_pool_type type; + s32 id; + void *data; + struct fsl_mc_resource_pool *parent_pool; + struct list_head node; +}; + +/** + * struct fsl_mc_device_irq - MC object device message-based interrupt + * @msi_desc: pointer to MSI descriptor allocated by fsl_mc_msi_alloc_descs() + * @mc_dev: MC object device that owns this interrupt + * @dev_irq_index: device-relative IRQ index + * @resource: MC generic resource associated with the interrupt + */ +struct fsl_mc_device_irq { + struct msi_desc *msi_desc; + struct fsl_mc_device *mc_dev; + u8 dev_irq_index; + struct fsl_mc_resource resource; +}; + +#define to_fsl_mc_irq(_mc_resource) \ + container_of(_mc_resource, struct fsl_mc_device_irq, resource) + +/* Opened state - Indicates that an object is open by at least one owner */ +#define FSL_MC_OBJ_STATE_OPEN 0x00000001 +/* Plugged state - Indicates that the object is plugged */ +#define FSL_MC_OBJ_STATE_PLUGGED 0x00000002 + +/** + * Shareability flag - Object flag indicating no memory shareability. 
+ * the object generates memory accesses that are non coherent with other + * masters; + * user is responsible for proper memory handling through IOMMU configuration. + */ +#define FSL_MC_OBJ_FLAG_NO_MEM_SHAREABILITY 0x0001 + +/** + * struct fsl_mc_obj_desc - Object descriptor + * @type: Type of object: NULL terminated string + * @id: ID of logical object resource + * @vendor: Object vendor identifier + * @ver_major: Major version number + * @ver_minor: Minor version number + * @irq_count: Number of interrupts supported by the object + * @region_count: Number of mappable regions supported by the object + * @state: Object state: combination of FSL_MC_OBJ_STATE_ states + * @label: Object label: NULL terminated string + * @flags: Object's flags + */ +struct fsl_mc_obj_desc { + char type[16]; + int id; + u16 vendor; + u16 ver_major; + u16 ver_minor; + u8 irq_count; + u8 region_count; + u32 state; + char label[16]; + u16 flags; +}; + +/** + * Bit masks for a MC object device (struct fsl_mc_device) flags + */ +#define FSL_MC_IS_DPRC 0x0001 + +/** + * struct fsl_mc_device - MC object device object + * @dev: Linux driver model device object + * @dma_mask: Default DMA mask + * @flags: MC object device flags + * @icid: Isolation context ID for the device + * @mc_handle: MC handle for the corresponding MC object opened + * @mc_io: Pointer to MC IO object assigned to this device or + * NULL if none. + * @obj_desc: MC description of the DPAA device + * @regions: pointer to array of MMIO region entries + * @irqs: pointer to array of pointers to interrupts allocated to this device + * @resource: generic resource associated with this MC object device, if any. + * + * Generic device object for MC object devices that are "attached" to a + * MC bus. + * + * NOTES: + * - For a non-DPRC object its icid is the same as its parent DPRC's icid. 
+ * - The SMMU notifier callback gets invoked after device_add() has been + * called for an MC object device, but before the device-specific probe + * callback gets called. + * - DP_OBJ_DPRC objects are the only MC objects that have built-in MC + * portals. For all other MC objects, their device drivers are responsible for + * allocating MC portals for them by calling fsl_mc_portal_allocate(). + * - Some types of MC objects (e.g., DP_OBJ_DPBP, DP_OBJ_DPCON) are + * treated as resources that can be allocated/deallocated from the + * corresponding resource pool in the object's parent DPRC, using the + * fsl_mc_object_allocate()/fsl_mc_object_free() functions. These MC objects + * are known as "allocatable" objects. For them, the corresponding + * fsl_mc_device's 'resource' points to the associated resource object. + * For MC objects that are not allocatable (e.g., DP_OBJ_DPRC, DP_OBJ_DPNI), + * 'resource' is NULL. + */ +struct fsl_mc_device { + struct device dev; + u64 dma_mask; + u16 flags; + u16 icid; + u16 mc_handle; + struct fsl_mc_io *mc_io; + struct fsl_mc_obj_desc obj_desc; + struct resource *regions; + struct fsl_mc_device_irq **irqs; + struct fsl_mc_resource *resource; +}; + +#define to_fsl_mc_device(_dev) \ + container_of(_dev, struct fsl_mc_device, dev) + +#define MC_CMD_NUM_OF_PARAMS 7 + +struct mc_cmd_header { + u8 src_id; + u8 flags_hw; + u8 status; + u8 flags_sw; + __le16 token; + __le16 cmd_id; +}; + +struct mc_command { + u64 header; + u64 params[MC_CMD_NUM_OF_PARAMS]; +}; + +enum mc_cmd_status { + MC_CMD_STATUS_OK = 0x0, /* Completed successfully */ + MC_CMD_STATUS_READY = 0x1, /* Ready to be processed */ + MC_CMD_STATUS_AUTH_ERR = 0x3, /* Authentication error */ + MC_CMD_STATUS_NO_PRIVILEGE = 0x4, /* No privilege */ + MC_CMD_STATUS_DMA_ERR = 0x5, /* DMA or I/O error */ + MC_CMD_STATUS_CONFIG_ERR = 0x6, /* Configuration error */ + MC_CMD_STATUS_TIMEOUT = 0x7, /* Operation timed out */ + MC_CMD_STATUS_NO_RESOURCE = 0x8, /* No resources */ + 
MC_CMD_STATUS_NO_MEMORY = 0x9, /* No memory available */ + MC_CMD_STATUS_BUSY = 0xA, /* Device is busy */ + MC_CMD_STATUS_UNSUPPORTED_OP = 0xB, /* Unsupported operation */ + MC_CMD_STATUS_INVALID_STATE = 0xC /* Invalid state */ +}; + +/* + * MC command flags + */ + +/* High priority flag */ +#define MC_CMD_FLAG_PRI 0x80 +/* Command completion flag */ +#define MC_CMD_FLAG_INTR_DIS 0x01 + +static inline u64 mc_encode_cmd_header(u16 cmd_id, + u32 cmd_flags, + u16 token) +{ + u64 header = 0; + struct mc_cmd_header *hdr = (struct mc_cmd_header *)&header; + + hdr->cmd_id = cpu_to_le16(cmd_id); + hdr->token = cpu_to_le16(token); + hdr->status = MC_CMD_STATUS_READY; + if (cmd_flags & MC_CMD_FLAG_PRI) + hdr->flags_hw = MC_CMD_FLAG_PRI; + if (cmd_flags & MC_CMD_FLAG_INTR_DIS) + hdr->flags_sw = MC_CMD_FLAG_INTR_DIS; + + return header; +} + +static inline u16 mc_cmd_hdr_read_token(struct mc_command *cmd) +{ + struct mc_cmd_header *hdr = (struct mc_cmd_header *)&cmd->header; + u16 token = le16_to_cpu(hdr->token); + + return token; +} + +struct mc_rsp_create { + __le32 object_id; +}; + +struct mc_rsp_api_ver { + __le16 major_ver; + __le16 minor_ver; +}; + +static inline u32 mc_cmd_read_object_id(struct mc_command *cmd) +{ + struct mc_rsp_create *rsp_params; + + rsp_params = (struct mc_rsp_create *)cmd->params; + return le32_to_cpu(rsp_params->object_id); +} + +static inline void mc_cmd_read_api_version(struct mc_command *cmd, + u16 *major_ver, + u16 *minor_ver) +{ + struct mc_rsp_api_ver *rsp_params; + + rsp_params = (struct mc_rsp_api_ver *)cmd->params; + *major_ver = le16_to_cpu(rsp_params->major_ver); + *minor_ver = le16_to_cpu(rsp_params->minor_ver); +} + +/** + * Bit masks for a MC I/O object (struct fsl_mc_io) flags + */ +#define FSL_MC_IO_ATOMIC_CONTEXT_PORTAL 0x0001 + +/** + * struct fsl_mc_io - MC I/O object to be passed-in to mc_send_command() + * @dev: device associated with this Mc I/O object + * @flags: flags for mc_send_command() + * @portal_size: MC command portal 
size in bytes + * @portal_phys_addr: MC command portal physical address + * @portal_virt_addr: MC command portal virtual address + * @dpmcp_dev: pointer to the DPMCP device associated with the MC portal. + * + * Fields are only meaningful if the FSL_MC_IO_ATOMIC_CONTEXT_PORTAL flag is not + * set: + * @mutex: Mutex to serialize mc_send_command() calls that use the same MC + * portal, if the fsl_mc_io object was created with the + * FSL_MC_IO_ATOMIC_CONTEXT_PORTAL flag off. mc_send_command() calls for this + * fsl_mc_io object must be made only from non-atomic context. + * + * Fields are only meaningful if the FSL_MC_IO_ATOMIC_CONTEXT_PORTAL flag is + * set: + * @spinlock: Spinlock to serialize mc_send_command() calls that use the same MC + * portal, if the fsl_mc_io object was created with the + * FSL_MC_IO_ATOMIC_CONTEXT_PORTAL flag on. mc_send_command() calls for this + * fsl_mc_io object can be made from atomic or non-atomic context. + */ +struct fsl_mc_io { + struct device *dev; + u16 flags; + u32 portal_size; + phys_addr_t portal_phys_addr; + void __iomem *portal_virt_addr; + struct fsl_mc_device *dpmcp_dev; + union { + /* + * This field is only meaningful if the + * FSL_MC_IO_ATOMIC_CONTEXT_PORTAL flag is not set + */ + struct mutex mutex; /* serializes mc_send_command() */ + + /* + * This field is only meaningful if the + * FSL_MC_IO_ATOMIC_CONTEXT_PORTAL flag is set + */ + spinlock_t spinlock; /* serializes mc_send_command() */ + }; +}; + +int mc_send_command(struct fsl_mc_io *mc_io, struct mc_command *cmd); + +#ifdef CONFIG_FSL_MC_BUS +#define dev_is_fsl_mc(_dev) ((_dev)->bus == &fsl_mc_bus_type) +#else +/* If fsl-mc bus is not present device cannot belong to fsl-mc bus */ +#define dev_is_fsl_mc(_dev) (0) +#endif + +/* + * module_fsl_mc_driver() - Helper macro for drivers that don't do + * anything special in module init/exit. This eliminates a lot of + * boilerplate. 
Each module may only use this macro once, and + * calling it replaces module_init() and module_exit() + */ +#define module_fsl_mc_driver(__fsl_mc_driver) \ + module_driver(__fsl_mc_driver, fsl_mc_driver_register, \ + fsl_mc_driver_unregister) + +/* + * Macro to avoid include chaining to get THIS_MODULE + */ +#define fsl_mc_driver_register(drv) \ + __fsl_mc_driver_register(drv, THIS_MODULE) + +int __must_check __fsl_mc_driver_register(struct fsl_mc_driver *fsl_mc_driver, + struct module *owner); + +void fsl_mc_driver_unregister(struct fsl_mc_driver *driver); + +int __must_check fsl_mc_portal_allocate(struct fsl_mc_device *mc_dev, + u16 mc_io_flags, + struct fsl_mc_io **new_mc_io); + +void fsl_mc_portal_free(struct fsl_mc_io *mc_io); + +int fsl_mc_portal_reset(struct fsl_mc_io *mc_io); + +int __must_check fsl_mc_object_allocate(struct fsl_mc_device *mc_dev, + enum fsl_mc_pool_type pool_type, + struct fsl_mc_device **new_mc_adev); + +void fsl_mc_object_free(struct fsl_mc_device *mc_adev); + +struct irq_domain *fsl_mc_msi_create_irq_domain(struct fwnode_handle *fwnode, + struct msi_domain_info *info, + struct irq_domain *parent); + +int __must_check fsl_mc_allocate_irqs(struct fsl_mc_device *mc_dev); + +void fsl_mc_free_irqs(struct fsl_mc_device *mc_dev); + +extern struct bus_type fsl_mc_bus_type; + +extern struct device_type fsl_mc_bus_dprc_type; +extern struct device_type fsl_mc_bus_dpni_type; +extern struct device_type fsl_mc_bus_dpio_type; +extern struct device_type fsl_mc_bus_dpsw_type; +extern struct device_type fsl_mc_bus_dpbp_type; +extern struct device_type fsl_mc_bus_dpcon_type; +extern struct device_type fsl_mc_bus_dpmcp_type; +extern struct device_type fsl_mc_bus_dpmac_type; +extern struct device_type fsl_mc_bus_dprtc_type; + +static inline bool is_fsl_mc_bus_dprc(const struct fsl_mc_device *mc_dev) +{ + return mc_dev->dev.type == &fsl_mc_bus_dprc_type; +} + +static inline bool is_fsl_mc_bus_dpni(const struct fsl_mc_device *mc_dev) +{ + return 
mc_dev->dev.type == &fsl_mc_bus_dpni_type; +} + +static inline bool is_fsl_mc_bus_dpio(const struct fsl_mc_device *mc_dev) +{ + return mc_dev->dev.type == &fsl_mc_bus_dpio_type; +} + +static inline bool is_fsl_mc_bus_dpsw(const struct fsl_mc_device *mc_dev) +{ + return mc_dev->dev.type == &fsl_mc_bus_dpsw_type; +} + +static inline bool is_fsl_mc_bus_dpbp(const struct fsl_mc_device *mc_dev) +{ + return mc_dev->dev.type == &fsl_mc_bus_dpbp_type; +} + +static inline bool is_fsl_mc_bus_dpcon(const struct fsl_mc_device *mc_dev) +{ + return mc_dev->dev.type == &fsl_mc_bus_dpcon_type; +} + +static inline bool is_fsl_mc_bus_dpmcp(const struct fsl_mc_device *mc_dev) +{ + return mc_dev->dev.type == &fsl_mc_bus_dpmcp_type; +} + +static inline bool is_fsl_mc_bus_dpmac(const struct fsl_mc_device *mc_dev) +{ + return mc_dev->dev.type == &fsl_mc_bus_dpmac_type; +} + +static inline bool is_fsl_mc_bus_dprtc(const struct fsl_mc_device *mc_dev) +{ + return mc_dev->dev.type == &fsl_mc_bus_dprtc_type; +} + +#endif /* _FSL_MC_H_ */ -- cgit v1.2.3 From a7f249e33a44432de30d50f3c57868bc00a8f362 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Thu, 15 Feb 2018 11:31:47 +0530 Subject: clocksource: timer-ti-dm: Add timer ops to the platform data structure Add timer ops to the platform data structure Signed-off-by: Keerthy Reviewed-by: Sebastian Reichel Tested-by: Ladislav Michl Signed-off-by: Tony Lindgren --- include/linux/platform_data/dmtimer-omap.h | 38 ++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/dmtimer-omap.h b/include/linux/platform_data/dmtimer-omap.h index a19b78d826e9..757a0f9e26f9 100644 --- a/include/linux/platform_data/dmtimer-omap.h +++ b/include/linux/platform_data/dmtimer-omap.h @@ -20,12 +20,50 @@ #ifndef __PLATFORM_DATA_DMTIMER_OMAP_H__ #define __PLATFORM_DATA_DMTIMER_OMAP_H__ +struct omap_dm_timer_ops { + struct omap_dm_timer *(*request_by_node)(struct device_node *np); + struct 
omap_dm_timer *(*request_specific)(int timer_id); + struct omap_dm_timer *(*request)(void); + + int (*free)(struct omap_dm_timer *timer); + + void (*enable)(struct omap_dm_timer *timer); + void (*disable)(struct omap_dm_timer *timer); + + int (*get_irq)(struct omap_dm_timer *timer); + int (*set_int_enable)(struct omap_dm_timer *timer, + unsigned int value); + int (*set_int_disable)(struct omap_dm_timer *timer, u32 mask); + + struct clk *(*get_fclk)(struct omap_dm_timer *timer); + + int (*start)(struct omap_dm_timer *timer); + int (*stop)(struct omap_dm_timer *timer); + int (*set_source)(struct omap_dm_timer *timer, int source); + + int (*set_load)(struct omap_dm_timer *timer, int autoreload, + unsigned int value); + int (*set_match)(struct omap_dm_timer *timer, int enable, + unsigned int match); + int (*set_pwm)(struct omap_dm_timer *timer, int def_on, + int toggle, int trigger); + int (*set_prescaler)(struct omap_dm_timer *timer, int prescaler); + + unsigned int (*read_counter)(struct omap_dm_timer *timer); + int (*write_counter)(struct omap_dm_timer *timer, + unsigned int value); + unsigned int (*read_status)(struct omap_dm_timer *timer); + int (*write_status)(struct omap_dm_timer *timer, + unsigned int value); +}; + struct dmtimer_platform_data { /* set_timer_src - Only used for OMAP1 devices */ int (*set_timer_src)(struct platform_device *pdev, int source); u32 timer_capability; u32 timer_errata; int (*get_context_loss_count)(struct device *); + const struct omap_dm_timer_ops *timer_ops; }; #endif /* __PLATFORM_DATA_DMTIMER_OMAP_H__ */ -- cgit v1.2.3 From 72e89f50084c6dbc58a00aeedf92c450dc1a8b1c Mon Sep 17 00:00:00 2001 From: Richard Haines Date: Tue, 13 Feb 2018 20:53:21 +0000 Subject: security: Add support for SCTP security hooks The SCTP security hooks are explained in: Documentation/security/LSM-sctp.rst Signed-off-by: Richard Haines Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 36 ++++++++++++++++++++++++++++++++++++ include/linux/security.h | 
25 +++++++++++++++++++++++++ 2 files changed, 61 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 7161d8e7ee79..84c0b927ea85 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -906,6 +906,33 @@ * associated with the TUN device's security structure. * @security pointer to the TUN device's security structure. * + * Security hooks for SCTP + * + * @sctp_assoc_request: + * Passes the @ep and @chunk->skb of the association INIT packet to + * the security module. + * @ep pointer to sctp endpoint structure. + * @skb pointer to skbuff of association packet. + * Return 0 on success, error on failure. + * @sctp_bind_connect: + * Validate permissions required for each address associated with sock + * @sk. Depending on @optname, the addresses will be treated as either + * for a connect or bind service. The @addrlen is calculated on each + * ipv4 and ipv6 address using sizeof(struct sockaddr_in) or + * sizeof(struct sockaddr_in6). + * @sk pointer to sock structure. + * @optname name of the option to validate. + * @address list containing one or more ipv4/ipv6 addresses. + * @addrlen total length of address(s). + * Return 0 on success, error on failure. + * @sctp_sk_clone: + * Called whenever a new socket is created by accept(2) (i.e. a TCP + * style socket) or when a socket is 'peeled off' e.g. userspace + * calls sctp_peeloff(3). + * @ep pointer to current sctp endpoint structure. + * @sk pointer to current sock structure. + * @newsk pointer to new sock structure.
+ * * Security hooks for Infiniband * * @ib_pkey_access: @@ -1665,6 +1692,12 @@ union security_list_options { int (*tun_dev_attach_queue)(void *security); int (*tun_dev_attach)(struct sock *sk, void *security); int (*tun_dev_open)(void *security); + int (*sctp_assoc_request)(struct sctp_endpoint *ep, + struct sk_buff *skb); + int (*sctp_bind_connect)(struct sock *sk, int optname, + struct sockaddr *address, int addrlen); + void (*sctp_sk_clone)(struct sctp_endpoint *ep, struct sock *sk, + struct sock *newsk); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND @@ -1914,6 +1947,9 @@ struct security_hook_heads { struct list_head tun_dev_attach_queue; struct list_head tun_dev_attach; struct list_head tun_dev_open; + struct list_head sctp_assoc_request; + struct list_head sctp_bind_connect; + struct list_head sctp_sk_clone; #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND struct list_head ib_pkey_access; diff --git a/include/linux/security.h b/include/linux/security.h index 73f1ef625d40..2ff5f5777a53 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -115,6 +115,7 @@ struct xfrm_policy; struct xfrm_state; struct xfrm_user_sec_ctx; struct seq_file; +struct sctp_endpoint; #ifdef CONFIG_MMU extern unsigned long mmap_min_addr; @@ -1229,6 +1230,11 @@ int security_tun_dev_create(void); int security_tun_dev_attach_queue(void *security); int security_tun_dev_attach(struct sock *sk, void *security); int security_tun_dev_open(void *security); +int security_sctp_assoc_request(struct sctp_endpoint *ep, struct sk_buff *skb); +int security_sctp_bind_connect(struct sock *sk, int optname, + struct sockaddr *address, int addrlen); +void security_sctp_sk_clone(struct sctp_endpoint *ep, struct sock *sk, + struct sock *newsk); #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct sock *sock, @@ -1421,6 +1427,25 @@ static inline int security_tun_dev_open(void *security) { return 0; } + +static 
inline int security_sctp_assoc_request(struct sctp_endpoint *ep, + struct sk_buff *skb) +{ + return 0; +} + +static inline int security_sctp_bind_connect(struct sock *sk, int optname, + struct sockaddr *address, + int addrlen) +{ + return 0; +} + +static inline void security_sctp_sk_clone(struct sctp_endpoint *ep, + struct sock *sk, + struct sock *newsk) +{ +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND -- cgit v1.2.3 From a1f2ba04cc92414b6b933289365eab878b0b2bf4 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 19 Feb 2018 14:48:40 +0200 Subject: mac80211: add get TID helper Extracting the TID from the QOS header is common enough to justify helper. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e4cba332b705..8fe7e4306816 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -8,6 +8,7 @@ * Copyright (c) 2006, Michael Wu * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright (c) 2016 - 2017 Intel Deutschland GmbH + * Copyright (c) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -2501,6 +2502,17 @@ static inline u8 *ieee80211_get_qos_ctl(struct ieee80211_hdr *hdr) return (u8 *)hdr + 24; } +/** + * ieee80211_get_tid - get qos TID + * @hdr: the frame + */ +static inline u8 ieee80211_get_tid(struct ieee80211_hdr *hdr) +{ + u8 *qc = ieee80211_get_qos_ctl(hdr); + + return qc[0] & IEEE80211_QOS_CTL_TID_MASK; +} + /** * ieee80211_get_SA - get pointer to SA * @hdr: the frame -- cgit v1.2.3 From d060b40523dcd91428c7fb2aaa307de37887484a Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 22 Feb 2018 13:57:30 -0800 Subject: ARM: OMAP2+: Prepare to pass auxdata for 
smartreflex We are still initializing smartreflex with platform data using omap_device_build(). We can instead pass the platform data in with auxdata in pdata-quirks.c and make the driver use that in later patches. Note that we cannot enable the auxdata use yet, this is done in the last patch of the series. Signed-off-by: Tony Lindgren --- include/linux/power/smartreflex.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/power/smartreflex.h b/include/linux/power/smartreflex.h index d8b187c3925d..7b81dad712de 100644 --- a/include/linux/power/smartreflex.h +++ b/include/linux/power/smartreflex.h @@ -143,6 +143,13 @@ #define OMAP3430_SR_ERRWEIGHT 0x04 #define OMAP3430_SR_ERRMAXLIMIT 0x02 +enum sr_instance { + OMAP_SR_MPU, /* shared with iva on omap3 */ + OMAP_SR_CORE, + OMAP_SR_IVA, + OMAP_SR_NR, +}; + struct omap_sr { char *name; struct list_head node; @@ -207,7 +214,6 @@ struct omap_smartreflex_dev_attr { const char *sensor_voltdm_name; }; -#ifdef CONFIG_POWER_AVS_OMAP /* * The smart reflex driver supports CLASS1 CLASS2 and CLASS3 SR. * The smartreflex class driver should pass the class type. @@ -290,6 +296,8 @@ struct omap_sr_data { struct voltagedomain *voltdm; }; +#ifdef CONFIG_POWER_AVS_OMAP + /* Smartreflex module enable/disable interface */ void omap_sr_enable(struct voltagedomain *voltdm); void omap_sr_disable(struct voltagedomain *voltdm); -- cgit v1.2.3 From 3ecac020d6dd09259414f423b577347ebee9f533 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 8 Feb 2018 23:20:35 +1100 Subject: PCI/AER: Move pci_uevent_ers() out of pci.h There's no reason pci_uevent_ers() needs to be inline in pci.h, so move it out to a C file. Given it's used by AER the obvious location would be somewhere in drivers/pci/pcie/aer, but because it's also used by powerpc EEH code unfortunately that doesn't work in the case where EEH is enabled but PCIEPORTBUS is not. 
So for now put it in pci-driver.c, next to pci_uevent(), with an appropriate #ifdef so it's not built if AER and EEH are both disabled. While we're moving it also fix up the kernel doc comment for @pdev to be accurate. Reported-by: Linus Torvalds Signed-off-by: Michael Ellerman Signed-off-by: Bjorn Helgaas Reviewed-by: Bryant G. Ly --- include/linux/pci.h | 38 +++----------------------------------- 1 file changed, 3 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 024a1beda008..19c1dbcff0c6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2280,41 +2280,9 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev) return false; } -/** - * pci_uevent_ers - emit a uevent during recovery path of pci device - * @pdev: pci device to check - * @err_type: type of error event - * - */ -static inline void pci_uevent_ers(struct pci_dev *pdev, - enum pci_ers_result err_type) -{ - int idx = 0; - char *envp[3]; - - switch (err_type) { - case PCI_ERS_RESULT_NONE: - case PCI_ERS_RESULT_CAN_RECOVER: - envp[idx++] = "ERROR_EVENT=BEGIN_RECOVERY"; - envp[idx++] = "DEVICE_ONLINE=0"; - break; - case PCI_ERS_RESULT_RECOVERED: - envp[idx++] = "ERROR_EVENT=SUCCESSFUL_RECOVERY"; - envp[idx++] = "DEVICE_ONLINE=1"; - break; - case PCI_ERS_RESULT_DISCONNECT: - envp[idx++] = "ERROR_EVENT=FAILED_RECOVERY"; - envp[idx++] = "DEVICE_ONLINE=0"; - break; - default: - break; - } - - if (idx > 0) { - envp[idx++] = NULL; - kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, envp); - } -} +#if defined(CONFIG_PCIEAER) || defined(CONFIG_EEH) +void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type); +#endif /* Provide the legacy pci_dma_* API */ #include -- cgit v1.2.3 From c37e627f9565368ed7bd1f3cf59a2d223ddba85a Mon Sep 17 00:00:00 2001 From: Frederick Lawler Date: Tue, 13 Feb 2018 21:52:18 -0600 Subject: PCI/portdrv: Move pcieport_if.h to drivers/pci/pcie/ Move pcieport_if.h from include/linux 
to drivers/pci/pcie/pcieport_if.h because the interfaces there are only used by the PCI core. Replace all uses of #include <linux/pcieport_if.h> with relative paths to the new file location, e.g., #include "../pcieport_if.h" Signed-off-by: Frederick Lawler Signed-off-by: Bjorn Helgaas --- include/linux/pcieport_if.h | 71 --------------------------------------------- 1 file changed, 71 deletions(-) delete mode 100644 include/linux/pcieport_if.h (limited to 'include/linux') diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h deleted file mode 100644 index b69769dbf659..000000000000 --- a/include/linux/pcieport_if.h +++ /dev/null @@ -1,71 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * File: pcieport_if.h - * Purpose: PCI Express Port Bus Driver's IF Data Structure - * - * Copyright (C) 2004 Intel - * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) - */ - -#ifndef _PCIEPORT_IF_H_ -#define _PCIEPORT_IF_H_ - -/* Port Type */ -#define PCIE_ANY_PORT (~0) - -/* Service Type */ -#define PCIE_PORT_SERVICE_PME_SHIFT 0 /* Power Management Event */ -#define PCIE_PORT_SERVICE_PME (1 << PCIE_PORT_SERVICE_PME_SHIFT) -#define PCIE_PORT_SERVICE_AER_SHIFT 1 /* Advanced Error Reporting */ -#define PCIE_PORT_SERVICE_AER (1 << PCIE_PORT_SERVICE_AER_SHIFT) -#define PCIE_PORT_SERVICE_HP_SHIFT 2 /* Native Hotplug */ -#define PCIE_PORT_SERVICE_HP (1 << PCIE_PORT_SERVICE_HP_SHIFT) -#define PCIE_PORT_SERVICE_VC_SHIFT 3 /* Virtual Channel */ -#define PCIE_PORT_SERVICE_VC (1 << PCIE_PORT_SERVICE_VC_SHIFT) -#define PCIE_PORT_SERVICE_DPC_SHIFT 4 /* Downstream Port Containment */ -#define PCIE_PORT_SERVICE_DPC (1 << PCIE_PORT_SERVICE_DPC_SHIFT) - -struct pcie_device { - int irq; /* Service IRQ/MSI/MSI-X Vector */ - struct pci_dev *port; /* Root/Upstream/Downstream Port */ - u32 service; /* Port service this device represents */ - void *priv_data; /* Service Private Data */ - struct device device; /* Generic Device Interface */ -}; -#define to_pcie_device(d) container_of(d, struct 
pcie_device, device) - -static inline void set_service_data(struct pcie_device *dev, void *data) -{ - dev->priv_data = data; -} - -static inline void *get_service_data(struct pcie_device *dev) -{ - return dev->priv_data; -} - -struct pcie_port_service_driver { - const char *name; - int (*probe) (struct pcie_device *dev); - void (*remove) (struct pcie_device *dev); - int (*suspend) (struct pcie_device *dev); - int (*resume) (struct pcie_device *dev); - - /* Device driver may resume normal operations */ - void (*error_resume)(struct pci_dev *dev); - - /* Link Reset Capability - AER service driver specific */ - pci_ers_result_t (*reset_link) (struct pci_dev *dev); - - int port_type; /* Type of the port this driver can handle */ - u32 service; /* Port service this device represents */ - - struct device_driver driver; -}; -#define to_service_driver(d) \ - container_of(d, struct pcie_port_service_driver, driver) - -int pcie_port_service_register(struct pcie_port_service_driver *new); -void pcie_port_service_unregister(struct pcie_port_service_driver *new); - -#endif /* _PCIEPORT_IF_H_ */ -- cgit v1.2.3 From 4ef76ad0462cf25ce948541c8724eaa8a8365e1d Mon Sep 17 00:00:00 2001 From: Feng Kan Date: Tue, 20 Feb 2018 19:19:27 -0800 Subject: PCI: Add ACS quirk for Ampere root ports The Ampere Computing PCIe root port does not support ACS at this point. However, the hardware provides isolation and source validation through the SMMU. The stream ID generated by the PCIe ports contain both the bus/device/function number as well as the port ID in its 3 most significant bits. Turn on ACS but disable all the peer-to-peer features. APM is being rebranded to Ampere. The Vendor and Device IDs change, but the functionality stays the same. 
Signed-off-by: Feng Kan Signed-off-by: Bjorn Helgaas --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index a6b30667a331..c875d4223f44 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1333,6 +1333,7 @@ #define PCI_DEVICE_ID_IMS_TT3D 0x9135 #define PCI_VENDOR_ID_AMCC 0x10e8 +#define PCI_VENDOR_ID_AMPERE 0x1def #define PCI_VENDOR_ID_INTERG 0x10ea #define PCI_DEVICE_ID_INTERG_1682 0x1682 -- cgit v1.2.3 From 492a1abd61e4b4f78c1c5804840a304a9e32da04 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 22 Feb 2018 14:59:20 +0200 Subject: dmi: Introduce the dmi_get_bios_year() helper function The pattern to only extract the year portion of date is used in several places and more users may come. By using this helper they may create slightly cleaner code. Signed-off-by: Andy Shevchenko [ Minor stylistic cleanup. ] Cc: Bjorn Helgaas Cc: Jean Delvare Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J. 
Wysocki Cc: Thomas Gleixner Cc: linux-acpi@vger.kernel.org Cc: linux-pci@vger.kernel.org Link: http://lkml.kernel.org/r/20180222125923.57385-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/dmi.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmi.h b/include/linux/dmi.h index 46e151172d95..0bade156e908 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -147,4 +147,13 @@ static inline const struct dmi_system_id * #endif +static inline int dmi_get_bios_year(void) +{ + int year; + + dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL); + + return year; +} + #endif /* __DMI_H__ */ -- cgit v1.2.3 From 5f171577b4f35b44795a73bde8cf2c49b4073925 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 24 Oct 2017 16:52:32 +0100 Subject: Drop a bunch of metag references Now that arch/metag/ has been removed, drop a bunch of metag references in various codes across the whole tree: - VM_GROWSUP and __VM_ARCH_SPECIFIC_1. - MT_METAG_* ELF note types. - METAG Kconfig dependencies (FRAME_POINTER) and ranges (MAX_STACK_SIZE_MB). - metag cases in tools (checkstack.pl, recordmcount.c, perf). 
Signed-off-by: James Hogan Acked-by: Steven Rostedt (VMware) Acked-by: Peter Zijlstra (Intel) Reviewed-by: Guenter Roeck Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: linux-mm@kvack.org Cc: linux-metag@vger.kernel.org --- include/linux/cpuhotplug.h | 1 - include/linux/mm.h | 2 -- 2 files changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 5172ad0daa7c..c7a950681f3a 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -108,7 +108,6 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_CQM_STARTING, CPUHP_AP_PERF_X86_CSTATE_STARTING, CPUHP_AP_PERF_XTENSA_STARTING, - CPUHP_AP_PERF_METAG_STARTING, CPUHP_AP_MIPS_OP_LOONGSON3_STARTING, CPUHP_AP_ARM_SDEI_STARTING, CPUHP_AP_ARM_VFP_STARTING, diff --git a/include/linux/mm.h b/include/linux/mm.h index ad06d42adb1a..ccac10682ce5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -241,8 +241,6 @@ extern unsigned int kobjsize(const void *objp); # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 -#elif defined(CONFIG_METAG) -# define VM_GROWSUP VM_ARCH_1 #elif defined(CONFIG_IA64) # define VM_GROWSUP VM_ARCH_1 #elif !defined(CONFIG_MMU) -- cgit v1.2.3 From df46bb1909d92eedccd4216c88e43f75cb0b2901 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 21 Feb 2018 15:31:32 +0000 Subject: irqchip: Remove metag irqchip drivers Now that arch/metag/ has been removed, remove the two metag irqchip drivers. They are of no value without the architecture code. - irq-metag: Meta internal (HWSTATMETA) interrupt code. - irq-metag-ext: Meta External interrupt code. 
Signed-off-by: James Hogan Cc: Thomas Gleixner Cc: Jason Cooper Cc: Marc Zyngier Cc: linux-metag@vger.kernel.org --- include/linux/irqchip/metag-ext.h | 34 ---------------------------------- include/linux/irqchip/metag.h | 25 ------------------------- 2 files changed, 59 deletions(-) delete mode 100644 include/linux/irqchip/metag-ext.h delete mode 100644 include/linux/irqchip/metag.h (limited to 'include/linux') diff --git a/include/linux/irqchip/metag-ext.h b/include/linux/irqchip/metag-ext.h deleted file mode 100644 index d120496370b9..000000000000 --- a/include/linux/irqchip/metag-ext.h +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2012 Imagination Technologies - */ - -#ifndef _LINUX_IRQCHIP_METAG_EXT_H_ -#define _LINUX_IRQCHIP_METAG_EXT_H_ - -struct irq_data; -struct platform_device; - -/* called from core irq code at init */ -int init_external_IRQ(void); - -/* - * called from SoC init_irq() callback to dynamically indicate the lack of - * HWMASKEXT registers. - */ -void meta_intc_no_mask(void); - -/* - * These allow SoCs to specialise the interrupt controller from their init_irq - * callbacks. 
- */ - -extern struct irq_chip meta_intc_edge_chip; -extern struct irq_chip meta_intc_level_chip; - -/* this should be called in the mask callback */ -void meta_intc_mask_irq_simple(struct irq_data *data); -/* this should be called in the unmask callback */ -void meta_intc_unmask_irq_simple(struct irq_data *data); - -#endif /* _LINUX_IRQCHIP_METAG_EXT_H_ */ diff --git a/include/linux/irqchip/metag.h b/include/linux/irqchip/metag.h deleted file mode 100644 index 0adcf449e4e4..000000000000 --- a/include/linux/irqchip/metag.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2011 Imagination Technologies - */ - -#ifndef _LINUX_IRQCHIP_METAG_H_ -#define _LINUX_IRQCHIP_METAG_H_ - -#include - -#ifdef CONFIG_METAG_PERFCOUNTER_IRQS -extern int init_internal_IRQ(void); -extern int internal_irq_map(unsigned int hw); -#else -static inline int init_internal_IRQ(void) -{ - return 0; -} -static inline int internal_irq_map(unsigned int hw) -{ - return -EINVAL; -} -#endif - -#endif /* _LINUX_IRQCHIP_METAG_H_ */ -- cgit v1.2.3 From b79a732504ad2d6552458eaf72b4ed807da88340 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 21 Feb 2018 15:42:32 +0000 Subject: clocksource: Remove metag generic timer driver Now that arch/metag/ has been removed, remove the metag generic per-thread timer driver. It is of no value without the architecture code. 
Signed-off-by: James Hogan Acked-by: Daniel Lezcano Cc: Thomas Gleixner Cc: linux-metag@vger.kernel.org --- include/linux/cpuhotplug.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index c7a950681f3a..5b211fe295f0 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -121,7 +121,6 @@ enum cpuhp_state { CPUHP_AP_JCORE_TIMER_STARTING, CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING, CPUHP_AP_ARM_TWD_STARTING, - CPUHP_AP_METAG_TIMER_STARTING, CPUHP_AP_QCOM_TIMER_STARTING, CPUHP_AP_ARMADA_TIMER_STARTING, CPUHP_AP_MARCO_TIMER_STARTING, -- cgit v1.2.3 From ee07862f7b4594d390b978f6636a6a6191632ab3 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Fri, 23 Feb 2018 14:58:41 +0800 Subject: bpf: NULL pointer check is not needed in BPF_CGROUP_RUN_PROG_INET_SOCK sk is already allocated in inet_create/inet6_create, hence when BPF_CGROUP_RUN_PROG_INET_SOCK is executed sk will never be NULL. The logic is as below: sk = sk_alloc(); if (!sk) goto out; BPF_CGROUP_RUN_PROG_INET_SOCK(sk); Signed-off-by: Yafang Shao Signed-off-by: Daniel Borkmann --- include/linux/bpf-cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index a7f16e0f8d68..8a4566691c8f 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -96,7 +96,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled && sk) { \ + if (cgroup_bpf_enabled) { \ __ret = __cgroup_bpf_run_filter_sk(sk, \ BPF_CGROUP_INET_SOCK_CREATE); \ } \ -- cgit v1.2.3 From 57cbd893c4c575a24594fa6c0835247506ce26e2 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 16 Jan 2018 14:04:14 +0000 Subject: net/mlx5: E-Switch, Move representors definition to a global scope In preparation for IB representors, move 
representors structs to a global scope, also expose functions needed for registration, unregistration, eswitch mode and creating a flow rule to direct traffic from SQs to the right VF. Signed-off-by: Mark Bloch Reviewed-by: Or Gerlitz Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 6 +++++ include/linux/mlx5/eswitch.h | 57 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 include/linux/mlx5/eswitch.h (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index bfea26af6de5..4814cad7456e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1224,6 +1224,12 @@ static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev) return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF); } +#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev)) +#define MLX5_VPORT_MANAGER(mdev) \ + (MLX5_CAP_GEN(mdev, vport_group_manager) && \ + (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \ + mlx5_core_is_pf(mdev)) + static inline int mlx5_get_gid_table_len(u16 param) { if (param > 4) { diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h new file mode 100644 index 000000000000..f62bf486c18c --- /dev/null +++ b/include/linux/mlx5/eswitch.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. 
+ */ + +#ifndef _MLX5_ESWITCH_ +#define _MLX5_ESWITCH_ + +#include + +enum { + SRIOV_NONE, + SRIOV_LEGACY, + SRIOV_OFFLOADS +}; + +enum { + REP_ETH, + NUM_REP_TYPES, +}; + +struct mlx5_eswitch_rep; +struct mlx5_eswitch_rep_if { + int (*load)(struct mlx5_core_dev *dev, + struct mlx5_eswitch_rep *rep); + void (*unload)(struct mlx5_eswitch_rep *rep); + void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep); + void *priv; + bool valid; +}; + +struct mlx5_eswitch_rep { + struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES]; + u16 vport; + u8 hw_id[ETH_ALEN]; + u16 vlan; + u32 vlan_refcount; +}; + +void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, + int vport_index, + struct mlx5_eswitch_rep_if *rep_if, + u8 rep_type); +void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, + int vport_index, + u8 rep_type); +void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, + int vport, + u8 rep_type); +struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, + int vport); +void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type); +u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw); +struct mlx5_flow_handle * +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, + int vport, u32 sqn); +#endif -- cgit v1.2.3 From 5e65b02c00900155833008b7992bbbbc7f0df2ac Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 16 Jan 2018 14:13:46 +0000 Subject: net/mlx5: E-Switch, Add definition of IB representor Create a new representor type, REP_IB, which will be initialized by an IB device that is used as a logical representor of an eswitch vport (VF or uplink), just like we have a net device today in switchdev mode. 
Signed-off-by: Mark Bloch Reviewed-by: Or Gerlitz Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/linux/mlx5/eswitch.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index f62bf486c18c..d3c9db492b30 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -16,6 +16,7 @@ enum { enum { REP_ETH, + REP_IB, NUM_REP_TYPES, }; -- cgit v1.2.3 From e69c61dd050e410d78363e5fe6e56a9f719abdf5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 24 Feb 2018 21:22:18 -0800 Subject: genirq: Drop 5 #included header files from irq.h <linux/irq.h> does not use nor need several of its #included files, so drop those header files from irq.h. <linux/irq.h> is currently #included in around 1135 C source files (oops, I didn't count other header files that #include it), making it the 29th most-used header file. Build tested on i386 and x86_64 * (allnoconfig, tiny.config, defconfig, allyesconfig, and allmodconfig) and x86_64 allmodconfig + SMP=disabled. Signed-off-by: Randy Dunlap Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/02745e91-c117-74b5-d043-dceb3d4bb4e0@infradead.org --- include/linux/irq.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index a0231e96a578..979eed1b2654 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -10,18 +10,13 @@ * Thanks. --rmk */ -#include -#include #include #include #include -#include #include #include #include -#include #include -#include #include #include -- cgit v1.2.3 From 6ce5ae7977c89f2a09092954396a66c90e8213f2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 24 Feb 2018 21:22:12 -0800 Subject: mutex: Drop linkage.h from mutex.h <linux/mutex.h> does not use nor need <linux/linkage.h>, so drop that one header file from mutex.h. 
<linux/mutex.h> is currently #included in around 1250 C source files (oops, I didn't count other header files that #include it), making it the 27th most-used header file. Build tested on i386 and x86_64 * (allnoconfig, tiny.config, defconfig, allyesconfig, and allmodconfig) and x86_64 allmodconfig + SMP=disabled. Signed-off-by: Randy Dunlap Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/582b3892-4e4c-06b2-a368-5c2d439de7fc@infradead.org --- include/linux/mutex.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index f25c13423bd4..9b7fe56692bd 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From dfc9327ab7c99bc13e12106448615efba833886b Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 19 Feb 2018 11:09:04 +0100 Subject: acpi: Introduce acpi_arch_get_root_pointer() for getting rsdp address Add an architecture specific function to get the address of the RSDP table. By default it just returns 0, indicating a fallback to the current mechanism. Signed-off-by: Juergen Gross Reviewed-by: Andy Shevchenko Acked-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Cc: Borislav Petkov Cc: Eric Biederman Cc: H. Peter Anvin Cc: Kees Cook Cc: Kirill A. 
Shutemov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: boris.ostrovsky@oracle.com Cc: lenb@kernel.org Cc: linux-acpi@vger.kernel.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/20180219100906.14265-2-jgross@suse.com Signed-off-by: Ingo Molnar --- include/linux/acpi.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 968173ec2726..15bfb15c2fa5 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -623,6 +623,13 @@ bool acpi_gtdt_c3stop(int type); int acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, int *timer_count); #endif +#ifndef ACPI_HAVE_ARCH_GET_ROOT_POINTER +static inline u64 acpi_arch_get_root_pointer(void) +{ + return 0; +} +#endif + #else /* !CONFIG_ACPI */ #define acpi_disabled 1 -- cgit v1.2.3 From 31895662f9ba81e8ea9ef05abf8edcb29d4b9c18 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 21 Feb 2018 10:20:25 +0100 Subject: regmap: mmio: Add function to attach a clock regmap_init_mmio_clk allows to specify a clock that needs to be enabled while accessing the registers. However, that clock is retrieved through its clock ID, which means it will lookup that clock based on the current device that registers the regmap, and, in the DT case, will only look in that device OF node. This might be problematic if the clock to enable is stored in another node. Let's add a function that allows to attach a clock that has already been retrieved to a regmap in order to fix this. 
Signed-off-by: Maxime Ripard Signed-off-by: Mark Brown --- include/linux/regmap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 6a3aeba40e9e..5f7ad0552c03 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -21,6 +21,7 @@ #include struct module; +struct clk; struct device; struct i2c_client; struct irq_domain; @@ -905,6 +906,8 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); __regmap_lockdep_wrapper(__devm_regmap_init_sdw, #config, \ sdw, config) +int regmap_mmio_attach_clk(struct regmap *map, struct clk *clk); +void regmap_mmio_detach_clk(struct regmap *map); void regmap_exit(struct regmap *map); int regmap_reinit_cache(struct regmap *map, const struct regmap_config *config); -- cgit v1.2.3 From ef70b0bdeaf893dd6d9c3a8d05d9b65d395506c0 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 22 Feb 2018 14:00:25 -0800 Subject: bus: ti-sysc: Add support for platform data callbacks We want to pass the device tree configuration for interconnect target modules from ti-sysc driver to the existing platform hwmod code. This allows us to first validate the dts data against the existing platform data before we start dropping the platform data in favor of device tree data. To do this, let's add platform data callbacks for PM runtime functions to call for the interconnect target modules if platform data is available. Note that as ti-sysc driver can rebind, omap_auxdata_lookup and related functions can no longer be __init. 
Signed-off-by: Tony Lindgren --- include/linux/platform_data/ti-sysc.h | 49 +++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index 1be356330b96..4176cb90e195 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -16,6 +16,10 @@ enum ti_sysc_module_type { TI_SYSC_OMAP4_USB_HOST_FS, }; +struct ti_sysc_cookie { + void *data; +}; + /** * struct sysc_regbits - TI OCP_SYSCONFIG register field offsets * @midle_shift: Offset of the midle bit @@ -83,4 +87,49 @@ struct sysc_config { u32 quirks; }; +enum sysc_registers { + SYSC_REVISION, + SYSC_SYSCONFIG, + SYSC_SYSSTATUS, + SYSC_MAX_REGS, +}; + +/** + * struct ti_sysc_module_data - ti-sysc to hwmod translation data for a module + * @name: legacy "ti,hwmods" module name + * @module_pa: physical address of the interconnect target module + * @module_size: size of the interconnect target module + * @offsets: array of register offsets as listed in enum sysc_registers + * @nr_offsets: number of registers + * @cap: interconnect target module capabilities + * @cfg: interconnect target module configuration + * + * This data is enough to allocate a new struct omap_hwmod_class_sysconfig + * based on device tree data parsed by ti-sysc driver. 
+ */ +struct ti_sysc_module_data { + const char *name; + u64 module_pa; + u32 module_size; + int *offsets; + int nr_offsets; + const struct sysc_capabilities *cap; + struct sysc_config *cfg; +}; + +struct device; + +struct ti_sysc_platform_data { + struct of_dev_auxdata *auxdata; + int (*init_module)(struct device *dev, + const struct ti_sysc_module_data *data, + struct ti_sysc_cookie *cookie); + int (*enable_module)(struct device *dev, + const struct ti_sysc_cookie *cookie); + int (*idle_module)(struct device *dev, + const struct ti_sysc_cookie *cookie); + int (*shutdown_module)(struct device *dev, + const struct ti_sysc_cookie *cookie); +}; + #endif /* __TI_SYSC_DATA_H__ */ -- cgit v1.2.3 From a885f0fe209f262efa2c1cac9278a5774e5f7a80 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 22 Feb 2018 14:03:48 -0800 Subject: bus: ti-sysc: Handle some devices in omap_device compatible way Now that ti-sysc can manage child devices, we must also be backwards compatible with the current omap_device code. With omap_device, we assume that the child device manages the interconnect target module directly. The drivers needing special handling are the ones that still set pm_runtime_irq_safe(). In the long run we want to update those drivers as otherwise they will cause problems with genpd as a permanent PM runtime usage count is set on the parent device. We can handle these omap_device devices by improving the ti-sysc quirk handling to detect the devices needing special handling based on register map and revision register if usable. We also need to implement dev_pm_domain for these child devices just like omap_device does. 
Signed-off-by: Tony Lindgren --- include/linux/platform_data/ti-sysc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index 4176cb90e195..80ce28d40832 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -45,6 +45,7 @@ struct sysc_regbits { s8 emufree_shift; }; +#define SYSC_QUIRK_LEGACY_IDLE BIT(8) #define SYSC_QUIRK_RESET_STATUS BIT(7) #define SYSC_QUIRK_NO_IDLE_ON_INIT BIT(6) #define SYSC_QUIRK_NO_RESET_ON_INIT BIT(5) -- cgit v1.2.3 From 209f668cd29d2b6b9f39a0b9f179ee40f47c2014 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 26 Feb 2018 16:04:19 -0800 Subject: console: Fill in struct consw argument names Reading the function declarations for the console callbacks lacks any hints as to what the arguments are. Instead of going and digging around in various implementations that may each only have a subset of the callbacks, name all the arguments in the declaration. This has no functional change. 
Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- include/linux/console.h | 58 +++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index b8920a031a3e..dfd6b0e97855 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -46,46 +46,52 @@ enum con_scroll { struct consw { struct module *owner; const char *(*con_startup)(void); - void (*con_init)(struct vc_data *, int); - void (*con_deinit)(struct vc_data *); - void (*con_clear)(struct vc_data *, int, int, int, int); - void (*con_putc)(struct vc_data *, int, int, int); - void (*con_putcs)(struct vc_data *, const unsigned short *, int, int, int); - void (*con_cursor)(struct vc_data *, int); - bool (*con_scroll)(struct vc_data *, unsigned int top, + void (*con_init)(struct vc_data *vc, int init); + void (*con_deinit)(struct vc_data *vc); + void (*con_clear)(struct vc_data *vc, int sy, int sx, int height, + int width); + void (*con_putc)(struct vc_data *vc, int c, int ypos, int xpos); + void (*con_putcs)(struct vc_data *vc, const unsigned short *s, + int count, int ypos, int xpos); + void (*con_cursor)(struct vc_data *vc, int mode); + bool (*con_scroll)(struct vc_data *vc, unsigned int top, unsigned int bottom, enum con_scroll dir, unsigned int lines); - int (*con_switch)(struct vc_data *); - int (*con_blank)(struct vc_data *, int, int); - int (*con_font_set)(struct vc_data *, struct console_font *, unsigned); - int (*con_font_get)(struct vc_data *, struct console_font *); - int (*con_font_default)(struct vc_data *, struct console_font *, char *); - int (*con_font_copy)(struct vc_data *, int); - int (*con_resize)(struct vc_data *, unsigned int, unsigned int, - unsigned int); - void (*con_set_palette)(struct vc_data *, + int (*con_switch)(struct vc_data *vc); + int (*con_blank)(struct vc_data *vc, int blank, int mode_switch); + int (*con_font_set)(struct 
vc_data *vc, struct console_font *font, + unsigned int flags); + int (*con_font_get)(struct vc_data *vc, struct console_font *font); + int (*con_font_default)(struct vc_data *vc, + struct console_font *font, char *name); + int (*con_font_copy)(struct vc_data *vc, int con); + int (*con_resize)(struct vc_data *vc, unsigned int width, + unsigned int height, unsigned int user); + void (*con_set_palette)(struct vc_data *vc, const unsigned char *table); - void (*con_scrolldelta)(struct vc_data *, int lines); - int (*con_set_origin)(struct vc_data *); - void (*con_save_screen)(struct vc_data *); - u8 (*con_build_attr)(struct vc_data *, u8, u8, u8, u8, u8, u8); - void (*con_invert_region)(struct vc_data *, u16 *, int); - u16 *(*con_screen_pos)(struct vc_data *, int); - unsigned long (*con_getxy)(struct vc_data *, unsigned long, int *, int *); + void (*con_scrolldelta)(struct vc_data *vc, int lines); + int (*con_set_origin)(struct vc_data *vc); + void (*con_save_screen)(struct vc_data *vc); + u8 (*con_build_attr)(struct vc_data *vc, u8 color, u8 intensity, + u8 blink, u8 underline, u8 reverse, u8 italic); + void (*con_invert_region)(struct vc_data *vc, u16 *p, int count); + u16 *(*con_screen_pos)(struct vc_data *vc, int offset); + unsigned long (*con_getxy)(struct vc_data *vc, unsigned long position, + int *px, int *py); /* * Flush the video console driver's scrollback buffer */ - void (*con_flush_scrollback)(struct vc_data *); + void (*con_flush_scrollback)(struct vc_data *vc); /* * Prepare the console for the debugger. This includes, but is not * limited to, unblanking the console, loading an appropriate * palette, and allowing debugger generated output. */ - int (*con_debug_enter)(struct vc_data *); + int (*con_debug_enter)(struct vc_data *vc); /* * Restore the console to its pre-debug state as closely as possible. 
*/ - int (*con_debug_leave)(struct vc_data *); + int (*con_debug_leave)(struct vc_data *vc); }; extern const struct consw *conswitchp; -- cgit v1.2.3 From 7b1f641776e0c8b824fb10135168e4b683a9e2ba Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Feb 2018 15:07:52 +0100 Subject: fsnotify: Let userspace know about lost events due to ENOMEM Currently if notification event is lost due to event allocation failing with ENOMEM, we just silently continue (except for fanotify permission events where we deny the access). This is undesirable as userspace has no way of knowing whether the notifications it got are complete or not. Treat lost events due to ENOMEM the same way as lost events due to queue overflow so that userspace knows something bad happened and it likely needs to rescan the filesystem. Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 067d52e95f02..9f1edb92c97e 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -331,6 +331,12 @@ extern int fsnotify_add_event(struct fsnotify_group *group, struct fsnotify_event *event, int (*merge)(struct list_head *, struct fsnotify_event *)); +/* Queue overflow event to a notification group */ +static inline void fsnotify_queue_overflow(struct fsnotify_group *group) +{ + fsnotify_add_event(group, group->overflow_event, NULL); +} + /* true if the group notification queue is empty */ extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); /* return, but do not dequeue the first event on the notification queue */ -- cgit v1.2.3 From 243ac21035176ac9692c1308a9f3b8f6a4e5d733 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 20 Feb 2018 07:30:22 -0600 Subject: ipmi: Add or fix SPDX-License-Identifier in all files And get rid of the license text that is no longer necessary.
Signed-off-by: Corey Minyard Cc: Kees Cook Cc: Alistair Popple Cc: Jeremy Kerr Cc: Joel Stanley Cc: Rocky Craig --- include/linux/ipmi-fru.h | 3 +-- include/linux/ipmi.h | 21 +-------------------- include/linux/ipmi_smi.h | 21 +-------------------- 3 files changed, 3 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipmi-fru.h b/include/linux/ipmi-fru.h index 4d3a76380e32..05c9422624c6 100644 --- a/include/linux/ipmi-fru.h +++ b/include/linux/ipmi-fru.h @@ -1,9 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * Copyright (C) 2012 CERN (www.cern.ch) * Author: Alessandro Rubini * - * Released according to the GNU GPL, version 2 or any later version. - * * This work is part of the White Rabbit project, a research effort led * by CERN, the European Institute for Nuclear Research. */ diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index f4ffacf4fe9d..8b0626cec980 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * ipmi.h * @@ -9,26 +10,6 @@ * * Copyright 2002 MontaVista Software Inc. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR - * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE - * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef __LINUX_IPMI_H #define __LINUX_IPMI_H diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index 5be51281e14d..af457b5a689e 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * ipmi_smi.h * @@ -9,26 +10,6 @@ * * Copyright 2002 MontaVista Software Inc. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR - * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE - * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef __LINUX_IPMI_SMI_H -- cgit v1.2.3 From 41d9d44d725808f27b53f266733e6d17d83020ba Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Fri, 23 Feb 2018 09:43:56 -0600 Subject: ARM: OMAP2+: pm33xx-core: Add platform code needed for PM Most of the PM code needed for am335x and am437x can be moved into a module under drivers but some core code must remain in mach-omap2 at the moment. This includes some internal clockdomain APIs and low-level ARM APIs which are also not exported for use by modules. Implement a few functions that handle these low-level platform operations and can be passed to the pm33xx module through the use of platform data. In addition to this, to be able to share data structures between C and the sleep33xx and sleep43xx assembly code, we can automatically generate all of the C struct member offsets and sizes as macros by processing pm-asm-offsets.c into assembly code and then extracting the relevant data as is done for the generated platform asm-offsets.h files. Finally, add amx3_common_pm_init to create a dummy platform_device for pm33xx so that our soon to be introduced pm33xx module can probe on am335x and am437x platforms to enable basic suspend to mem and standby support.
Signed-off-by: Dave Gerlach Acked-by: Santosh Shilimkar Signed-off-by: Tony Lindgren --- include/linux/platform_data/pm33xx.h | 42 ++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 include/linux/platform_data/pm33xx.h (limited to 'include/linux') diff --git a/include/linux/platform_data/pm33xx.h b/include/linux/platform_data/pm33xx.h new file mode 100644 index 000000000000..f9bed2a0af9d --- /dev/null +++ b/include/linux/platform_data/pm33xx.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * TI pm33xx platform data + * + * Copyright (C) 2016-2018 Texas Instruments, Inc. + * Dave Gerlach + */ + +#ifndef _LINUX_PLATFORM_DATA_PM33XX_H +#define _LINUX_PLATFORM_DATA_PM33XX_H + +#include +#include + +#ifndef __ASSEMBLER__ +struct am33xx_pm_sram_addr { + void (*do_wfi)(void); + unsigned long *do_wfi_sz; + unsigned long *resume_offset; + unsigned long *emif_sram_table; + unsigned long *ro_sram_data; +}; + +struct am33xx_pm_platform_data { + int (*init)(void); + int (*soc_suspend)(unsigned int state, int (*fn)(unsigned long)); + struct am33xx_pm_sram_addr *(*get_sram_addrs)(void); +}; + +struct am33xx_pm_sram_data { + u32 wfi_flags; + u32 l2_aux_ctrl_val; + u32 l2_prefetch_ctrl_val; +} __packed __aligned(8); + +struct am33xx_pm_ro_sram_data { + u32 amx3_pm_sram_data_virt; + u32 amx3_pm_sram_data_phys; +} __packed __aligned(8); + +#endif /* __ASSEMBLER__ */ +#endif /* _LINUX_PLATFORM_DATA_PM33XX_H */ -- cgit v1.2.3 From ead18c23c263374ed098a7d955b29b4a466d4573 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 10 Feb 2018 19:27:12 +0100 Subject: driver core: Introduce device links reference counting If device_link_add() is invoked multiple times with the same supplier and consumer combo, it will create the link on first addition and return a pointer to the already existing link on all subsequent additions. 
The semantics for device_link_del() are quite different, it deletes the link unconditionally, so multiple invocations are not allowed. In other words, this snippet ... struct device *dev1, *dev2; struct device_link *link1, *link2; link1 = device_link_add(dev1, dev2, 0); link2 = device_link_add(dev1, dev2, 0); device_link_del(link1); device_link_del(link2); ... causes the following crash: WARNING: CPU: 4 PID: 2686 at drivers/base/power/runtime.c:1611 pm_runtime_drop_link+0x40/0x50 [...] list_del corruption, 0000000039b800a4->prev is LIST_POISON2 (00000000ecf79852) kernel BUG at lib/list_debug.c:50! The issue isn't as arbitrary as it may seem: Imagine a device link which is added in both the supplier's and the consumer's ->probe hook. The two drivers can't just call device_link_del() in their ->remove hook without coordination. Fix by counting multiple additions and dropping the device link only when the last addition is unwound. Signed-off-by: Lukas Wunner [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki --- include/linux/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index b093405ed525..abf952c82c6d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -769,6 +769,7 @@ enum device_link_state { * @status: The state of the link (with respect to the presence of drivers). * @flags: Link flags. * @rpm_active: Whether or not the consumer device is runtime-PM-active. + * @kref: Count repeated addition of the same link. * @rcu_head: An RCU head to use for deferred execution of SRCU callbacks. 
*/ struct device_link { @@ -779,6 +780,7 @@ struct device_link { enum device_link_state status; u32 flags; bool rpm_active; + struct kref kref; #ifdef CONFIG_SRCU struct rcu_head rcu_head; #endif -- cgit v1.2.3 From d417e0691ac00d35c4e6b90fc3fc85631a7865ad Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 22 Feb 2018 11:29:44 +0530 Subject: cpufreq: Validate frequency table in the core By design, cpufreq drivers are responsible for calling cpufreq_frequency_table_cpuinfo() from their ->init() callbacks to validate the frequency table. However, if a cpufreq driver is buggy and fails to do so properly, it can lead to unexpected behavior of the driver or the cpufreq core at a later point in time. It would be better if the core could validate the frequency table during driver initialization. To that end, introduce cpufreq_table_validate_and_sort() and make the cpufreq core call it right after invoking the ->init() callback of the driver and destroy the cpufreq policy if the table is invalid. For the time being the validation of the table happens twice, once from the driver and then from the core. The individual drivers will be updated separately to drop table validation if they don't need it for other reasons. The frequency table is marked "sorted" or "unsorted" by the new helper now instead of in cpufreq_table_validate_and_show(), as it should only be done after validating the table (which the drivers won't do going forward). Signed-off-by: Viresh Kumar [ rjw: Subject/changelog ] Signed-off-by: Rafael J.
Wysocki --- include/linux/cpufreq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 21e8d248d956..1fe49724da9e 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -962,6 +962,7 @@ extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs; extern struct freq_attr *cpufreq_generic_attr[]; int cpufreq_table_validate_and_show(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table); +int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy); unsigned int cpufreq_generic_get(unsigned int cpu); int cpufreq_generic_init(struct cpufreq_policy *policy, -- cgit v1.2.3 From 9c2c2e62df3fa30fb13fbeb7512a4eede729383b Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 27 Feb 2018 01:56:06 +0100 Subject: net: phy: Restore phy_resume() locking assumption commit f5e64032a799 ("net: phy: fix resume handling") changes the locking semantics for phy_resume() such that the caller now needs to hold the phy mutex. Not all call sites were adapted to this new semantic, resulting in warnings from the added WARN_ON(!mutex_is_locked(&phydev->lock)). Rather than change the semantics, add a __phy_resume() and restore the old behavior of phy_resume(). Reported-by: Heiner Kallweit Fixes: f5e64032a799 ("net: phy: fix resume handling") Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S.
Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 5a0c3e53e7c2..d7069539f351 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -924,6 +924,7 @@ void phy_device_remove(struct phy_device *phydev); int phy_init_hw(struct phy_device *phydev); int phy_suspend(struct phy_device *phydev); int phy_resume(struct phy_device *phydev); +int __phy_resume(struct phy_device *phydev); int phy_loopback(struct phy_device *phydev, bool enable); struct phy_device *phy_attach(struct net_device *dev, const char *bus_id, phy_interface_t interface); -- cgit v1.2.3 From 91295d79d65892eabd02a2a75fd4ac88197d17a1 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Tue, 27 Feb 2018 14:14:08 -0600 Subject: PCI: Handle FLR failure and allow other reset types pci_flr_wait() and pci_af_flr() functions assume graceful return even though the device is inaccessible under error conditions. Return -ENOTTY in error cases so that __pci_reset_function_locked() can try other reset types if AF_FLR/FLR reset fails. 
Signed-off-by: Sinan Kaya Signed-off-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 024a1beda008..af75d9d76189 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1082,7 +1082,7 @@ int pcie_get_mps(struct pci_dev *dev); int pcie_set_mps(struct pci_dev *dev, int mps); int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed, enum pcie_link_width *width); -void pcie_flr(struct pci_dev *dev); +int pcie_flr(struct pci_dev *dev); int __pci_reset_function_locked(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); int pci_reset_function_locked(struct pci_dev *dev); -- cgit v1.2.3 From 28b0f8a6962a24ed21737578f3b1b07424635c9e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Feb 2018 07:38:08 -0800 Subject: tty: make n_tty_read() always abort if hangup is in progress A tty is hung up by __tty_hangup() setting file->f_op to hung_up_tty_fops, which is skipped on ttys whose write operation isn't tty_write(). This means that, for example, /dev/console whose write op is redirected_tty_write() is never actually marked hung up. Because n_tty_read() uses the hung up status to decide whether to abort the waiting readers, the lack of hung-up marking can lead to the following scenario. 1. A session contains two processes. The leader and its child. The child ignores SIGHUP. 2. The leader exits and starts disassociating from the controlling terminal (/dev/console). 3. __tty_hangup() skips setting f_op to hung_up_tty_fops. 4. SIGHUP is delivered and ignored. 5. tty_ldisc_hangup() is invoked. It wakes up the waits which should clear the read lockers of tty->ldisc_sem. 6. The reader wakes up but because tty_hung_up_p() is false, it doesn't abort and goes back to sleep while read-holding tty->ldisc_sem. 7. 
The leader progresses to tty_ldisc_lock() in tty_ldisc_hangup() and is now stuck in D sleep indefinitely waiting for tty->ldisc_sem. The following is Alan's explanation on why some ttys aren't hung up. http://lkml.kernel.org/r/20171101170908.6ad08580@alans-desktop 1. It broke the serial consoles because they would hang up and close down the hardware. With tty_port that *should* be fixable properly for any cases remaining. 2. The console layer was (and still is) completely broken and doesn't refcount properly. So if you turn on console hangups it breaks (as indeed does freeing consoles and half a dozen other things). As neither can be fixed quickly, this patch works around the problem by introducing a new flag, TTY_HUPPING, which is used solely to tell n_tty_read() that hang-up is in progress for the console and the readers should be aborted regardless of the hung-up status of the device. The following is a sample hung task warning caused by this issue. INFO: task agetty:2662 blocked for more than 120 seconds. Not tainted 4.11.3-dbg-tty-lockup-02478-gfd6c7ee-dirty #28 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. 0 2662 1 0x00000086 Call Trace: __schedule+0x267/0x890 schedule+0x36/0x80 schedule_timeout+0x23c/0x2e0 ldsem_down_write+0xce/0x1f6 tty_ldisc_lock+0x16/0x30 tty_ldisc_hangup+0xb3/0x1b0 __tty_hangup+0x300/0x410 disassociate_ctty+0x6c/0x290 do_exit+0x7ef/0xb00 do_group_exit+0x3f/0xa0 get_signal+0x1b3/0x5d0 do_signal+0x28/0x660 exit_to_usermode_loop+0x46/0x86 do_syscall_64+0x9c/0xb0 entry_SYSCALL64_slow_path+0x25/0x25 The following is the repro. Run "$PROG /dev/console". The parent process hangs in D state.
#include #include #include #include #include #include #include #include #include #include #include #include int main(int argc, char **argv) { struct sigaction sact = { .sa_handler = SIG_IGN }; struct timespec ts1s = { .tv_sec = 1 }; pid_t pid; int fd; if (argc < 2) { fprintf(stderr, "test-hung-tty /dev/$TTY\n"); return 1; } /* fork a child to ensure that it isn't already the session leader */ pid = fork(); if (pid < 0) { perror("fork"); return 1; } if (pid > 0) { /* top parent, wait for everyone */ while (waitpid(-1, NULL, 0) >= 0) ; if (errno != ECHILD) perror("waitpid"); return 0; } /* new session, start a new session and set the controlling tty */ if (setsid() < 0) { perror("setsid"); return 1; } fd = open(argv[1], O_RDWR); if (fd < 0) { perror("open"); return 1; } if (ioctl(fd, TIOCSCTTY, 1) < 0) { perror("ioctl"); return 1; } /* fork a child, sleep a bit and exit */ pid = fork(); if (pid < 0) { perror("fork"); return 1; } if (pid > 0) { nanosleep(&ts1s, NULL); printf("Session leader exiting\n"); exit(0); } /* * The child ignores SIGHUP and keeps reading from the controlling * tty. Because SIGHUP is ignored, the child doesn't get killed on * parent exit and the bug in n_tty makes the read(2) block the * parent's control terminal hangup attempt. The parent ends up in * D sleep until the child is explicitly killed. 
*/ sigaction(SIGHUP, &sact, NULL); printf("Child reading tty\n"); while (1) { char buf[1024]; if (read(fd, buf, sizeof(buf)) < 0) { perror("read"); return 1; } } return 0; } Signed-off-by: Tejun Heo Cc: Alan Cox Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 0a6c71e0ad01..47f8af22f216 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -364,6 +364,7 @@ struct tty_file_private { #define TTY_PTY_LOCK 16 /* pty private */ #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */ #define TTY_HUPPED 18 /* Post driver->hangup() */ +#define TTY_HUPPING 19 /* Hangup in progress */ #define TTY_LDISC_HALTED 22 /* Line discipline is halted */ /* Values for tty->flow_change */ -- cgit v1.2.3 From aad76f2c48b70d993706580c254a89326ad4d7de Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 2 Feb 2018 18:46:36 +0200 Subject: serial, pci_ids: Move duplicate IDs to PCI IDs database PCI ID database is for IDs used across several drivers. Here is the case for SUNIX combo cards. No functional change intended. 
Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- include/linux/pci_ids.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index a6b30667a331..e4b0387956cf 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2556,6 +2556,9 @@ #define PCI_DEVICE_ID_TEHUTI_3010 0x3010 #define PCI_DEVICE_ID_TEHUTI_3014 0x3014 +#define PCI_VENDOR_ID_SUNIX 0x1fd4 +#define PCI_DEVICE_ID_SUNIX_1999 0x1999 + #define PCI_VENDOR_ID_HINT 0x3388 #define PCI_DEVICE_ID_HINT_VXPROII_IDE 0x8013 -- cgit v1.2.3 From a9c79364df324a69ba1b71accd5b8a3155e570ac Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 27 Feb 2018 15:53:02 +0000 Subject: phylink,sfp: negotiate interface format with MAC Negotiate the interface format with the MAC rather than requiring it to be a fixed type specified solely by the SFP module. This allows modules that can work with several different interface signalling formats to select a format compatible with the MAC - for example, a Fiber module supporting Gigabit ethernet and faster connected to a Gigabit only MAC needs to select the 1000BASE-X mode. Signed-off-by: Russell King Signed-off-by: David S.
Miller --- include/linux/sfp.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index e724d5a3dd80..ebce9e24906a 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -422,10 +422,11 @@ struct sfp_upstream_ops { #if IS_ENABLED(CONFIG_SFP) int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support); -phy_interface_t sfp_parse_interface(struct sfp_bus *bus, - const struct sfp_eeprom_id *id); void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support); +phy_interface_t sfp_select_interface(struct sfp_bus *bus, + const struct sfp_eeprom_id *id, + unsigned long *link_modes); int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo); int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee, @@ -444,18 +445,19 @@ static inline int sfp_parse_port(struct sfp_bus *bus, return PORT_OTHER; } -static inline phy_interface_t sfp_parse_interface(struct sfp_bus *bus, - const struct sfp_eeprom_id *id) -{ - return PHY_INTERFACE_MODE_NA; -} - static inline void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support) { } +static inline phy_interface_t sfp_select_interface(struct sfp_bus *bus, + const struct sfp_eeprom_id *id, + unsigned long *link_modes) +{ + return PHY_INTERFACE_MODE_NA; +} + static inline int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo) { -- cgit v1.2.3 From aa4f886f3893f88146e8e02fd1e9c5c9e43cbcc1 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 28 Mar 2017 11:36:07 +0100 Subject: firmware: arm_scmi: add basic driver infrastructure for SCMI The SCMI is intended to allow OSPM to manage various functions that are provided by the hardware platform it is running on, including power and performance functions. SCMI provides two levels of abstraction, protocols and transports. 
Protocols define individual groups of system control and management messages. A protocol specification describes the messages that it supports. Transports describe the method by which protocol messages are communicated between agents and the platform. This patch adds basic infrastructure to manage the message allocation, initialisation, packing/unpacking and shared memory management. Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/scmi_protocol.h (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h new file mode 100644 index 000000000000..1f0e89b270c6 --- /dev/null +++ b/include/linux/scmi_protocol.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * SCMI Message Protocol driver header + * + * Copyright (C) 2018 ARM Ltd. + */ +#include + +/** + * struct scmi_handle - Handle returned to ARM SCMI clients for usage. + * + * @dev: pointer to the SCMI device + */ +struct scmi_handle { + struct device *dev; +}; -- cgit v1.2.3 From b6f20ff8bd94ad34032804a60bab5ee56752007e Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 6 Jun 2017 11:16:15 +0100 Subject: firmware: arm_scmi: add common infrastructure and support for base protocol The base protocol describes the properties of the implementation and provides generic error management. The base protocol provides commands to describe protocol version, discover implementation specific attributes and vendor/sub-vendor identification, list of protocols implemented and the various agents in the system including OSPM and the platform. It also supports registering for notifications of platform errors. This protocol is mandatory. This patch adds support for the same along with some basic infrastructure to add support for other protocols.
Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 1f0e89b270c6..08fcc1dd0276 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -6,11 +6,48 @@ */ #include +#define SCMI_MAX_STR_SIZE 16 + +/** + * struct scmi_revision_info - version information structure + * + * @major_ver: Major ABI version. Change here implies risk of backward + * compatibility break. + * @minor_ver: Minor ABI version. Change here implies new feature addition, + * or compatible change in ABI. + * @num_protocols: Number of protocols that are implemented, excluding the + * base protocol. + * @num_agents: Number of agents in the system. + * @impl_ver: A vendor-specific implementation version. + * @vendor_id: A vendor identifier(Null terminated ASCII string) + * @sub_vendor_id: A sub-vendor identifier(Null terminated ASCII string) + */ +struct scmi_revision_info { + u16 major_ver; + u16 minor_ver; + u8 num_protocols; + u8 num_agents; + u32 impl_ver; + char vendor_id[SCMI_MAX_STR_SIZE]; + char sub_vendor_id[SCMI_MAX_STR_SIZE]; +}; + /** * struct scmi_handle - Handle returned to ARM SCMI clients for usage. 
* * @dev: pointer to the SCMI device + * @version: pointer to the structure containing SCMI version information */ struct scmi_handle { struct device *dev; + struct scmi_revision_info *version; +}; + +enum scmi_std_protocol { + SCMI_PROTOCOL_BASE = 0x10, + SCMI_PROTOCOL_POWER = 0x11, + SCMI_PROTOCOL_SYSTEM = 0x12, + SCMI_PROTOCOL_PERF = 0x13, + SCMI_PROTOCOL_CLOCK = 0x14, + SCMI_PROTOCOL_SENSOR = 0x15, }; -- cgit v1.2.3 From 933c504424a2bc784fdb4cd5c318049d55da20e0 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 30 Oct 2017 18:33:30 +0000 Subject: firmware: arm_scmi: add scmi protocol bus to enumerate protocol devices The SCMI specification encompasses various protocols. However, not every protocol has to be present on a given platform/implementation as not every protocol is relevant for it. Furthermore, the platform chooses which protocols it exposes to a given agent. The only protocol that must be implemented is the base protocol. The base protocol is used by an agent to discover which protocols are available to it. In order to enumerate the discovered implemented protocols, this patch adds support for a separate scmi protocol bus. It also adds mechanism to register support for different protocols. Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 64 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 08fcc1dd0276..464086b9d8c5 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -4,6 +4,7 @@ * * Copyright (C) 2018 ARM Ltd. 
*/ +#include #include #define SCMI_MAX_STR_SIZE 16 @@ -51,3 +52,66 @@ enum scmi_std_protocol { SCMI_PROTOCOL_CLOCK = 0x14, SCMI_PROTOCOL_SENSOR = 0x15, }; + +struct scmi_device { + u32 id; + u8 protocol_id; + struct device dev; + struct scmi_handle *handle; +}; + +#define to_scmi_dev(d) container_of(d, struct scmi_device, dev) + +struct scmi_device * +scmi_device_create(struct device_node *np, struct device *parent, int protocol); +void scmi_device_destroy(struct scmi_device *scmi_dev); + +struct scmi_device_id { + u8 protocol_id; +}; + +struct scmi_driver { + const char *name; + int (*probe)(struct scmi_device *sdev); + void (*remove)(struct scmi_device *sdev); + const struct scmi_device_id *id_table; + + struct device_driver driver; +}; + +#define to_scmi_driver(d) container_of(d, struct scmi_driver, driver) + +#ifdef CONFIG_ARM_SCMI_PROTOCOL +int scmi_driver_register(struct scmi_driver *driver, + struct module *owner, const char *mod_name); +void scmi_driver_unregister(struct scmi_driver *driver); +#else +static inline int +scmi_driver_register(struct scmi_driver *driver, struct module *owner, + const char *mod_name) +{ + return -EINVAL; +} + +static inline void scmi_driver_unregister(struct scmi_driver *driver) {} +#endif /* CONFIG_ARM_SCMI_PROTOCOL */ + +#define scmi_register(driver) \ + scmi_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) +#define scmi_unregister(driver) \ + scmi_driver_unregister(driver) + +/** + * module_scmi_driver() - Helper macro for registering a scmi driver + * @__scmi_driver: scmi_driver structure + * + * Helper macro for scmi drivers to set up proper module init / exit + * functions. Replaces module_init() and module_exit() and keeps people from + * printing pointless things to the kernel log when their driver is loaded. 
+ */ +#define module_scmi_driver(__scmi_driver) \ + module_driver(__scmi_driver, scmi_register, scmi_unregister) + +typedef int (*scmi_prot_init_fn_t)(struct scmi_handle *); +int scmi_protocol_register(int protocol_id, scmi_prot_init_fn_t fn); +void scmi_protocol_unregister(int protocol_id); -- cgit v1.2.3 From a9e3fbfaa0ff885aacafe6f33e72448a2993d072 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 6 Jun 2017 11:22:51 +0100 Subject: firmware: arm_scmi: add initial support for performance protocol The performance protocol is intended for the performance management of group(s) of device(s) that run in the same performance domain. It includes even the CPUs. A performance domain is defined by a set of devices that always have to run at the same performance level. For example, a set of CPUs that share a voltage domain, and have a common frequency control, is said to be in the same performance domain. The commands in this protocol provide functionality to describe the protocol version, describe various attribute flags, set and get the performance level of a domain. It also supports discovery of the list of performance levels supported by a performance domain, and the properties of each performance level. This patch adds basic support for the performance protocol. 
Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 464086b9d8c5..57d4b1c099e5 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -33,15 +33,57 @@ struct scmi_revision_info { char sub_vendor_id[SCMI_MAX_STR_SIZE]; }; +struct scmi_handle; + +/** + * struct scmi_perf_ops - represents the various operations provided + * by SCMI Performance Protocol + * + * @limits_set: sets limits on the performance level of a domain + * @limits_get: gets limits on the performance level of a domain + * @level_set: sets the performance level of a domain + * @level_get: gets the performance level of a domain + * @device_domain_id: gets the scmi domain id for a given device + * @get_transition_latency: gets the DVFS transition latency for a given device + * @add_opps_to_device: adds all the OPPs for a given device + * @freq_set: sets the frequency for a given device using sustained frequency + * to sustained performance level mapping + * @freq_get: gets the frequency for a given device using sustained frequency + * to sustained performance level mapping + */ +struct scmi_perf_ops { + int (*limits_set)(const struct scmi_handle *handle, u32 domain, + u32 max_perf, u32 min_perf); + int (*limits_get)(const struct scmi_handle *handle, u32 domain, + u32 *max_perf, u32 *min_perf); + int (*level_set)(const struct scmi_handle *handle, u32 domain, + u32 level); + int (*level_get)(const struct scmi_handle *handle, u32 domain, + u32 *level); + int (*device_domain_id)(struct device *dev); + int (*get_transition_latency)(const struct scmi_handle *handle, + struct device *dev); + int (*add_opps_to_device)(const struct scmi_handle *handle, + struct device *dev); + int (*freq_set)(const struct scmi_handle *handle, u32 domain, + unsigned 
long rate); + int (*freq_get)(const struct scmi_handle *handle, u32 domain, + unsigned long *rate); +}; + /** * struct scmi_handle - Handle returned to ARM SCMI clients for usage. * * @dev: pointer to the SCMI device * @version: pointer to the structure containing SCMI version information + * @perf_ops: pointer to set of performance protocol operations */ struct scmi_handle { struct device *dev; struct scmi_revision_info *version; + struct scmi_perf_ops *perf_ops; + /* for protocol internal use */ + void *perf_priv; }; enum scmi_std_protocol { -- cgit v1.2.3 From 5f6c6430e904d21bfe5d0076b1ff3e8b9ed94ba0 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 6 Jun 2017 11:27:57 +0100 Subject: firmware: arm_scmi: add initial support for clock protocol The clock protocol is intended for management of clocks. It is used to enable or disable clocks, and to set and get the clock rates. This protocol provides commands to describe the protocol version, discover various implementation specific attributes, describe a clock, enable and disable a clock and get/set the rate of the clock synchronously or asynchronously. This patch adds initial support for the clock protocol. 
Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 44 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 57d4b1c099e5..5a3092f05011 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -8,6 +8,7 @@ #include #define SCMI_MAX_STR_SIZE 16 +#define SCMI_MAX_NUM_RATES 16 /** * struct scmi_revision_info - version information structure @@ -33,8 +34,48 @@ struct scmi_revision_info { char sub_vendor_id[SCMI_MAX_STR_SIZE]; }; +struct scmi_clock_info { + char name[SCMI_MAX_STR_SIZE]; + bool rate_discrete; + union { + struct { + int num_rates; + u64 rates[SCMI_MAX_NUM_RATES]; + } list; + struct { + u64 min_rate; + u64 max_rate; + u64 step_size; + } range; + }; +}; + struct scmi_handle; +/** + * struct scmi_clk_ops - represents the various operations provided + * by SCMI Clock Protocol + * + * @count_get: get the count of clocks provided by SCMI + * @info_get: get the information of the specified clock + * @rate_get: request the current clock rate of a clock + * @rate_set: set the clock rate of a clock + * @enable: enables the specified clock + * @disable: disables the specified clock + */ +struct scmi_clk_ops { + int (*count_get)(const struct scmi_handle *handle); + + const struct scmi_clock_info *(*info_get) + (const struct scmi_handle *handle, u32 clk_id); + int (*rate_get)(const struct scmi_handle *handle, u32 clk_id, + u64 *rate); + int (*rate_set)(const struct scmi_handle *handle, u32 clk_id, + u32 config, u64 rate); + int (*enable)(const struct scmi_handle *handle, u32 clk_id); + int (*disable)(const struct scmi_handle *handle, u32 clk_id); +}; + /** * struct scmi_perf_ops - represents the various operations provided * by SCMI Performance Protocol @@ -77,13 +118,16 @@ struct scmi_perf_ops { * @dev: pointer to the SCMI device * @version: pointer to the 
structure containing SCMI version information * @perf_ops: pointer to set of performance protocol operations + * @clk_ops: pointer to set of clock protocol operations */ struct scmi_handle { struct device *dev; struct scmi_revision_info *version; struct scmi_perf_ops *perf_ops; + struct scmi_clk_ops *clk_ops; /* for protocol internal use */ void *perf_priv; + void *clk_priv; }; enum scmi_std_protocol { -- cgit v1.2.3 From 76a6550990e296a7acbb4d33201c9740be912a8c Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 6 Jun 2017 11:32:24 +0100 Subject: firmware: arm_scmi: add initial support for power protocol The power protocol is intended for management of power states of various power domains. The power domain management protocol provides commands to describe the protocol version, discover the implementation specific attributes, set and get the power state of a domain. This patch adds support for the above-mentioned features of the protocol. Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Signed-off-by: Sudeep Holla -- drivers/firmware/arm_scmi/Makefile | 2 +- drivers/firmware/arm_scmi/power.c | 242 +++++++++++++++++++++++++++++++++++++ include/linux/scmi_protocol.h | 28 +++++ 3 files changed, 271 insertions(+), 1 deletion(-) create mode 100644 drivers/firmware/arm_scmi/power.c --- include/linux/scmi_protocol.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 5a3092f05011..8cd0348787bc 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -112,11 +112,37 @@ struct scmi_perf_ops { unsigned long *rate); }; +/** + * struct scmi_power_ops - represents the various operations provided + * by SCMI Power Protocol + * + * @num_domains_get: get the count of power domains provided by SCMI + * @name_get: gets the name of a power domain + * @state_set: sets the power state of a power domain + * @state_get: gets the power state of a
power domain + */ +struct scmi_power_ops { + int (*num_domains_get)(const struct scmi_handle *handle); + char *(*name_get)(const struct scmi_handle *handle, u32 domain); +#define SCMI_POWER_STATE_TYPE_SHIFT 30 +#define SCMI_POWER_STATE_ID_MASK (BIT(28) - 1) +#define SCMI_POWER_STATE_PARAM(type, id) \ + ((((type) & BIT(0)) << SCMI_POWER_STATE_TYPE_SHIFT) | \ + ((id) & SCMI_POWER_STATE_ID_MASK)) +#define SCMI_POWER_STATE_GENERIC_ON SCMI_POWER_STATE_PARAM(0, 0) +#define SCMI_POWER_STATE_GENERIC_OFF SCMI_POWER_STATE_PARAM(1, 0) + int (*state_set)(const struct scmi_handle *handle, u32 domain, + u32 state); + int (*state_get)(const struct scmi_handle *handle, u32 domain, + u32 *state); +}; + /** * struct scmi_handle - Handle returned to ARM SCMI clients for usage. * * @dev: pointer to the SCMI device * @version: pointer to the structure containing SCMI version information + * @power_ops: pointer to set of power protocol operations * @perf_ops: pointer to set of performance protocol operations * @clk_ops: pointer to set of clock protocol operations */ @@ -125,9 +151,11 @@ struct scmi_handle { struct scmi_revision_info *version; struct scmi_perf_ops *perf_ops; struct scmi_clk_ops *clk_ops; + struct scmi_power_ops *power_ops; /* for protocol internal use */ void *perf_priv; void *clk_priv; + void *power_priv; }; enum scmi_std_protocol { -- cgit v1.2.3 From 5179c523c1eae4b80fbafe9656bc24a375217cac Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 6 Jun 2017 11:38:10 +0100 Subject: firmware: arm_scmi: add initial support for sensor protocol The sensor protocol provides functions to manage platform sensors, and provides the commands to describe the protocol version and the various attribute flags. It also provides commands to discover various sensors implemented and managed by the platform, read any sensor synchronously or asynchronously as allowed by the platform, program sensor attributes and/or configurations, if applicable. 
This patch adds support for most of the above features. Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 46 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 8cd0348787bc..5d63da9435ba 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -137,6 +137,49 @@ struct scmi_power_ops { u32 *state); }; +struct scmi_sensor_info { + u32 id; + u8 type; + char name[SCMI_MAX_STR_SIZE]; +}; + +/* + * Partial list from Distributed Management Task Force (DMTF) specification: + * DSP0249 (Platform Level Data Model specification) + */ +enum scmi_sensor_class { + NONE = 0x0, + TEMPERATURE_C = 0x2, + VOLTAGE = 0x5, + CURRENT = 0x6, + POWER = 0x7, + ENERGY = 0x8, +}; + +/** + * struct scmi_sensor_ops - represents the various operations provided + * by SCMI Sensor Protocol + * + * @count_get: get the count of sensors provided by SCMI + * @info_get: get the information of the specified sensor + * @configuration_set: control notifications on cross-over events for + * the trip-points + * @trip_point_set: selects and configures a trip-point of interest + * @reading_get: gets the current value of the sensor + */ +struct scmi_sensor_ops { + int (*count_get)(const struct scmi_handle *handle); + + const struct scmi_sensor_info *(*info_get) + (const struct scmi_handle *handle, u32 sensor_id); + int (*configuration_set)(const struct scmi_handle *handle, + u32 sensor_id); + int (*trip_point_set)(const struct scmi_handle *handle, u32 sensor_id, + u8 trip_id, u64 trip_value); + int (*reading_get)(const struct scmi_handle *handle, u32 sensor_id, + bool async, u64 *value); +}; + /** * struct scmi_handle - Handle returned to ARM SCMI clients for usage. 
* @@ -145,6 +188,7 @@ struct scmi_power_ops { * @power_ops: pointer to set of power protocol operations * @perf_ops: pointer to set of performance protocol operations * @clk_ops: pointer to set of clock protocol operations + * @sensor_ops: pointer to set of sensor protocol operations */ struct scmi_handle { struct device *dev; @@ -152,10 +196,12 @@ struct scmi_handle { struct scmi_perf_ops *perf_ops; struct scmi_clk_ops *clk_ops; struct scmi_power_ops *power_ops; + struct scmi_sensor_ops *sensor_ops; /* for protocol internal use */ void *perf_priv; void *clk_priv; void *power_priv; + void *sensor_priv; }; enum scmi_std_protocol { -- cgit v1.2.3 From 5c4ba3cc85296398855d621bf90b78866ea80444 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 21 Jul 2017 11:42:24 +0100 Subject: firmware: arm_scmi: add option for polling based performance domain operations In order to implement fast CPU DVFS switching, we need to perform all DVFS operations atomically. Since SCMI transfers already provide an option to choose between polling vs interrupt driven (default), we can opt for polling based transfers for set,get performance domain operations. This patch adds option to choose between polling vs interrupt driven SCMI transfers for set,get performance level operations.
Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 5d63da9435ba..b458c87b866c 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -98,18 +98,18 @@ struct scmi_perf_ops { int (*limits_get)(const struct scmi_handle *handle, u32 domain, u32 *max_perf, u32 *min_perf); int (*level_set)(const struct scmi_handle *handle, u32 domain, - u32 level); + u32 level, bool poll); int (*level_get)(const struct scmi_handle *handle, u32 domain, - u32 *level); + u32 *level, bool poll); int (*device_domain_id)(struct device *dev); int (*get_transition_latency)(const struct scmi_handle *handle, struct device *dev); int (*add_opps_to_device)(const struct scmi_handle *handle, struct device *dev); int (*freq_set)(const struct scmi_handle *handle, u32 domain, - unsigned long rate); + unsigned long rate, bool poll); int (*freq_get)(const struct scmi_handle *handle, u32 domain, - unsigned long *rate); + unsigned long *rate, bool poll); }; /** -- cgit v1.2.3 From d57538004b2e57be6a5d8583b65d1b049245abf7 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 27 Sep 2017 16:20:50 +0100 Subject: hwmon: (core) Add hwmon_max to hwmon_sensor_types enumeration It's useful to know the maximum types of sensor supported by hwmon framework. It can be used to allocate some data structures when sorting the monitors based on their type. This will be used by scmi hwmon support. 
Cc: linux-hwmon@vger.kernel.org Acked-by: Guenter Roeck Signed-off-by: Sudeep Holla --- include/linux/hwmon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index ceb751987c40..e5fd2707b6df 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -29,6 +29,7 @@ enum hwmon_sensor_types { hwmon_humidity, hwmon_fan, hwmon_pwm, + hwmon_max, }; enum hwmon_chip_attributes { -- cgit v1.2.3 From 723fbf563a6a9cefbd3c58e95694583ad1cb8704 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 15 Feb 2018 09:03:56 +0530 Subject: lib/scatterlist: Add SG_CHAIN and SG_END macros for LSB encodings This replaces scatterlist->page_link LSB encodings with SG_CHAIN and SG_END definitions without any functional change. Signed-off-by: Anshuman Khandual Signed-off-by: Jens Axboe --- include/linux/scatterlist.h | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 22b2131bcdcd..b6fe1815f5c4 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -65,16 +65,18 @@ struct sg_table { */ #define SG_MAGIC 0x87654321 +#define SG_CHAIN 0x01UL +#define SG_END 0x02UL /* * We overload the LSB of the page pointer to indicate whether it's * a valid sg entry, or whether it points to the start of a new scatterlist. * Those low bits are there for everyone! 
(thanks mason :-) */ -#define sg_is_chain(sg) ((sg)->page_link & 0x01) -#define sg_is_last(sg) ((sg)->page_link & 0x02) +#define sg_is_chain(sg) ((sg)->page_link & SG_CHAIN) +#define sg_is_last(sg) ((sg)->page_link & SG_END) #define sg_chain_ptr(sg) \ - ((struct scatterlist *) ((sg)->page_link & ~0x03)) + ((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END))) /** * sg_assign_page - Assign a given page to an SG entry @@ -88,13 +90,13 @@ struct sg_table { **/ static inline void sg_assign_page(struct scatterlist *sg, struct page *page) { - unsigned long page_link = sg->page_link & 0x3; + unsigned long page_link = sg->page_link & (SG_CHAIN | SG_END); /* * In order for the low bit stealing approach to work, pages * must be aligned at a 32-bit boundary as a minimum. */ - BUG_ON((unsigned long) page & 0x03); + BUG_ON((unsigned long) page & (SG_CHAIN | SG_END)); #ifdef CONFIG_DEBUG_SG BUG_ON(sg->sg_magic != SG_MAGIC); BUG_ON(sg_is_chain(sg)); @@ -130,7 +132,7 @@ static inline struct page *sg_page(struct scatterlist *sg) BUG_ON(sg->sg_magic != SG_MAGIC); BUG_ON(sg_is_chain(sg)); #endif - return (struct page *)((sg)->page_link & ~0x3); + return (struct page *)((sg)->page_link & ~(SG_CHAIN | SG_END)); } /** @@ -178,7 +180,8 @@ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, * Set lowest bit to indicate a link pointer, and make sure to clear * the termination bit if it happens to be set. 
*/ - prv[prv_nents - 1].page_link = ((unsigned long) sgl | 0x01) & ~0x02; + prv[prv_nents - 1].page_link = ((unsigned long) sgl | SG_CHAIN) + & ~SG_END; } /** @@ -198,8 +201,8 @@ static inline void sg_mark_end(struct scatterlist *sg) /* * Set termination bit, clear potential chain bit */ - sg->page_link |= 0x02; - sg->page_link &= ~0x01; + sg->page_link |= SG_END; + sg->page_link &= ~SG_CHAIN; } /** @@ -215,7 +218,7 @@ static inline void sg_unmark_end(struct scatterlist *sg) #ifdef CONFIG_DEBUG_SG BUG_ON(sg->sg_magic != SG_MAGIC); #endif - sg->page_link &= ~0x02; + sg->page_link &= ~SG_END; } /** -- cgit v1.2.3 From 4ace53f1ed40a5cfee4bdd7614c8a8b2798227ad Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 27 Feb 2018 16:56:43 -0800 Subject: sbitmap: use test_and_set_bit_lock()/clear_bit_unlock() sbitmap_queue_get()/sbitmap_queue_clear() are used for allocating/freeing a resource, so they should provide acquire/release barrier semantics, respectively. sbitmap_get() currently contains a full barrier, which is unnecessary, so use test_and_set_bit_lock() instead of test_and_set_bit() (these are equivalent on x86_64). sbitmap_clear_bit() does not imply any barriers, which is incorrect, as accesses of the resource (e.g., request) could potentially get reordered to after the clear_bit(). Introduce sbitmap_clear_bit_unlock() and use it for sbitmap_queue_clear() (this only adds a compiler barrier on x86_64). The other existing user of sbitmap_clear_bit() (the blk-mq software queue pending map) is serialized through a spinlock and does not need this. 
Reported-by: Tejun Heo Acked-by: Tejun Heo Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 0dcc60e820de..841585f6e5f2 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -171,6 +171,8 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth); * starting from the last allocated bit. This is less efficient * than the default behavior (false). * + * This operation provides acquire barrier semantics if it succeeds. + * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin); @@ -300,6 +302,12 @@ static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr) clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } +static inline void sbitmap_clear_bit_unlock(struct sbitmap *sb, + unsigned int bitnr) +{ + clear_bit_unlock(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); +} + static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr) { return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); -- cgit v1.2.3 From 5ee0524ba137fe928a88b440d014e3c8451fb32c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 28 Feb 2018 10:15:31 -0800 Subject: block: Add 'lock' as third argument to blk_alloc_queue_node() This patch does not change any functionality. 
Signed-off-by: Bart Van Assche Reviewed-by: Joseph Qi Cc: Christoph Hellwig Cc: Philipp Reisner Cc: Ulf Hansson Cc: Kees Cook Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ed63f3b69c12..667a9b0053d9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1321,7 +1321,8 @@ extern long nr_blockdev_pages(void); bool __must_check blk_get_queue(struct request_queue *); struct request_queue *blk_alloc_queue(gfp_t); -struct request_queue *blk_alloc_queue_node(gfp_t, int); +struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, + spinlock_t *lock); extern void blk_put_queue(struct request_queue *); extern void blk_set_queue_dying(struct request_queue *); -- cgit v1.2.3 From 25e3fca492035a2e1d4ac6e3b1edd9c1acd48897 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 4 Feb 2018 23:07:46 +0100 Subject: random: always fill buffer in get_random_bytes_wait In the unfortunate event that a developer fails to check the return value of get_random_bytes_wait, or simply wants to make a "best effort" attempt, for whatever that's worth, it's much better to still fill the buffer with _something_ rather than catastrophically failing in the case of an interruption. This is both a defense in depth measure against inevitable programming bugs, as well as a means of making the API a bit more useful. Signed-off-by: Jason A. 
Donenfeld Signed-off-by: Theodore Ts'o --- include/linux/random.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index 4024f7d9c77d..2ddf13b4281e 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -85,10 +85,8 @@ static inline unsigned long get_random_canary(void) static inline int get_random_bytes_wait(void *buf, int nbytes) { int ret = wait_for_random_bytes(); - if (unlikely(ret)) - return ret; get_random_bytes(buf, nbytes); - return 0; + return ret; } #define declare_get_random_var_wait(var) \ -- cgit v1.2.3 From fd5cd21d995e67f87b3eb4adf938be85fe83ef4b Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 12 Feb 2018 23:47:19 +0100 Subject: rtc: export rtc_nvmem_register() to drivers Export rtc_nvmem_register() so it can be called from drivers instead of only the core. Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index fc6c90b57be0..fbc92fff7c2e 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -271,4 +271,17 @@ extern int rtc_hctosys_ret; #define rtc_hctosys_ret -ENODEV #endif +#ifdef CONFIG_RTC_NVMEM +int rtc_nvmem_register(struct rtc_device *rtc, + struct nvmem_config *nvmem_config); +void rtc_nvmem_unregister(struct rtc_device *rtc); +#else +static inline int rtc_nvmem_register(struct rtc_device *rtc, + struct nvmem_config *nvmem_config) +{ + return -ENODEV; +} +static inline void rtc_nvmem_unregister(struct rtc_device *rtc) {} +#endif + #endif /* _LINUX_RTC_H_ */ -- cgit v1.2.3 From 0391df74a608e4e65c29ddf80e704edfa8f8ef25 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 12 Feb 2018 23:47:34 +0100 Subject: rtc: remove nvmem_config Because nvmem_config is only used and copied at nvmem registration, remove it from struct rtc_device. 
All the rtc drivers using nvmem are now calling rtc_nvmem_register directly. Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index fbc92fff7c2e..37b041f72f8d 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -145,7 +145,6 @@ struct rtc_device { bool registered; - struct nvmem_config *nvmem_config; struct nvmem_device *nvmem; /* Old ABI support */ bool nvram_old_abi; -- cgit v1.2.3 From 9e7002a70e4294a093b3cacf2346af33aeefd265 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 6 Feb 2018 23:12:26 +0100 Subject: char: rtc: remove unused rtc_control() API Since commit 34ce71a96dcb ("ALSA: timer: remove legacy rtctimer"), the rtc_register/rtc_control/rtc_unregister API is unused. As it is highly unlikely to be needed again, remove it. Acked-by: Greg Kroah-Hartman Acked-by: Arnd Bergmann Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 37b041f72f8d..3b65b201169c 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -211,10 +211,6 @@ void rtc_aie_update_irq(void *private); void rtc_uie_update_irq(void *private); enum hrtimer_restart rtc_pie_update_irq(struct hrtimer *timer); -int rtc_register(rtc_task_t *task); -int rtc_unregister(rtc_task_t *task); -int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg); - void rtc_timer_init(struct rtc_timer *timer, void (*f)(void *p), void *data); int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer *timer, ktime_t expires, ktime_t period); -- cgit v1.2.3 From f16ee7c7ec0fa5f0322bd64d5ee183a28ed1ec08 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 1 Mar 2018 11:31:28 +0100 Subject: misc: rtsx: rename SG_END macro A change to the generic scatterlist code caused a conflict with the rtsx card reader driver: In file 
included from drivers/misc/cardreader/rtsx_pcr.c:32: include/linux/rtsx_pci.h:40: error: "SG_END" redefined [-Werror] This changes one instance of the driver to prefix SG_END and related constants. Fixes: 723fbf563a6a ("lib/scatterlist: Add SG_CHAIN and SG_END macros for LSB encodings") Cc: Anshuman Khandual Signed-off-by: Arnd Bergmann Signed-off-by: Jens Axboe --- include/linux/rtsx_pci.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index 478acf6efac6..e964bbd03fc2 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -36,12 +36,12 @@ #define CHECK_REG_CMD 2 #define RTSX_HDBAR 0x08 -#define SG_INT 0x04 -#define SG_END 0x02 -#define SG_VALID 0x01 -#define SG_NO_OP 0x00 -#define SG_TRANS_DATA (0x02 << 4) -#define SG_LINK_DESC (0x03 << 4) +#define RTSX_SG_INT 0x04 +#define RTSX_SG_END 0x02 +#define RTSX_SG_VALID 0x01 +#define RTSX_SG_NO_OP 0x00 +#define RTSX_SG_TRANS_DATA (0x02 << 4) +#define RTSX_SG_LINK_DESC (0x03 << 4) #define RTSX_HDBCTLR 0x0C #define SDMA_MODE 0x00 #define ADMA_MODE (0x02 << 26) -- cgit v1.2.3 From 6853f21f764b04e58df5e44629fec1fb8f3cbf2e Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 28 Feb 2018 23:29:29 +0200 Subject: ipmr,ipmr6: Define a uniform vif_device The two implementations have almost identical structures - vif_device and mif_device. As a step toward unifying the mr_tables, eliminate the mif_device and relocate the vif_device definition into a new common header file. Also, introduce a common initializing function for setting most of the vif_device fields in a new common source file. This requires modifying the ipv{4,6} Kconfig and ipv4 makefile as we're introducing a new common config option - CONFIG_IP_MROUTE_COMMON. Signed-off-by: Yuval Mintz Acked-by: Nikolay Aleksandrov Signed-off-by: David S.
Miller --- include/linux/mroute.h | 13 +----------- include/linux/mroute6.h | 11 +--------- include/linux/mroute_base.h | 52 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 22 deletions(-) create mode 100644 include/linux/mroute_base.h (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 5396521a776a..b8aadffe6237 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -9,6 +9,7 @@ #include #include #include +#include #ifdef CONFIG_IP_MROUTE static inline int ip_mroute_opt(int opt) @@ -56,18 +57,6 @@ static inline bool ipmr_rule_default(const struct fib_rule *rule) } #endif -struct vif_device { - struct net_device *dev; /* Device we are using */ - struct netdev_phys_item_id dev_parent_id; /* Device parent ID */ - unsigned long bytes_in,bytes_out; - unsigned long pkt_in,pkt_out; /* Statistics */ - unsigned long rate_limit; /* Traffic shaping (NI) */ - unsigned char threshold; /* TTL threshold */ - unsigned short flags; /* Control flags */ - __be32 local,remote; /* Addresses(remote for tunnels)*/ - int link; /* Physical interface index */ -}; - struct vif_entry_notifier_info { struct fib_notifier_info info; struct net_device *dev; diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 3014c52bfd86..e5e5b8282551 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -7,6 +7,7 @@ #include /* for struct sk_buff_head */ #include #include +#include #ifdef CONFIG_IPV6_MROUTE static inline int ip6_mroute_opt(int opt) @@ -62,16 +63,6 @@ static inline void ip6_mr_cleanup(void) } #endif -struct mif_device { - struct net_device *dev; /* Device we are using */ - unsigned long bytes_in,bytes_out; - unsigned long pkt_in,pkt_out; /* Statistics */ - unsigned long rate_limit; /* Traffic shaping (NI) */ - unsigned char threshold; /* TTL threshold */ - unsigned short flags; /* Control flags */ - int link; /* Physical interface index */ -}; - #define VIFF_STATIC 0x8000 
struct mfc6_cache { diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h new file mode 100644 index 000000000000..0de651e15f27 --- /dev/null +++ b/include/linux/mroute_base.h @@ -0,0 +1,52 @@ +#ifndef __LINUX_MROUTE_BASE_H +#define __LINUX_MROUTE_BASE_H + +#include + +/** + * struct vif_device - interface representor for multicast routing + * @dev: network device being used + * @bytes_in: statistic; bytes ingressing + * @bytes_out: statistic; bytes egressing + * @pkt_in: statistic; packets ingressing + * @pkt_out: statistic; packets egressing + * @rate_limit: Traffic shaping (NI) + * @threshold: TTL threshold + * @flags: Control flags + * @link: Physical interface index + * @dev_parent_id: device parent id + * @local: Local address + * @remote: Remote address for tunnels + */ +struct vif_device { + struct net_device *dev; + unsigned long bytes_in, bytes_out; + unsigned long pkt_in, pkt_out; + unsigned long rate_limit; + unsigned char threshold; + unsigned short flags; + int link; + + /* Currently only used by ipmr */ + struct netdev_phys_item_id dev_parent_id; + __be32 local, remote; +}; + +#ifdef CONFIG_IP_MROUTE_COMMON +void vif_device_init(struct vif_device *v, + struct net_device *dev, + unsigned long rate_limit, + unsigned char threshold, + unsigned short flags, + unsigned short get_iflink_mask); +#else +static inline void vif_device_init(struct vif_device *v, + struct net_device *dev, + unsigned long rate_limit, + unsigned char threshold, + unsigned short flags, + unsigned short get_iflink_mask) +{ +} +#endif +#endif -- cgit v1.2.3 From 8571ab479a6e1ef46ead5ebee567e128a422767c Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 28 Feb 2018 23:29:30 +0200 Subject: ip6mr: Make mroute_sk rcu-based In ipmr the mr_table socket is handled under RCU. Introduce the same for ip6mr. Signed-off-by: Yuval Mintz Acked-by: Nikolay Aleksandrov Signed-off-by: David S.
Miller --- include/linux/mroute6.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index e5e5b8282551..e1b9fb06e1ea 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -111,12 +111,12 @@ extern int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, u32 portid); #ifdef CONFIG_IPV6_MROUTE -extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb); +bool mroute6_is_socket(struct net *net, struct sk_buff *skb); extern int ip6mr_sk_done(struct sock *sk); #else -static inline struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) +static inline bool mroute6_is_socket(struct net *net, struct sk_buff *skb) { - return NULL; + return false; } static inline int ip6mr_sk_done(struct sock *sk) { -- cgit v1.2.3 From 87c418bf1323d57140f4b448715f64de3fbb7e91 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 28 Feb 2018 23:29:31 +0200 Subject: ip6mr: Align hash implementation to ipmr Since commit 8fb472c09b9d ("ipmr: improve hash scalability") ipmr has been using rhashtable as a basis for its mfc routes, but ip6mr is currently still using the old private MFC hash implementation. Align ip6mr to the current ipmr implementation. Signed-off-by: Yuval Mintz Acked-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/linux/mroute6.h | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index e1b9fb06e1ea..e2dac199861e 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -8,6 +8,7 @@ #include #include #include +#include #ifdef CONFIG_IPV6_MROUTE static inline int ip6_mroute_opt(int opt) @@ -65,10 +66,20 @@ static inline void ip6_mr_cleanup(void) #define VIFF_STATIC 0x8000 +struct mfc6_cache_cmp_arg { + struct in6_addr mf6c_mcastgrp; + struct in6_addr mf6c_origin; +}; + struct mfc6_cache { - struct list_head list; - struct in6_addr mf6c_mcastgrp; /* Group the entry belongs to */ - struct in6_addr mf6c_origin; /* Source of packet */ + struct rhlist_head mnode; + union { + struct { + struct in6_addr mf6c_mcastgrp; + struct in6_addr mf6c_origin; + }; + struct mfc6_cache_cmp_arg cmparg; + }; mifi_t mf6c_parent; /* Source interface */ int mfc_flags; /* Flags on line */ @@ -88,22 +99,13 @@ struct mfc6_cache { unsigned char ttls[MAXMIFS]; /* TTL thresholds */ } res; } mfc_un; + struct list_head list; + struct rcu_head rcu; }; #define MFC_STATIC 1 #define MFC_NOTIFY 2 -#define MFC6_LINES 64 - -#define MFC6_HASH(a, g) (((__force u32)(a)->s6_addr32[0] ^ \ - (__force u32)(a)->s6_addr32[1] ^ \ - (__force u32)(a)->s6_addr32[2] ^ \ - (__force u32)(a)->s6_addr32[3] ^ \ - (__force u32)(g)->s6_addr32[0] ^ \ - (__force u32)(g)->s6_addr32[1] ^ \ - (__force u32)(g)->s6_addr32[2] ^ \ - (__force u32)(g)->s6_addr32[3]) % MFC6_LINES) - #define MFC_ASSERT_THRESH (3*HZ) /* Maximal freq. 
of asserts */ struct rtmsg; -- cgit v1.2.3 From b70432f7319eb75b24ca57dde8146c5e27244780 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 28 Feb 2018 23:29:32 +0200 Subject: mroute*: Make mr_table a common struct Following previous changes to ip6mr, mr_table and mr6_table are basically the same [up to mr6_table having additional '6' suffixes to its variable names]. Move the common structure definition into a common header; This requires renaming all references in ip6mr to variables that had the distinct suffix. Signed-off-by: Yuval Mintz Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 21 --------------------- include/linux/mroute6.h | 1 - include/linux/mroute_base.h | 46 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index b8aadffe6237..8688c5d03a24 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -4,8 +4,6 @@ #include #include -#include -#include #include #include #include @@ -67,25 +65,6 @@ struct vif_entry_notifier_info { #define VIFF_STATIC 0x8000 -#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) - -struct mr_table { - struct list_head list; - possible_net_t net; - u32 id; - struct sock __rcu *mroute_sk; - struct timer_list ipmr_expire_timer; - struct list_head mfc_unres_queue; - struct vif_device vif_table[MAXVIFS]; - struct rhltable mfc_hash; - struct list_head mfc_cache_list; - int maxvif; - atomic_t cache_resolve_queue_len; - bool mroute_do_assert; - bool mroute_do_pim; - int mroute_reg_vif_num; -}; - /* mfc_flags: * MFC_STATIC - the entry was added statically (not by a routing daemon) * MFC_OFFLOAD - the entry was offloaded to the hardware diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index e2dac199861e..d5c8dc155a42 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -8,7 +8,6 @@ #include #include 
#include -#include #ifdef CONFIG_IPV6_MROUTE static inline int ip6_mroute_opt(int opt) diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 0de651e15f27..1cc944a14df5 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -2,6 +2,9 @@ #define __LINUX_MROUTE_BASE_H #include +#include +#include +#include /** * struct vif_device - interface representor for multicast routing @@ -32,6 +35,49 @@ struct vif_device { __be32 local, remote; }; +#ifndef MAXVIFS +/* This one is nasty; value is defined in uapi using different symbols for + * mroute and mroute6 but both map into same 32. + */ +#define MAXVIFS 32 +#endif + +#define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev)) + +/** + * struct mr_table - a multicast routing table + * @list: entry within a list of multicast routing tables + * @net: net where this table belongs + * @id: identifier of the table + * @mroute_sk: socket associated with the table + * @ipmr_expire_timer: timer for handling unresolved routes + * @mfc_unres_queue: list of unresolved MFC entries + * @vif_table: array containing all possible vifs + * @mfc_hash: Hash table of all resolved routes for easy lookup + * @mfc_cache_list: list of resolved routes for possible traversal + * @maxvif: Identifier of highest value vif currently in use + * @cache_resolve_queue_len: current size of unresolved queue + * @mroute_do_assert: Whether to inform userspace on wrong ingress + * @mroute_do_pim: Whether to receive IGMP PIMv1 + * @mroute_reg_vif_num: PIM-device vif index + */ +struct mr_table { + struct list_head list; + possible_net_t net; + u32 id; + struct sock __rcu *mroute_sk; + struct timer_list ipmr_expire_timer; + struct list_head mfc_unres_queue; + struct vif_device vif_table[MAXVIFS]; + struct rhltable mfc_hash; + struct list_head mfc_cache_list; + int maxvif; + atomic_t cache_resolve_queue_len; + bool mroute_do_assert; + bool mroute_do_pim; + int mroute_reg_vif_num; +}; + #ifdef
CONFIG_IP_MROUTE_COMMON void vif_device_init(struct vif_device *v, struct net_device *dev, -- cgit v1.2.3 From 0bbbf0e7d0e7ea8267836986346a9b3a35b74e4e Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 28 Feb 2018 23:29:33 +0200 Subject: ipmr, ip6mr: Unite creation of new mr_table Now that both ipmr and ip6mr are using the same mr_table structure, we can have a common function to allocate & initialize a new instance. Signed-off-by: Yuval Mintz Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute_base.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 1cc944a14df5..805305722803 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -85,6 +85,13 @@ void vif_device_init(struct vif_device *v, unsigned char threshold, unsigned short flags, unsigned short get_iflink_mask); + +struct mr_table * +mr_table_alloc(struct net *net, u32 id, + const struct rhashtable_params *rht_params, + void (*expire_func)(struct timer_list *t), + void (*table_set)(struct mr_table *mrt, + struct net *net)); #else static inline void vif_device_init(struct vif_device *v, struct net_device *dev, @@ -94,5 +101,15 @@ static inline void vif_device_init(struct vif_device *v, unsigned short get_iflink_mask) { } + +static inline struct mr_table * +mr_table_alloc(struct net *net, u32 id, + const struct rhashtable_params *rht_params, + void (*expire_func)(struct timer_list *t), + void (*table_set)(struct mr_table *mrt, + struct net *net)) +{ + return NULL; +} #endif #endif -- cgit v1.2.3 From 494fff56379c4ad5b8fe36a5b7ffede4044ca7bb Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 28 Feb 2018 23:29:34 +0200 Subject: ipmr, ip6mr: Make mfc_cache a common structure mfc_cache and mfc6_cache are almost identical - the main difference is in the origin/group addresses and comparison-key. 
Make a common structure encapsulating most of the multicast routing logic - mr_mfc and convert both ipmr and ip6mr into using it. For easy conversion [casting, in this case] mr_mfc has to be the first field inside every multicast routing abstraction utilizing it. Signed-off-by: Yuval Mintz Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 45 ++++----------------------------------------- include/linux/mroute6.h | 23 +---------------------- include/linux/mroute_base.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 8688c5d03a24..63b36e6c72a0 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -81,28 +81,13 @@ struct mfc_cache_cmp_arg { /** * struct mfc_cache - multicast routing entries - * @mnode: rhashtable list + * @_c: Common multicast routing information; has to be first [for casting] * @mfc_mcastgrp: destination multicast group address * @mfc_origin: source address * @cmparg: used for rhashtable comparisons - * @mfc_parent: source interface (iif) - * @mfc_flags: entry flags - * @expires: unresolved entry expire time - * @unresolved: unresolved cached skbs - * @last_assert: time of last assert - * @minvif: minimum VIF id - * @maxvif: maximum VIF id - * @bytes: bytes that have passed for this entry - * @pkt: packets that have passed for this entry - * @wrong_if: number of wrong source interface hits - * @lastuse: time of last use of the group (traffic or update) - * @ttls: OIF TTL threshold array - * @refcount: reference count for this entry - * @list: global entry list - * @rcu: used for entry destruction */ struct mfc_cache { - struct rhlist_head mnode; + struct mr_mfc _c; union { struct { __be32 mfc_mcastgrp; @@ -110,28 +95,6 @@ struct mfc_cache { }; struct mfc_cache_cmp_arg cmparg; }; - vifi_t mfc_parent; - int mfc_flags; - - union { - struct { - unsigned 
long expires; - struct sk_buff_head unresolved; - } unres; - struct { - unsigned long last_assert; - int minvif; - int maxvif; - unsigned long bytes; - unsigned long pkt; - unsigned long wrong_if; - unsigned long lastuse; - unsigned char ttls[MAXVIFS]; - refcount_t refcount; - } res; - } mfc_un; - struct list_head list; - struct rcu_head rcu; }; struct mfc_entry_notifier_info { @@ -155,12 +118,12 @@ static inline void ipmr_cache_free(struct mfc_cache *mfc_cache) static inline void ipmr_cache_put(struct mfc_cache *c) { - if (refcount_dec_and_test(&c->mfc_un.res.refcount)) + if (refcount_dec_and_test(&c->_c.mfc_un.res.refcount)) ipmr_cache_free(c); } static inline void ipmr_cache_hold(struct mfc_cache *c) { - refcount_inc(&c->mfc_un.res.refcount); + refcount_inc(&c->_c.mfc_un.res.refcount); } #endif diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index d5c8dc155a42..6acf576fc135 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -71,7 +71,7 @@ struct mfc6_cache_cmp_arg { }; struct mfc6_cache { - struct rhlist_head mnode; + struct mr_mfc _c; union { struct { struct in6_addr mf6c_mcastgrp; @@ -79,27 +79,6 @@ struct mfc6_cache { }; struct mfc6_cache_cmp_arg cmparg; }; - mifi_t mf6c_parent; /* Source interface */ - int mfc_flags; /* Flags on line */ - - union { - struct { - unsigned long expires; - struct sk_buff_head unresolved; /* Unresolved buffers */ - } unres; - struct { - unsigned long last_assert; - int minvif; - int maxvif; - unsigned long bytes; - unsigned long pkt; - unsigned long wrong_if; - unsigned long lastuse; - unsigned char ttls[MAXMIFS]; /* TTL thresholds */ - } res; - } mfc_un; - struct list_head list; - struct rcu_head rcu; }; #define MFC_STATIC 1 diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 805305722803..2769e2f98b32 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -44,6 +44,51 @@ struct vif_device { #define VIF_EXISTS(_mrt, _idx) 
(!!((_mrt)->vif_table[_idx].dev)) +/** + * struct mr_mfc - common multicast routing entries + * @mnode: rhashtable list + * @mfc_parent: source interface (iif) + * @mfc_flags: entry flags + * @expires: unresolved entry expire time + * @unresolved: unresolved cached skbs + * @last_assert: time of last assert + * @minvif: minimum VIF id + * @maxvif: maximum VIF id + * @bytes: bytes that have passed for this entry + * @pkt: packets that have passed for this entry + * @wrong_if: number of wrong source interface hits + * @lastuse: time of last use of the group (traffic or update) + * @ttls: OIF TTL threshold array + * @refcount: reference count for this entry + * @list: global entry list + * @rcu: used for entry destruction + */ +struct mr_mfc { + struct rhlist_head mnode; + unsigned short mfc_parent; + int mfc_flags; + + union { + struct { + unsigned long expires; + struct sk_buff_head unresolved; + } unres; + struct { + unsigned long last_assert; + int minvif; + int maxvif; + unsigned long bytes; + unsigned long pkt; + unsigned long wrong_if; + unsigned long lastuse; + unsigned char ttls[MAXVIFS]; + refcount_t refcount; + } res; + } mfc_un; + struct list_head list; + struct rcu_head rcu; +}; + /** * struct mr_table - a multicast routing table * @list: entry within a list of multicast routing tables -- cgit v1.2.3 From 845c9a7ae7f5342ba42280c3a2f2aa92bce641d7 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 28 Feb 2018 23:29:35 +0200 Subject: ipmr, ip6mr: Unite logic for searching in MFC cache ipmr and ip6mr utilize the exact same methods for searching the hashed resolved connections, difference being only in the construction of the hash comparison key. In order to unite the flow, introduce an mr_table operation set that would contain the protocol specific information required for common flows, in this case - the hash parameters and a comparison key representing a (*,*) route. Signed-off-by: Yuval Mintz Acked-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/linux/mroute_base.h | 52 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 2769e2f98b32..46a082e25dab 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -89,10 +89,23 @@ struct mr_mfc { struct rcu_head rcu; }; +struct mr_table; + +/** + * struct mr_table_ops - callbacks and info for protocol-specific ops + * @rht_params: parameters for accessing the MFC hash + * @cmparg_any: a hash key to be used for matching on (*,*) routes + */ +struct mr_table_ops { + const struct rhashtable_params *rht_params; + void *cmparg_any; +}; + /** * struct mr_table - a multicast routing table * @list: entry within a list of multicast routing tables * @net: net where this table belongs + * @ops: protocol specific operations * @id: identifier of the table * @mroute_sk: socket associated with the table * @ipmr_expire_timer: timer for handling unresolved routes @@ -109,6 +122,7 @@ struct mr_mfc { struct mr_table { struct list_head list; possible_net_t net; + struct mr_table_ops ops; u32 id; struct sock __rcu *mroute_sk; struct timer_list ipmr_expire_timer; @@ -133,10 +147,19 @@ void vif_device_init(struct vif_device *v, struct mr_table * mr_table_alloc(struct net *net, u32 id, - const struct rhashtable_params *rht_params, + struct mr_table_ops *ops, void (*expire_func)(struct timer_list *t), void (*table_set)(struct mr_table *mrt, struct net *net)); + +/* These actually return 'struct mr_mfc *', but to avoid need for explicit + * castings they simply return void. 
+ */ +void *mr_mfc_find_parent(struct mr_table *mrt, + void *hasharg, int parent); +void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi); +void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg); + #else static inline void vif_device_init(struct vif_device *v, struct net_device *dev, @@ -147,14 +170,37 @@ static inline void vif_device_init(struct vif_device *v, { } -static inline struct mr_table * +static inline void * mr_table_alloc(struct net *net, u32 id, - const struct rhashtable_params *rht_params, + struct mr_table_ops *ops, void (*expire_func)(struct timer_list *t), void (*table_set)(struct mr_table *mrt, struct net *net)) { return NULL; } + +static inline void *mr_mfc_find_parent(struct mr_table *mrt, + void *hasharg, int parent) +{ + return NULL; +} + +static inline void *mr_mfc_find_any_parent(struct mr_table *mrt, + int vifi) +{ + return NULL; +} + +static inline struct mr_mfc *mr_mfc_find_any(struct mr_table *mrt, + int vifi, void *hasharg) +{ + return NULL; +} #endif + +static inline void *mr_mfc_find(struct mr_table *mrt, void *hasharg) +{ + return mr_mfc_find_parent(mrt, hasharg, -1); +} #endif -- cgit v1.2.3 From c8d6196803265484f7e1cdd1b00a188dc59a5988 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 28 Feb 2018 23:29:36 +0200 Subject: ipmr, ip6mr: Unite mfc seq logic With the exception of the final dump, ipmr and ip6mr have the exact same seq logic for traversing a given mr_table. Refactor that code and make it common. Signed-off-by: Yuval Mintz Acked-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/linux/mroute_base.h | 69 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 46a082e25dab..a007c5ad0fde 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -203,4 +204,72 @@ static inline void *mr_mfc_find(struct mr_table *mrt, void *hasharg) { return mr_mfc_find_parent(mrt, hasharg, -1); } + +#ifdef CONFIG_PROC_FS +struct mr_mfc_iter { + struct seq_net_private p; + struct mr_table *mrt; + struct list_head *cache; + + /* Lock protecting the mr_table's unresolved queue */ + spinlock_t *lock; +}; + +#ifdef CONFIG_IP_MROUTE_COMMON +/* These actually return 'struct mr_mfc *', but to avoid need for explicit + * castings they simply return void. + */ +void *mr_mfc_seq_idx(struct net *net, + struct mr_mfc_iter *it, loff_t pos); +void *mr_mfc_seq_next(struct seq_file *seq, void *v, + loff_t *pos); + +static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos, + struct mr_table *mrt, spinlock_t *lock) +{ + struct mr_mfc_iter *it = seq->private; + + it->mrt = mrt; + it->cache = NULL; + it->lock = lock; + + return *pos ? 
mr_mfc_seq_idx(seq_file_net(seq), + seq->private, *pos - 1) + : SEQ_START_TOKEN; +} + +static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v) +{ + struct mr_mfc_iter *it = seq->private; + struct mr_table *mrt = it->mrt; + + if (it->cache == &mrt->mfc_unres_queue) + spin_unlock_bh(it->lock); + else if (it->cache == &mrt->mfc_cache_list) + rcu_read_unlock(); +} +#else +static inline void *mr_mfc_seq_idx(struct net *net, + struct mr_mfc_iter *it, loff_t pos) +{ + return NULL; +} + +static inline void *mr_mfc_seq_next(struct seq_file *seq, void *v, + loff_t *pos) +{ + return NULL; +} + +static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos, + struct mr_table *mrt, spinlock_t *lock) +{ + return NULL; +} + +static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v) +{ +} +#endif +#endif #endif -- cgit v1.2.3 From 3feda6b46f734704840685a62b645cbe4efb810c Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 28 Feb 2018 23:29:37 +0200 Subject: ipmr, ip6mr: Unite vif seq functions Same as previously done with the mfc seq, the logic for the vif seq is refactored to be shared between ipmr and ip6mr. Signed-off-by: Yuval Mintz Acked-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/linux/mroute_base.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index a007c5ad0fde..cfaec9bd2d3c 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -206,6 +206,12 @@ static inline void *mr_mfc_find(struct mr_table *mrt, void *hasharg) } #ifdef CONFIG_PROC_FS +struct mr_vif_iter { + struct seq_net_private p; + struct mr_table *mrt; + int ct; +}; + struct mr_mfc_iter { struct seq_net_private p; struct mr_table *mrt; @@ -216,6 +222,16 @@ struct mr_mfc_iter { }; #ifdef CONFIG_IP_MROUTE_COMMON +void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos); +void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos); + +static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos) +{ + return *pos ? mr_vif_seq_idx(seq_file_net(seq), + seq->private, *pos - 1) + : SEQ_START_TOKEN; +} + /* These actually return 'struct mr_mfc *', but to avoid need for explicit * castings they simply return void. 
*/ @@ -249,6 +265,23 @@ static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v) rcu_read_unlock(); } #else +static inline void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, + loff_t pos) +{ + return NULL; +} + +static inline void *mr_vif_seq_next(struct seq_file *seq, + void *v, loff_t *pos) +{ + return NULL; +} + +static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos) +{ + return NULL; +} + static inline void *mr_mfc_seq_idx(struct net *net, struct mr_mfc_iter *it, loff_t pos) { -- cgit v1.2.3 From 889cd83cbe411dda854429f3223ab2d31a860a4a Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 28 Feb 2018 23:29:38 +0200 Subject: ip6mr: Remove MFC_NOTIFY and refactor flags MFC_NOTIFY exists in ip6mr, probably as some legacy code [was already removed for ipmr in commit 06bd6c0370bb ("net: ipmr: remove unused MFC_NOTIFY flag and make the flags enum")]. Remove it from ip6mr as well, and move the enum into a common file; Notice MFC_OFFLOAD is currently only used by ipmr. Signed-off-by: Yuval Mintz Acked-by: Nikolay Aleksandrov Signed-off-by: David S.
Miller --- include/linux/mroute.h | 9 --------- include/linux/mroute6.h | 3 --- include/linux/mroute_base.h | 9 +++++++++ 3 files changed, 9 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 63b36e6c72a0..7ed82e4f11b3 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -65,15 +65,6 @@ struct vif_entry_notifier_info { #define VIFF_STATIC 0x8000 -/* mfc_flags: - * MFC_STATIC - the entry was added statically (not by a routing daemon) - * MFC_OFFLOAD - the entry was offloaded to the hardware - */ -enum { - MFC_STATIC = BIT(0), - MFC_OFFLOAD = BIT(1), -}; - struct mfc_cache_cmp_arg { __be32 mfc_mcastgrp; __be32 mfc_origin; diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 6acf576fc135..1ac38e6819f5 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -81,9 +81,6 @@ struct mfc6_cache { }; }; -#define MFC_STATIC 1 -#define MFC_NOTIFY 2 - #define MFC_ASSERT_THRESH (3*HZ) /* Maximal freq. of asserts */ struct rtmsg; diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index cfaec9bd2d3c..f40202b16dae 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -45,6 +45,15 @@ struct vif_device { #define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev)) +/* mfc_flags: + * MFC_STATIC - the entry was added statically (not by a routing daemon) + * MFC_OFFLOAD - the entry was offloaded to the hardware + */ +enum { + MFC_STATIC = BIT(0), + MFC_OFFLOAD = BIT(1), +}; + /** * struct mr_mfc - common multicast routing entries * @mnode: rhashtable list -- cgit v1.2.3 From 7b0db85737db3f4d76b2a412e4f19eae59b8b494 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 28 Feb 2018 23:29:39 +0200 Subject: ipmr, ip6mr: Unite dumproute flows The various MFC entries are being held in the same kind of mr_tables for both ipmr and ip6mr, and their traversal logic is identical. 
Also, with the exception of the addresses [and other small tidbits] the major bulk of the nla setting is identical. Unite as much of the dumping as possible between the two. Notice this requires creating an mr_table iterator for each, as the for-each preprocessor macro can't be used by the common logic. Signed-off-by: Yuval Mintz Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute_base.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index f40202b16dae..c2560cb50f1d 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -170,6 +170,16 @@ void *mr_mfc_find_parent(struct mr_table *mrt, void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi); void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg); +int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + struct mr_mfc *c, struct rtmsg *rtm); +int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, + struct mr_table *(*iter)(struct net *net, + struct mr_table *mrt), + int (*fill)(struct mr_table *mrt, + struct sk_buff *skb, + u32 portid, u32 seq, struct mr_mfc *c, + int cmd, int flags), + spinlock_t *lock); #else static inline void vif_device_init(struct vif_device *v, struct net_device *dev, @@ -207,6 +217,25 @@ static inline struct mr_mfc *mr_mfc_find_any(struct mr_table *mrt, { return NULL; } + +static inline int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + struct mr_mfc *c, struct rtmsg *rtm) +{ + return -EINVAL; +} + +static inline int +mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, + struct mr_table *(*iter)(struct net *net, + struct mr_table *mrt), + int (*fill)(struct mr_table *mrt, + struct sk_buff *skb, + u32 portid, u32 seq, struct mr_mfc *c, + int cmd, int flags), + spinlock_t *lock) +{ + return -EINVAL; +} #endif static inline void *mr_mfc_find(struct 
mr_table *mrt, void *hasharg) -- cgit v1.2.3 From e8a714e086e42972fd0e2d59e90c28eb2d839429 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 1 Mar 2018 16:08:56 -0800 Subject: net: phy: Export gen10g_* functions In order to remove a fair amount of duplication in the different 10G PHY drivers, export all gen10g_* functions to be able to make use of those. While we are at it, rename gen10g_soft_reset() to gen10g_no_soft_reset() to illustrate what it does. Signed-off-by: Florian Fainelli --- include/linux/phy.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 5a0c3e53e7c2..6e38c699b753 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -994,6 +994,14 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev); int genphy_c45_an_disable_aneg(struct phy_device *phydev); int genphy_c45_read_mdix(struct phy_device *phydev); +/* The gen10g_* functions are the old Clause 45 stub */ +int gen10g_config_aneg(struct phy_device *phydev); +int gen10g_read_status(struct phy_device *phydev); +int gen10g_no_soft_reset(struct phy_device *phydev); +int gen10g_config_init(struct phy_device *phydev); +int gen10g_suspend(struct phy_device *phydev); +int gen10g_resume(struct phy_device *phydev); + static inline int phy_read_status(struct phy_device *phydev) { if (!phydev->drv) -- cgit v1.2.3 From 7576594c8e69f5a9e08c5b952d5139bb43574bbc Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 19 Feb 2018 23:35:54 +0100 Subject: mtd: nand: remove useless fields from pxa3xx NAND platform data The "enable arbiter" bit is available only for pxa3xx based platforms but it was experimentally shown that even if this bit is reserved, some Marvell platforms (64-bit) actually need it to be set. The driver always set this bit regardless of this property, which is harmless. Then this property is not needed. 
The "num_cs" field is always 1 and for a good reason, the old driver (pxa3xx_nand.c) could only handle one. The new driver that replaces it (marvell_nand.c) can handle more, but better use device tree for such description. As there is only one available chip select, there is no need for an array of partitions neither an array of partition numbers. Signed-off-by: Miquel Raynal Acked-by: Robert Jarzmik Signed-off-by: Boris Brezillon --- include/linux/platform_data/mtd-nand-pxa3xx.h | 43 ++++++++------------------- 1 file changed, 12 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mtd-nand-pxa3xx.h b/include/linux/platform_data/mtd-nand-pxa3xx.h index b42ad83cbc20..4fd0f592a2d2 100644 --- a/include/linux/platform_data/mtd-nand-pxa3xx.h +++ b/include/linux/platform_data/mtd-nand-pxa3xx.h @@ -6,41 +6,22 @@ #include /* - * Current pxa3xx_nand controller has two chip select which - * both be workable. - * - * Notice should be taken that: - * When you want to use this feature, you should not enable the - * keep configuration feature, for two chip select could be - * attached with different nand chip. The different page size - * and timing requirement make the keep configuration impossible. + * Current pxa3xx_nand controller has two chip select which both be workable but + * historically all platforms remaining on platform data used only one. Switch + * to device tree if you need more. 
*/ - -/* The max num of chip select current support */ -#define NUM_CHIP_SELECT (2) struct pxa3xx_nand_platform_data { - - /* the data flash bus is shared between the Static Memory - * Controller and the Data Flash Controller, the arbiter - * controls the ownership of the bus - */ - int enable_arbiter; - - /* allow platform code to keep OBM/bootloader defined NFC config */ - int keep_config; - - /* indicate how many chip selects will be used */ - int num_cs; - - /* use an flash-based bad block table */ - bool flash_bbt; - - /* requested ECC strength and ECC step size */ + /* Keep OBM/bootloader NFC timing configuration */ + bool keep_config; + /* Use a flash-based bad block table */ + bool flash_bbt; + /* Requested ECC strength and ECC step size */ int ecc_strength, ecc_step_size; - - const struct mtd_partition *parts[NUM_CHIP_SELECT]; - unsigned int nr_parts[NUM_CHIP_SELECT]; + /* Partitions */ + const struct mtd_partition *parts; + unsigned int nr_parts; }; extern void pxa3xx_set_nand_info(struct pxa3xx_nand_platform_data *info); + #endif /* __ASM_ARCH_PXA3XX_NAND_H */ -- cgit v1.2.3 From fde9fc766e96c494b82931b1d270a9a751be07c0 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Mon, 19 Feb 2018 16:55:06 +0000 Subject: signals: Move put_compat_sigset to compat.h to silence hardened usercopy Since commit afcc90f8621e ("usercopy: WARN() on slab cache usercopy region violations"), MIPS systems booting with a compat root filesystem emit a warning when copying compat siginfo to userspace: WARNING: CPU: 0 PID: 953 at mm/usercopy.c:81 usercopy_warn+0x98/0xe8 Bad or missing usercopy whitelist? Kernel memory exposure attempt detected from SLAB object 'task_struct' (offset 1432, size 16)! 
Modules linked in: CPU: 0 PID: 953 Comm: S01logging Not tainted 4.16.0-rc2 #10 Stack : ffffffff808c0000 0000000000000000 0000000000000001 65ac85163f3bdc4a 65ac85163f3bdc4a 0000000000000000 90000000ff667ab8 ffffffff808c0000 00000000000003f8 ffffffff808d0000 00000000000000d1 0000000000000000 000000000000003c 0000000000000000 ffffffff808c8ca8 ffffffff808d0000 ffffffff808d0000 ffffffff80810000 fffffc0000000000 ffffffff80785c30 0000000000000009 0000000000000051 90000000ff667eb0 90000000ff667db0 000000007fe0d938 0000000000000018 ffffffff80449958 0000000020052798 ffffffff808c0000 90000000ff664000 90000000ff667ab0 00000000100c0000 ffffffff80698810 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 ffffffff8010d02c 65ac85163f3bdc4a ... Call Trace: [] show_stack+0x9c/0x130 [] dump_stack+0x90/0xd0 [] __warn+0x100/0x118 [] warn_slowpath_fmt+0x4c/0x70 [] usercopy_warn+0x98/0xe8 [] __check_object_size+0xfc/0x250 [] put_compat_sigset+0x30/0x88 [] setup_rt_frame_n32+0xc4/0x160 [] do_signal+0x19c/0x230 [] do_notify_resume+0x60/0x78 [] work_notifysig+0x10/0x18 ---[ end trace 88fffbf69147f48a ]--- Commit 5905429ad856 ("fork: Provide usercopy whitelisting for task_struct") noted that: "While the blocked and saved_sigmask fields of task_struct are copied to userspace (via sigmask_to_save() and setup_rt_frame()), it is always copied with a static length (i.e. sizeof(sigset_t))." However, this is not true in the case of compat signals, whose sigset is copied by put_compat_sigset and receives size as an argument. At most call sites, put_compat_sigset is copying a sigset from the current task_struct. This triggers a warning when CONFIG_HARDENED_USERCOPY is active. However, by marking this function as static inline, the warning can be avoided because in all of these cases the size is constant at compile time, which is allowed. 
The only site where this is not the case is handling the rt_sigpending syscall, but there the copy is being made from a stack local variable so does not trigger the warning. Move put_compat_sigset to compat.h, and mark it static inline. This fixes the WARN on MIPS. Fixes: afcc90f8621e ("usercopy: WARN() on slab cache usercopy region violations") Signed-off-by: Matt Redfearn Acked-by: Kees Cook Cc: "Dmitry V . Levin" Cc: Al Viro Cc: kernel-hardening@lists.openwall.com Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/18639/ Signed-off-by: James Hogan --- include/linux/compat.h | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 8a9643857c4a..c4139c7a0de0 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -17,6 +17,7 @@ #include #include #include /* for aio_context_t */ +#include #include #include @@ -550,8 +551,29 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); extern int get_compat_sigset(sigset_t *set, const compat_sigset_t __user *compat); -extern int put_compat_sigset(compat_sigset_t __user *compat, - const sigset_t *set, unsigned int size); + +/* + * Defined inline such that size can be compile time constant, which avoids + * CONFIG_HARDENED_USERCOPY complaining about copies from task_struct + */ +static inline int +put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, + unsigned int size) +{ + /* size <= sizeof(compat_sigset_t) <= sizeof(sigset_t) */ +#ifdef __BIG_ENDIAN + compat_sigset_t v; + switch (_NSIG_WORDS) { + case 4: v.sig[7] = (set->sig[3] >> 32); v.sig[6] = set->sig[3]; + case 3: v.sig[5] = (set->sig[2] >> 32); v.sig[4] = set->sig[2]; + case 2: v.sig[3] = (set->sig[1] >> 32); v.sig[2] = set->sig[1]; + case 1: v.sig[1] = (set->sig[0] >> 32); v.sig[0] = set->sig[0]; + 
} + return copy_to_user(compat, &v, size) ? -EFAULT : 0; +#else + return copy_to_user(compat, set, size) ? -EFAULT : 0; +#endif +} asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes, -- cgit v1.2.3 From d02f51cbcf12b09ab945873e35046045875eed9a Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Sat, 3 Mar 2018 03:03:46 +0100 Subject: bpf: fix bpf_skb_adjust_net/bpf_skb_proto_xlat to deal with gso sctp skbs SCTP GSO skbs have a gso_size of GSO_BY_FRAGS, so any sort of unconditionally mangling of that will result in nonsense value and would corrupt the skb later on. Therefore, i) add two helpers skb_increase_gso_size() and skb_decrease_gso_size() that would throw a one time warning and bail out for such skbs and ii) refuse and return early with an error in those BPF helpers that are affected. We do need to bail out as early as possible from there before any changes on the skb have been performed. Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper") Co-authored-by: Daniel Borkmann Signed-off-by: Daniel Axtens Cc: Marcelo Ricardo Leitner Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- include/linux/skbuff.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c1e66bdcf583..8c67c33f40c9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4038,6 +4038,12 @@ static inline bool skb_is_gso_v6(const struct sk_buff *skb) return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; } +/* Note: Should be called only if skb_is_gso(skb) is true */ +static inline bool skb_is_gso_sctp(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->gso_type & SKB_GSO_SCTP; +} + static inline void skb_gso_reset(struct sk_buff *skb) { skb_shinfo(skb)->gso_size = 0; @@ -4045,6 +4051,22 @@ static inline void skb_gso_reset(struct sk_buff *skb) skb_shinfo(skb)->gso_type = 0; } +static inline 
void skb_increase_gso_size(struct skb_shared_info *shinfo, + u16 increment) +{ + if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS)) + return; + shinfo->gso_size += increment; +} + +static inline void skb_decrease_gso_size(struct skb_shared_info *shinfo, + u16 decrement) +{ + if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS)) + return; + shinfo->gso_size -= decrement; +} + void __skb_warn_lro_forwarding(const struct sk_buff *skb); static inline bool skb_warn_if_lro(const struct sk_buff *skb) -- cgit v1.2.3 From 39a751a4cb7e4798f0ce1169ec92de4a1aae39e3 Mon Sep 17 00:00:00 2001 From: Frank Rowand Date: Mon, 12 Feb 2018 00:19:42 -0800 Subject: of: change overlay apply input data from unflattened to FDT Move duplicating and unflattening of an overlay flattened devicetree (FDT) into the overlay application code. To accomplish this, of_overlay_apply() is replaced by of_overlay_fdt_apply(). The copy of the FDT (aka "duplicate FDT") now belongs to devicetree code, which is thus responsible for freeing the duplicate FDT. The caller of of_overlay_fdt_apply() remains responsible for freeing the original FDT. The unflattened devicetree now belongs to devicetree code, which is thus responsible for freeing the unflattened devicetree. These ownership changes prevent early freeing of the duplicated FDT or the unflattened devicetree, which could result in use after free errors. of_overlay_fdt_apply() is a private function for the anticipated overlay loader. Update unittest.c to use of_overlay_fdt_apply() instead of of_overlay_apply(). Move overlay fragments from artificial locations in drivers/of/unittest-data/tests-overlay.dtsi into one devicetree source file per overlay. This led to changes in drivers/of/unitest-data/Makefile and drivers/of/unitest.c. - Add overlay directives to the overlay devicetree source files so that dtc will compile them as true overlays into one FDT data chunk per overlay. 
- Set CFLAGS for drivers/of/unittest-data/testcases.dts so that symbols will be generated for overlay resolution of overlays that are no longer artificially contained in testcases.dts - Unflatten and apply each unittest overlay FDT using of_overlay_fdt_apply(). - Enable the of_resolve_phandles() check for whether the unflattened overlay is detached. This check was previously disabled because the overlays from tests-overlay.dtsi were not unflattened into detached trees. - Other changes to unittest.c infrastructure to manage multiple test FDTs built into the kernel image (access by name instead of arbitrary number). - of_unittest_overlay_high_level(): previously unused code to add properties from the overlay_base devicetree to the live tree was triggered by the restructuring of tests-overlay.dtsi and thus testcases.dts. This exposed two bugs: (1) the need to dup a property before adding it, and (2) property 'name' is auto-generated in the unflatten code and thus will be a duplicate in the __symbols__ node - do not treat this duplicate as an error. 
Signed-off-by: Frank Rowand --- include/linux/of.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index da1ee95241c1..ebf22dd0860c 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1359,8 +1359,8 @@ struct of_overlay_notify_data { #ifdef CONFIG_OF_OVERLAY -/* ID based overlays; the API for external users */ -int of_overlay_apply(struct device_node *tree, int *ovcs_id); +int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size, + int *ovcs_id); int of_overlay_remove(int *ovcs_id); int of_overlay_remove_all(void); @@ -1369,7 +1369,7 @@ int of_overlay_notifier_unregister(struct notifier_block *nb); #else -static inline int of_overlay_apply(struct device_node *tree, int *ovcs_id) +static inline int of_overlay_fdt_apply(void *overlay_fdt, int *ovcs_id) { return -ENOTSUPP; } -- cgit v1.2.3 From 325ea10c0809406ce23f038602abbc454f3f761d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 3 Mar 2018 12:20:47 +0100 Subject: sched/headers: Simplify and clean up header usage in the scheduler Do the following cleanups and simplifications: - sched/sched.h already includes , so no need to include it in sched/core.c again. - order the headers alphabetically - add all headers to kernel/sched/sched.h - remove all unnecessary includes from the .c files that are already included in kernel/sched/sched.h. Finally, make all scheduler .c files use a single common header: #include "sched.h" ... which now contains a union of the relied upon headers. This makes the various .c files easier to read and easier to handle. 
Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/deadline.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index a5bc8728ead7..0cb034331cbb 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -1,8 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_SCHED_DEADLINE_H -#define _LINUX_SCHED_DEADLINE_H - -#include /* * SCHED_DEADLINE tasks has negative priorities, reflecting @@ -28,5 +24,3 @@ static inline bool dl_time_before(u64 a, u64 b) { return (s64)(a - b) < 0; } - -#endif /* _LINUX_SCHED_DEADLINE_H */ -- cgit v1.2.3 From 779b7931b27bfa80bac46d0115d229259aef580b Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Thu, 1 Mar 2018 17:13:37 +1100 Subject: net: rename skb_gso_validate_mtu -> skb_gso_validate_network_len If you take a GSO skb, and split it into packets, will the network length (L3 headers + L4 headers + payload) of those packets be small enough to fit within a given MTU? skb_gso_validate_mtu gives you the answer to that question. However, we recently added a way to validate the MAC length of a split GSO skb (L2+L3+L4+payload), and the names get confusing, so rename skb_gso_validate_mtu to skb_gso_validate_network_len Signed-off-by: Daniel Axtens Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c1e66bdcf583..a057dd1a75c7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3286,7 +3286,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); -bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu); +bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu); bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); -- cgit v1.2.3 From a4a77718ee4053a44aa40fe67247c1afb5ce2f1e Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Thu, 1 Mar 2018 17:13:40 +1100 Subject: net: make skb_gso_*_seglen functions private They're very hard to use properly as they do not consider the GSO_BY_FRAGS case. Code should use skb_gso_validate_network_len and skb_gso_validate_mac_len as they do consider this case. Make the seglen functions static, which stops people using them outside of skbuff.c Signed-off-by: Daniel Axtens Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 33 --------------------------------- 1 file changed, 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a057dd1a75c7..ddf77cf4ff2d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3285,7 +3285,6 @@ int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); -unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu); bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); @@ -4104,38 +4103,6 @@ static inline bool skb_head_is_locked(const struct sk_buff *skb) return !skb->head_frag || skb_cloned(skb); } -/** - * skb_gso_network_seglen - Return length of individual segments of a gso packet - * - * @skb: GSO skb - * - * skb_gso_network_seglen is used to determine the real size of the - * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP). - * - * The MAC/L2 header is not accounted for. - */ -static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb) -{ - unsigned int hdr_len = skb_transport_header(skb) - - skb_network_header(skb); - return hdr_len + skb_gso_transport_seglen(skb); -} - -/** - * skb_gso_mac_seglen - Return length of individual segments of a gso packet - * - * @skb: GSO skb - * - * skb_gso_mac_seglen is used to determine the real size of the - * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4 - * headers (TCP/UDP). 
- */ -static inline unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) -{ - unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); - return hdr_len + skb_gso_transport_seglen(skb); -} - /* Local Checksum Offload. * Compute outer checksum based on the assumption that the * inner checksum will be offloaded later. -- cgit v1.2.3 From 218f6024abec04ec78e56b6761f70d404bab8637 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 18 Jan 2018 01:28:10 +0900 Subject: mmc: tmio: remove TMIO_MMC_WRPROTECT_DISABLE The use of this flag has been replaced with MMC_CAP2_NO_WRITE_PROTECT. No platform defines this flag any more. Remove. Signed-off-by: Masahiro Yamada Acked-by: Lee Jones Reviewed-by: Wolfram Sang Signed-off-by: Ulf Hansson Tested-by: Wolfram Sang --- include/linux/mfd/tmio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 396a103c8bc6..91f92215ca74 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -36,7 +36,6 @@ } while (0) /* tmio MMC platform flags */ -#define TMIO_MMC_WRPROTECT_DISABLE BIT(0) /* * Some controllers can support a 2-byte block size when the bus width * is configured in 4-bit mode. -- cgit v1.2.3 From 36f1d7e817a5540f6624ce1007339688bd443308 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 27 Feb 2018 14:51:25 +0200 Subject: mmc: slot-gpio: Add a function to enable/disable card detect IRQ wakeup Commit 03dbaa04a2e5 ("mmc: slot-gpio: Add support to enable irq wake on cd_irq") enabled wakeup at initialization. However drivers may wish to enable and disable based on different criteria. Add a helper function mmc_gpio_set_cd_wake() to make it easy for drivers to do that. 
Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- include/linux/mmc/slot-gpio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h index 91f1ba0663c8..06607c59c4d0 100644 --- a/include/linux/mmc/slot-gpio.h +++ b/include/linux/mmc/slot-gpio.h @@ -31,6 +31,7 @@ int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id, unsigned int debounce, bool *gpio_invert); void mmc_gpio_set_cd_isr(struct mmc_host *host, irqreturn_t (*isr)(int irq, void *dev_id)); +int mmc_gpio_set_cd_wake(struct mmc_host *host, bool on); void mmc_gpiod_request_cd_irq(struct mmc_host *host); bool mmc_can_gpio_cd(struct mmc_host *host); bool mmc_can_gpio_ro(struct mmc_host *host); -- cgit v1.2.3 From 01fd61c0b9bd85ab41fb60fbd781d44882ee6887 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Tue, 27 Feb 2018 14:14:11 -0600 Subject: PCI: Add a return type for pci_reset_bridge_secondary_bus() Add a return value to pci_reset_bridge_secondary_bus() so we can return an error if the device doesn't become ready after the reset. 
Signed-off-by: Sinan Kaya Signed-off-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index af75d9d76189..562875d34b98 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1095,7 +1095,7 @@ int pci_reset_bus(struct pci_bus *bus); int pci_try_reset_bus(struct pci_bus *bus); void pci_reset_secondary_bus(struct pci_dev *dev); void pcibios_reset_secondary_bus(struct pci_dev *dev); -void pci_reset_bridge_secondary_bus(struct pci_dev *dev); +int pci_reset_bridge_secondary_bus(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align); -- cgit v1.2.3 From cceae76ef3a1181242e4f7b559a7bfc904a9855c Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 11 Feb 2018 19:17:20 +0900 Subject: netfilter: nfnetlink_acct: remove useless parameter parameter skb in nfnl_acct_overquota is not used anywhere. 
Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink_acct.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h index b4d741195c28..beee8bffe49e 100644 --- a/include/linux/netfilter/nfnetlink_acct.h +++ b/include/linux/netfilter/nfnetlink_acct.h @@ -16,6 +16,5 @@ struct nf_acct; struct nf_acct *nfnl_acct_find_get(struct net *net, const char *filter_name); void nfnl_acct_put(struct nf_acct *acct); void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct); -int nfnl_acct_overquota(struct net *net, const struct sk_buff *skb, - struct nf_acct *nfacct); +int nfnl_acct_overquota(struct net *net, struct nf_acct *nfacct); #endif /* _NFNL_ACCT_H */ -- cgit v1.2.3 From 1b293e30f759b03f246baae862bdf35e57b2c39e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Feb 2018 19:42:29 +0100 Subject: netfilter: x_tables: move hook entry checks into core Allow followup patch to change one location instead of three. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 1313b35c3ab7..fa0c19c328f1 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -281,6 +281,8 @@ int xt_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset); +int xt_check_table_hooks(const struct xt_table_info *info, unsigned int valid_hooks); + unsigned int *xt_alloc_entry_offsets(unsigned int size); bool xt_find_jump_offset(const unsigned int *offsets, unsigned int target, unsigned int size); -- cgit v1.2.3 From c84ca954ac9fa67a6ce27f91f01e4451c74fd8f6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Feb 2018 19:42:33 +0100 Subject: netfilter: x_tables: add counters allocation wrapper This allows the size checks to be kept in a single spot. This is supposed to reduce OOM situations when fuzz-testing xtables. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index fa0c19c328f1..0bd93c589a8c 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -301,6 +301,7 @@ int xt_data_to_user(void __user *dst, const void *src, void *xt_copy_counters_from_user(const void __user *user, unsigned int len, struct xt_counters_info *info, bool compat); +struct xt_counters *xt_counters_alloc(unsigned int counters); struct xt_table *xt_register_table(struct net *net, const struct xt_table *table, -- cgit v1.2.3 From 9782a11efc072faaf91d4aa60e9d23553f918029 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Feb 2018 19:42:34 +0100 Subject: netfilter: compat: prepare xt_compat_init_offsets to return errors should have no impact, function still always returns 0. This patch is only to ease review. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 0bd93c589a8c..7bd896dc78df 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -510,7 +510,7 @@ void xt_compat_unlock(u_int8_t af); int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta); void xt_compat_flush_offsets(u_int8_t af); -void xt_compat_init_offsets(u_int8_t af, unsigned int number); +int xt_compat_init_offsets(u8 af, unsigned int number); int xt_compat_calc_jump(u_int8_t af, unsigned int offset); int xt_compat_match_offset(const struct xt_match *match); -- cgit v1.2.3 From a6f1086e29e93621a6394b94b8c0e4a4e490f38b Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 5 Mar 2018 15:22:30 -0800 Subject: PCI: Move of_irq_parse_and_map_pci() declaration under OF_IRQ Since commit 4670d610d592 ("PCI: Move OF-related PCI functions into PCI core"), sparc:allmodconfig fails to build with the following error. pcie-cadence-host.c:(.text+0x4c4): undefined reference to `of_irq_parse_and_map_pci' pcie-cadence-host.c:(.text+0x4c8): undefined reference to `of_irq_parse_and_map_pci' of_irq_parse_and_map_pci() is now only available if OF_IRQ is enabled. Make its declaration and its dummy function dependent on OF_IRQ to solve the problem. 
Fixes: 4670d610d592 ("PCI: Move OF-related PCI functions into PCI core") Signed-off-by: Guenter Roeck Signed-off-by: Bjorn Helgaas Acked-by: Rob Herring --- include/linux/of_pci.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h index 88865e0ebf4d..091033a6b836 100644 --- a/include/linux/of_pci.h +++ b/include/linux/of_pci.h @@ -13,7 +13,6 @@ struct device_node; struct device_node *of_pci_find_child_device(struct device_node *parent, unsigned int devfn); int of_pci_get_devfn(struct device_node *np); -int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin); int of_pci_parse_bus_range(struct device_node *node, struct resource *res); int of_get_pci_domain_nr(struct device_node *node); int of_pci_get_max_link_speed(struct device_node *node); @@ -33,12 +32,6 @@ static inline int of_pci_get_devfn(struct device_node *np) return -EINVAL; } -static inline int -of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin) -{ - return 0; -} - static inline int of_pci_parse_bus_range(struct device_node *node, struct resource *res) { @@ -67,6 +60,16 @@ of_pci_get_max_link_speed(struct device_node *node) static inline void of_pci_check_probe_only(void) { } #endif +#if IS_ENABLED(CONFIG_OF_IRQ) +int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin); +#else +static inline int +of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin) +{ + return 0; +} +#endif + #if defined(CONFIG_OF_ADDRESS) int of_pci_get_host_bridge_resources(struct device_node *dev, unsigned char busno, unsigned char bus_max, -- cgit v1.2.3 From 9130ba884640328bb78aaa4840e5ddf06ccafb1c Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 27 Feb 2018 17:40:38 -0600 Subject: scripts/dtc: Update to upstream version v1.4.6-9-gaadd0b65c987 This adds the following commits from upstream: aadd0b65c987 checks: centralize printing of property names in 
failure messages 88960e398907 checks: centralize printing of node path in check_msg f1879e1a50eb Add limited read-only support for older (V2 and V3) device tree to libfdt. 37dea76e9700 srcpos: drop special handling of tab 65893da4aee0 libfdt: overlay: Add missing license 962a45ca034d Avoid installing pylibfdt when dependencies are missing cd6ea1b2bea6 Makefile: Split INSTALL out into INSTALL_{PROGRAM,LIB,DATA,SCRIPT} 51b3a16338df Makefile.tests: Add LIBDL make(1) variable for portability sake 333d533a8f4d Attempt to auto-detect stat(1) being used if not given proper invocation e54388015af1 dtc: Bump version to v1.4.6 a1fe86f380cb fdtoverlay: Switch from using alloca to malloc c8d5472de3ff tests: Improve compatibility with other platforms c81d389a10cc checks: add chosen node checks e671852042a7 checks: add aliases node checks d0c44ebe3f42 checks: check for #{size,address}-cells without child nodes 18a3d84bb802 checks: add string list check for *-names properties 8fe94fd6f19f checks: add string list check 6c5730819604 checks: add a string check for 'label' property a384191eba09 checks: fix sound-dai phandle with arg property check b260c4f610c0 Fix ambiguous grammar for devicetree rule fe667e382bac tests: Add some basic tests for the pci_bridge checks 7975f6422260 Fix widespread incorrect use of strneq(), replace with new strprefixeq() fca296445eab Add strstarts() helper function cc392f089007 tests: Check non-matching cases for fdt_node_check_compatible() bba26a5291c8 livetree: avoid assertion of orphan phandles with overlays c8f8194d76cc implement strnlen for systems that need it c8b38f65fdec libfdt: Remove leading underscores from identifiers 3b62fdaebfe5 Remove leading underscores from identifiers 2d45d1c5c65e Replace FDT_VERSION() with stringify() 2e6fe5a107b5 Fix some errors in comments b0ae9e4b0ceb tests: Correct warning in sw_tree1.c Commit c8b38f65fdec upstream ("libfdt: Remove leading underscores from identifiers") changed the multiple inclusion define 
protection, so the kernel's libfdt_env.h needs the corresponding update. Signed-off-by: Rob Herring --- include/linux/libfdt_env.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libfdt_env.h b/include/linux/libfdt_env.h index 14997285e53d..c6ac1fe7ec68 100644 --- a/include/linux/libfdt_env.h +++ b/include/linux/libfdt_env.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBFDT_ENV_H -#define _LIBFDT_ENV_H +#ifndef LIBFDT_ENV_H +#define LIBFDT_ENV_H #include @@ -15,4 +15,4 @@ typedef __be64 fdt64_t; #define fdt64_to_cpu(x) be64_to_cpu(x) #define cpu_to_fdt64(x) cpu_to_be64(x) -#endif /* _LIBFDT_ENV_H */ +#endif /* LIBFDT_ENV_H */ -- cgit v1.2.3 From 859d880cf544dbe095ce97534ef04cd88ba2f2b4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 6 Mar 2018 00:20:25 -0600 Subject: signal: Correct the offset of si_pkey in struct siginfo The change moving addr_lsb into the _sigfault union failed to take into account that _sigfault._addr_bnd._lower being a pointer forced the entire union to have pointer alignment. In practice this only mattered for the offset of si_pkey which is why this has taken so long to discover. To correct this change _dummy_pkey and _dummy_bnd to have pointer type. Reported-by: kernel test robot Fixes: b68a68d3dcc1 ("signal: Move addr_lsb into the _sigfault union for clarity") Signed-off-by: "Eric W. Biederman" --- include/linux/compat.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 8a9643857c4a..e16d07eb08cf 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -229,13 +229,13 @@ typedef struct compat_siginfo { short int _addr_lsb; /* Valid LSB of the reported address. 
*/ /* used when si_code=SEGV_BNDERR */ struct { - short _dummy_bnd; + compat_uptr_t _dummy_bnd; compat_uptr_t _lower; compat_uptr_t _upper; } _addr_bnd; /* used when si_code=SEGV_PKUERR */ struct { - short _dummy_pkey; + compat_uptr_t _dummy_pkey; u32 _pkey; } _addr_pkey; }; -- cgit v1.2.3 From dde67eb1beebcd8493e7b30e74a80f0865ab7e36 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Mon, 22 Jan 2018 08:32:01 +0100 Subject: i2c: add i2c_get_device_id() to get the standard i2c device id Can be used during probe to double check that the probed device is what is expected. Loosely based on code from Adrian Fiergolski . Tested-by: Adrian Fiergolski Reviewed-by: Wolfram Sang Signed-off-by: Peter Rosin --- include/linux/i2c.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 419a38e7c315..44ad14e016b5 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -47,6 +47,7 @@ struct i2c_algorithm; struct i2c_adapter; struct i2c_client; struct i2c_driver; +struct i2c_device_identity; union i2c_smbus_data; struct i2c_board_info; enum i2c_slave_event; @@ -186,8 +187,37 @@ extern s32 i2c_smbus_write_i2c_block_data(const struct i2c_client *client, extern s32 i2c_smbus_read_i2c_block_data_or_emulated(const struct i2c_client *client, u8 command, u8 length, u8 *values); +int i2c_get_device_id(const struct i2c_client *client, + struct i2c_device_identity *id); #endif /* I2C */ +/** + * struct i2c_device_identity - i2c client device identification + * @manufacturer_id: 0 - 4095, database maintained by NXP + * @part_id: 0 - 511, according to manufacturer + * @die_revision: 0 - 7, according to manufacturer + */ +struct i2c_device_identity { + u16 manufacturer_id; +#define I2C_DEVICE_ID_NXP_SEMICONDUCTORS 0 +#define I2C_DEVICE_ID_NXP_SEMICONDUCTORS_1 1 +#define I2C_DEVICE_ID_NXP_SEMICONDUCTORS_2 2 +#define I2C_DEVICE_ID_NXP_SEMICONDUCTORS_3 3 +#define 
I2C_DEVICE_ID_RAMTRON_INTERNATIONAL 4 +#define I2C_DEVICE_ID_ANALOG_DEVICES 5 +#define I2C_DEVICE_ID_STMICROELECTRONICS 6 +#define I2C_DEVICE_ID_ON_SEMICONDUCTOR 7 +#define I2C_DEVICE_ID_SPRINTEK_CORPORATION 8 +#define I2C_DEVICE_ID_ESPROS_PHOTONICS_AG 9 +#define I2C_DEVICE_ID_FUJITSU_SEMICONDUCTOR 10 +#define I2C_DEVICE_ID_FLIR 11 +#define I2C_DEVICE_ID_O2MICRO 12 +#define I2C_DEVICE_ID_ATMEL 13 +#define I2C_DEVICE_ID_NONE 0xffff + u16 part_id; + u8 die_revision; +}; + enum i2c_alert_protocol { I2C_PROTOCOL_SMBUS_ALERT, I2C_PROTOCOL_SMBUS_HOST_NOTIFY, -- cgit v1.2.3 From c17a7476e4c41884d82e3675c25ceae982c07a63 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Fri, 8 Dec 2017 15:29:44 +0100 Subject: HID: core: rewrite the hid-generic automatic unbind We actually can have the unbind/rebind logic in hid-core.c, leaving only the match function in hid-generic. This makes hid-generic simpler and the whole logic simpler too. Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 091a81cf330f..a62ee4a609ac 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -686,8 +686,6 @@ struct hid_usage_id { * @input_mapped: invoked on input registering after mapping an usage * @input_configured: invoked just before the device is registered * @feature_mapping: invoked on feature registering - * @bus_add_driver: invoked when a HID driver is about to be added - * @bus_removed_driver: invoked when a HID driver has been removed * @suspend: invoked on suspend (NULL means nop) * @resume: invoked on resume if device was not reset (NULL means nop) * @reset_resume: invoked on resume if device was reset (NULL means nop) @@ -742,8 +740,6 @@ struct hid_driver { void (*feature_mapping)(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage); - void (*bus_add_driver)(struct hid_driver *driver); - void 
(*bus_removed_driver)(struct hid_driver *driver); #ifdef CONFIG_PM int (*suspend)(struct hid_device *hdev, pm_message_t message); int (*resume)(struct hid_device *hdev); -- cgit v1.2.3 From cb88a0588717ba6c756cb5972d75766b273a6817 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 6 Mar 2018 09:38:49 +0100 Subject: usb: quirks: add control message delay for 1b1c:1b20 Corsair Strafe RGB keyboard does not respond to usb control messages sometimes and hence generates timeouts. Commit de3af5bf259d ("usb: quirks: add delay init quirk for Corsair Strafe RGB keyboard") tried to fix those timeouts by adding USB_QUIRK_DELAY_INIT. Unfortunately, even with this quirk timeouts of usb_control_msg() can still be seen, but with a lower frequency (approx. 1 out of 15): [ 29.103520] usb 1-8: string descriptor 0 read error: -110 [ 34.363097] usb 1-8: can't set config #1, error -110 Adding further delays to different locations where usb control messages are issued just moves the timeouts to other locations, e.g.: [ 35.400533] usbhid 1-8:1.0: can't add hid device: -110 [ 35.401014] usbhid: probe of 1-8:1.0 failed with error -110 The only way to reliably avoid those issues is having a pause after each usb control message. In approx. 200 boot cycles no more timeouts were seen. Additionally, keep USB_QUIRK_DELAY_INIT as it turned out to be necessary to have the delay in hub_port_connect() after hub_port_init(). The overall boot time seems not to be influenced by these additional delays, even on fast machines and lightweight distributions.
Fixes: de3af5bf259d ("usb: quirks: add delay init quirk for Corsair Strafe RGB keyboard") Cc: stable@vger.kernel.org Signed-off-by: Danilo Krummrich Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/quirks.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index f1fcec2fd5f8..b7a99ce56bc9 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -63,4 +63,7 @@ */ #define USB_QUIRK_DISCONNECT_SUSPEND BIT(12) +/* Device needs a pause after every control message. */ +#define USB_QUIRK_DELAY_CTRL_MSG BIT(13) + #endif /* __LINUX_USB_QUIRKS_H */ -- cgit v1.2.3 From a4429e53c9b3082b05e51224c3d58dbdd39306c5 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 13 Feb 2018 09:05:40 +0800 Subject: KVM: Introduce paravirtualization hints and KVM_HINTS_DEDICATED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces kvm_para_has_hint() to query for hints about the configuration of the guests. The first hint KVM_HINTS_DEDICATED, is set if the guest has dedicated physical CPUs for each vCPU (i.e. pinning and no over-commitment). This allows optimizing spinlocks and tells the guest to avoid PV TLB flush. 
Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Eduardo Habkost Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- include/linux/kvm_para.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h index 51f6ef2c2ff4..f23b90b02898 100644 --- a/include/linux/kvm_para.h +++ b/include/linux/kvm_para.h @@ -9,4 +9,9 @@ static inline bool kvm_para_has_feature(unsigned int feature) { return !!(kvm_arch_para_features() & (1UL << feature)); } + +static inline bool kvm_para_has_hint(unsigned int feature) +{ + return !!(kvm_arch_para_hints() & (1UL << feature)); +} #endif /* __LINUX_KVM_PARA_H */ -- cgit v1.2.3 From ce767047b1b731a1899a528338644f2bfdab8b36 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Sun, 4 Mar 2018 22:17:17 -0700 Subject: hv_vmbus: Correct the stale comments regarding cpu affinity The comments don't match what the current code does and also contain a typo. This patch corrects them. Signed-off-by: Haiyang Zhang Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 93bd6fcd6e62..2048f3c3b68a 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -844,7 +844,7 @@ struct vmbus_channel { /* * NUMA distribution policy: - * We support teo policies: + * We support two policies: * 1) Balanced: Here all performance critical channels are * distributed evenly amongst all the NUMA nodes. * This policy will be the default policy.
-- cgit v1.2.3 From 6b4f3d01052a479c7ebbe99d52a663558dc1be2a Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Fri, 8 Sep 2017 12:40:01 -0400 Subject: usb, signal, security: only pass the cred, not the secid, to kill_pid_info_as_cred and security_task_kill commit d178bc3a708f39cbfefc3fab37032d3f2511b4ec ("user namespace: usb: make usb urbs user namespace aware (v2)") changed kill_pid_info_as_uid to kill_pid_info_as_cred, saving and passing a cred structure instead of uids. Since the secid can be obtained from the cred, drop the secid fields from the usb_dev_state and async structures, and drop the secid argument to kill_pid_info_as_cred. Replace the secid argument to security_task_kill with the cred. Update SELinux, Smack, and AppArmor to use the cred, which avoids the need for Smack and AppArmor to use a secid at all in this hook. Further changes to Smack might still be required to take full advantage of this change, since it should now be possible to perform capability checking based on the supplied cred. The changes to Smack and AppArmor have only been compile-tested. Signed-off-by: Stephen Smalley Acked-by: Paul Moore Acked-by: Casey Schaufler Acked-by: Greg Kroah-Hartman Acked-by: John Johansen Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 5 +++-- include/linux/sched/signal.h | 2 +- include/linux/security.h | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 7161d8e7ee79..e0ac011d07a5 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -672,7 +672,8 @@ * @p contains the task_struct for process. * @info contains the signal information. * @sig contains the signal value. - * @secid contains the sid of the process where the signal originated + * @cred contains the cred of the process where the signal originated, or + * NULL if the current task is the originator. * Return 0 if permission is granted. 
* @task_prctl: * Check permission before performing a process control operation on the @@ -1564,7 +1565,7 @@ union security_list_options { int (*task_getscheduler)(struct task_struct *p); int (*task_movememory)(struct task_struct *p); int (*task_kill)(struct task_struct *p, struct siginfo *info, - int sig, u32 secid); + int sig, const struct cred *cred); int (*task_prctl)(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); void (*task_to_inode)(struct task_struct *p, struct inode *inode); diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 23b4f9cb82db..a7ce74c74e49 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -319,7 +319,7 @@ extern int force_sig_info(int, struct siginfo *, struct task_struct *); extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *, - const struct cred *, u32); + const struct cred *); extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); extern __must_check bool do_notify_parent(struct task_struct *, int); diff --git a/include/linux/security.h b/include/linux/security.h index 73f1ef625d40..3f5fd988ee87 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -347,7 +347,7 @@ int security_task_setscheduler(struct task_struct *p); int security_task_getscheduler(struct task_struct *p); int security_task_movememory(struct task_struct *p); int security_task_kill(struct task_struct *p, struct siginfo *info, - int sig, u32 secid); + int sig, const struct cred *cred); int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); void security_task_to_inode(struct task_struct *p, struct inode *inode); @@ -1010,7 +1010,7 @@ static inline int 
security_task_movememory(struct task_struct *p) static inline int security_task_kill(struct task_struct *p, struct siginfo *info, int sig, - u32 secid) + const struct cred *cred) { return 0; } -- cgit v1.2.3 From a9db0ecf1578894ea3405f3eb5a441508840d479 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Wed, 16 Aug 2017 09:43:48 +0300 Subject: {net,IB}/mlx5: Add has_tag to mlx5_flow_act The has_tag member will indicate whether a tag action was specified in flow specification. A flow tag 0 = MLX5_FS_DEFAULT_FLOW_TAG is assumed a valid flow tag that is currently used by mlx5 RDMA driver, whereas in HW flow_tag = 0 means that the user doesn't care about flow_tag. HW always provide a flow_tag = 0 if all flow tags requested on a specific flow are 0. So we need a way (in the driver) to differentiate between a user really requesting flow_tag = 0 and a user who does not care, in order to be able to report conflicting flow tags on a specific flow. Signed-off-by: Matan Barak Reviewed-by: Aviad Yehezkel Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index a0b48afcb422..f580bc4c2443 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -141,6 +141,7 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg); struct mlx5_flow_act { u32 action; + bool has_flow_tag; u32 flow_tag; u32 encap_id; u32 modify_id; -- cgit v1.2.3 From 5f4183781a303da5ab6731b8c19328c5b9df89fa Mon Sep 17 00:00:00 2001 From: Aviad Yehezkel Date: Sun, 18 Feb 2018 13:17:17 +0200 Subject: net/mlx5: Add empty egress namespace to flow steering core Currently, we don't support egress flow steering namespace in mlx5 flow steering core implementation. However, when we want to encrypt a packet, we model it as a flow steering rule in the egress path. To overcome this, we add an empty egress namespace to flow steering. 
This namespace is initialized only when ipsec support exists. In the future, this will grow to a full blown full steering implementation, resembling the ingress path. Signed-off-by: Matan Barak Signed-off-by: Aviad Yehezkel Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 1 + include/linux/mlx5/mlx5_ifc.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index f580bc4c2443..744ea228acea 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -69,6 +69,7 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_ESW_INGRESS, MLX5_FLOW_NAMESPACE_SNIFFER_RX, MLX5_FLOW_NAMESPACE_SNIFFER_TX, + MLX5_FLOW_NAMESPACE_EGRESS, }; struct mlx5_flow_table; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f4e417686f62..9bc4ea0cf5a9 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1091,6 +1091,7 @@ enum mlx5_flow_destination_type { MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1, MLX5_FLOW_DESTINATION_TYPE_TIR = 0x2, + MLX5_FLOW_DESTINATION_TYPE_PORT = 0x99, MLX5_FLOW_DESTINATION_TYPE_COUNTER = 0x100, }; -- cgit v1.2.3 From 3346c4873733a109bea29467308a754038b886a9 Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Sun, 20 Aug 2017 15:13:08 +0300 Subject: {net,IB}/mlx5: Add flow steering helpers Add helper functions that check if a protocol is part of a flow steering match criteria. 
Signed-off-by: Boris Pismenny Signed-off-by: Matan Barak Signed-off-by: Aviad Yehezkel Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs_helpers.h | 134 ++++++++++++++++++++++++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 8 ++- 2 files changed, 140 insertions(+), 2 deletions(-) create mode 100644 include/linux/mlx5/fs_helpers.h (limited to 'include/linux') diff --git a/include/linux/mlx5/fs_helpers.h b/include/linux/mlx5/fs_helpers.h new file mode 100644 index 000000000000..7b476bbae731 --- /dev/null +++ b/include/linux/mlx5/fs_helpers.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _MLX5_FS_HELPERS_ +#define _MLX5_FS_HELPERS_ + +#include + +#define MLX5_FS_IPV4_VERSION 4 +#define MLX5_FS_IPV6_VERSION 6 + +static inline bool _mlx5_fs_is_outer_ipproto_flow(const u32 *match_c, + const u32 *match_v, u8 match) +{ + const void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + const void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + + return MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_protocol) == 0xff && + MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol) == match; +} + +static inline bool mlx5_fs_is_outer_tcp_flow(const u32 *match_c, + const u32 *match_v) +{ + return _mlx5_fs_is_outer_ipproto_flow(match_c, match_v, IPPROTO_TCP); +} + +static inline bool mlx5_fs_is_outer_udp_flow(const u32 *match_c, + const u32 *match_v) +{ + return _mlx5_fs_is_outer_ipproto_flow(match_c, match_v, IPPROTO_UDP); +} + +static inline bool mlx5_fs_is_vxlan_flow(const u32 *match_c) +{ + void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, + misc_parameters); + + return MLX5_GET(fte_match_set_misc, misc_params_c, vxlan_vni); +} + +static inline bool _mlx5_fs_is_outer_ipv_flow(struct mlx5_core_dev *mdev, + const u32 *match_c, + const u32 *match_v, int version) +{ + int match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_ip_version); + const void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + const void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + + if (!match_ipv) { + u16 ethertype; + + switch (version) { + case MLX5_FS_IPV4_VERSION: + ethertype = ETH_P_IP; + break; + case MLX5_FS_IPV6_VERSION: + ethertype = ETH_P_IPV6; + break; + default: + return false; + } + + return MLX5_GET(fte_match_set_lyr_2_4, headers_c, + ethertype) == 0xffff && + MLX5_GET(fte_match_set_lyr_2_4, headers_v, + ethertype) == ethertype; + } + + return MLX5_GET(fte_match_set_lyr_2_4, headers_c, + ip_version) == 0xf && + MLX5_GET(fte_match_set_lyr_2_4, 
headers_v, + ip_version) == version; +} + +static inline bool +mlx5_fs_is_outer_ipv4_flow(struct mlx5_core_dev *mdev, const u32 *match_c, + const u32 *match_v) +{ + return _mlx5_fs_is_outer_ipv_flow(mdev, match_c, match_v, + MLX5_FS_IPV4_VERSION); +} + +static inline bool +mlx5_fs_is_outer_ipv6_flow(struct mlx5_core_dev *mdev, const u32 *match_c, + const u32 *match_v) +{ + return _mlx5_fs_is_outer_ipv_flow(mdev, match_c, match_v, + MLX5_FS_IPV6_VERSION); +} + +static inline bool mlx5_fs_is_outer_ipsec_flow(const u32 *match_c) +{ + void *misc_params_c = + MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); + + return MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi); +} + +#endif diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 9bc4ea0cf5a9..14ad84afe8ba 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -295,7 +295,9 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 inner_tcp_dport[0x1]; u8 inner_tcp_flags[0x1]; u8 reserved_at_37[0x9]; - u8 reserved_at_40[0x1a]; + u8 reserved_at_40[0x17]; + u8 outer_esp_spi[0x1]; + u8 reserved_at_58[0x2]; u8 bth_dst_qp[0x1]; u8 reserved_at_5b[0x25]; @@ -437,7 +439,9 @@ struct mlx5_ifc_fte_match_set_misc_bits { u8 reserved_at_120[0x28]; u8 bth_dst_qp[0x18]; - u8 reserved_at_160[0xa0]; + u8 reserved_at_160[0x20]; + u8 outer_esp_spi[0x20]; + u8 reserved_at_1a0[0x60]; }; struct mlx5_ifc_cmd_pas_bits { -- cgit v1.2.3 From d3dcf8eb615537526bd42ff27a081d46d337816e Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 4 Mar 2018 17:29:48 +0200 Subject: rhashtable: Fix rhlist duplicates insertion When inserting duplicate objects (those with the same key), current rhlist implementation messes up the chain pointers by updating the bucket pointer instead of prev next pointer to the newly inserted node. This causes missing elements on removal and traversal. Fix that by properly updating pprev pointer to point to the correct rhash_head next pointer.
Issue: 1241076 Change-Id: I86b2c140bcb4aeb10b70a72a267ff590bb2b17e7 Fixes: ca26893f05e8 ('rhashtable: Add rhlist interface') Signed-off-by: Paul Blakey Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index c9df2527e0cd..668a21f04b09 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -766,8 +766,10 @@ slow_path: if (!key || (params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, head)) : - rhashtable_compare(&arg, rht_obj(ht, head)))) + rhashtable_compare(&arg, rht_obj(ht, head)))) { + pprev = &head->next; continue; + } data = rht_obj(ht, head); -- cgit v1.2.3 From 2695578b896aea472b2c0dcbe9d92daa71738484 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Mar 2018 11:41:13 -0800 Subject: net: usbnet: fix potential deadlock on 32bit hosts Marek reported a LOCKDEP issue occurring on 32bit host, that we tracked down to the fact that usbnet could either run from soft or hard irqs. This patch adds u64_stats_update_begin_irqsave() and u64_stats_update_end_irqrestore() helpers to solve this case. [ 17.768040] ================================ [ 17.772239] WARNING: inconsistent lock state [ 17.776511] 4.16.0-rc3-next-20180227-00007-g876c53a7493c #453 Not tainted [ 17.783329] -------------------------------- [ 17.787580] inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. 
[ 17.793607] swapper/0/0 [HC0[0]:SC1[1]:HE1:SE0] takes: [ 17.798751] (&syncp->seq#5){?.-.}, at: [<9b22e5f0>] asix_rx_fixup_internal+0x188/0x288 [ 17.806790] {IN-HARDIRQ-W} state was registered at: [ 17.811677] tx_complete+0x100/0x208 [ 17.815319] __usb_hcd_giveback_urb+0x60/0xf0 [ 17.819770] xhci_giveback_urb_in_irq+0xa8/0x240 [ 17.824469] xhci_td_cleanup+0xf4/0x16c [ 17.828367] xhci_irq+0xe74/0x2240 [ 17.831827] usb_hcd_irq+0x24/0x38 [ 17.835343] __handle_irq_event_percpu+0x98/0x510 [ 17.840111] handle_irq_event_percpu+0x1c/0x58 [ 17.844623] handle_irq_event+0x38/0x5c [ 17.848519] handle_fasteoi_irq+0xa4/0x138 [ 17.852681] generic_handle_irq+0x18/0x28 [ 17.856760] __handle_domain_irq+0x6c/0xe4 [ 17.860941] gic_handle_irq+0x54/0xa0 [ 17.864666] __irq_svc+0x70/0xb0 [ 17.867964] arch_cpu_idle+0x20/0x3c [ 17.871578] arch_cpu_idle+0x20/0x3c [ 17.875190] do_idle+0x144/0x218 [ 17.878468] cpu_startup_entry+0x18/0x1c [ 17.882454] start_kernel+0x394/0x400 [ 17.886177] irq event stamp: 161912 [ 17.889616] hardirqs last enabled at (161912): [<7bedfacf>] __netdev_alloc_skb+0xcc/0x140 [ 17.897893] hardirqs last disabled at (161911): [] __netdev_alloc_skb+0x94/0x140 [ 17.904903] exynos5-hsi2c 12ca0000.i2c: tx timeout [ 17.906116] softirqs last enabled at (161904): [<387102ff>] irq_enter+0x78/0x80 [ 17.906123] softirqs last disabled at (161905): [] irq_exit+0x134/0x158 [ 17.925722]. [ 17.925722] other info that might help us debug this: [ 17.933435] Possible unsafe locking scenario: [ 17.933435]. [ 17.940331] CPU0 [ 17.942488] ---- [ 17.944894] lock(&syncp->seq#5); [ 17.948274] [ 17.950847] lock(&syncp->seq#5); [ 17.954386]. [ 17.954386] *** DEADLOCK *** [ 17.954386]. [ 17.962422] no locks held by swapper/0/0. Fixes: c8b5d129ee29 ("net: usbnet: support 64bit stats") Signed-off-by: Eric Dumazet Reported-by: Marek Szyprowski Cc: Greg Ungerer Signed-off-by: David S. 
Miller --- include/linux/u64_stats_sync.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index 5bdbd9f49395..07ee0f84a46c 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -90,6 +90,28 @@ static inline void u64_stats_update_end(struct u64_stats_sync *syncp) #endif } +static inline unsigned long +u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp) +{ + unsigned long flags = 0; + +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + local_irq_save(flags); + write_seqcount_begin(&syncp->seq); +#endif + return flags; +} + +static inline void +u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp, + unsigned long flags) +{ +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + write_seqcount_end(&syncp->seq); + local_irq_restore(flags); +#endif +} + static inline void u64_stats_update_begin_raw(struct u64_stats_sync *syncp) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) -- cgit v1.2.3 From 1ec54cb44e6731c3cb251bcf9251d65a4b4f6306 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 6 Mar 2018 10:56:31 +0100 Subject: net: unpollute priv_flags space the ipvlan device driver defines and uses 2 bits inside the priv_flags net_device field. Such bits and the related helper are used only inside the ipvlan device driver, and the core networking does not need to be aware of them. This change moves netif_is_ipvlan* helper in the ipvlan driver and re-implement them looking for ipvlan specific symbols instead of using priv_flags. Overall this frees two bits inside priv_flags - and move the following ones to avoid gaps - without any intended functional change. Signed-off-by: Paolo Abeni Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dbe6344b727a..95a613a7cc1c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1381,8 +1381,6 @@ struct net_device_ops { * @IFF_MACVLAN: Macvlan device * @IFF_XMIT_DST_RELEASE_PERM: IFF_XMIT_DST_RELEASE not taking into account * underlying stacked devices - * @IFF_IPVLAN_MASTER: IPvlan master device - * @IFF_IPVLAN_SLAVE: IPvlan slave device * @IFF_L3MDEV_MASTER: device is an L3 master device * @IFF_NO_QUEUE: device can run without qdisc attached * @IFF_OPENVSWITCH: device is a Open vSwitch master @@ -1412,16 +1410,14 @@ enum netdev_priv_flags { IFF_LIVE_ADDR_CHANGE = 1<<15, IFF_MACVLAN = 1<<16, IFF_XMIT_DST_RELEASE_PERM = 1<<17, - IFF_IPVLAN_MASTER = 1<<18, - IFF_IPVLAN_SLAVE = 1<<19, - IFF_L3MDEV_MASTER = 1<<20, - IFF_NO_QUEUE = 1<<21, - IFF_OPENVSWITCH = 1<<22, - IFF_L3MDEV_SLAVE = 1<<23, - IFF_TEAM = 1<<24, - IFF_RXFH_CONFIGURED = 1<<25, - IFF_PHONY_HEADROOM = 1<<26, - IFF_MACSEC = 1<<27, + IFF_L3MDEV_MASTER = 1<<18, + IFF_NO_QUEUE = 1<<19, + IFF_OPENVSWITCH = 1<<20, + IFF_L3MDEV_SLAVE = 1<<21, + IFF_TEAM = 1<<22, + IFF_RXFH_CONFIGURED = 1<<23, + IFF_PHONY_HEADROOM = 1<<24, + IFF_MACSEC = 1<<25, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1442,8 +1438,6 @@ enum netdev_priv_flags { #define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE #define IFF_MACVLAN IFF_MACVLAN #define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM -#define IFF_IPVLAN_MASTER IFF_IPVLAN_MASTER -#define IFF_IPVLAN_SLAVE IFF_IPVLAN_SLAVE #define IFF_L3MDEV_MASTER IFF_L3MDEV_MASTER #define IFF_NO_QUEUE IFF_NO_QUEUE #define IFF_OPENVSWITCH IFF_OPENVSWITCH @@ -4223,16 +4217,6 @@ static inline bool netif_is_macvlan_port(const struct net_device *dev) return dev->priv_flags & IFF_MACVLAN_PORT; } -static inline bool netif_is_ipvlan(const struct 
net_device *dev) -{ - return dev->priv_flags & IFF_IPVLAN_SLAVE; -} - -static inline bool netif_is_ipvlan_port(const struct net_device *dev) -{ - return dev->priv_flags & IFF_IPVLAN_MASTER; -} - static inline bool netif_is_bond_master(const struct net_device *dev) { return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING; -- cgit v1.2.3 From f2531f1976d98a7a4328da7f3cbf31b7c1927738 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 7 Mar 2018 12:18:33 -0800 Subject: pstore/ram: Do not use stack VLA for parity workspace Instead of using a stack VLA for the parity workspace, preallocate a memory region. The preallocation is done to keep from needing to perform allocations during crash dump writing, etc. This also fixes a missed release of librs on free. Signed-off-by: Kees Cook --- include/linux/pstore_ram.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 9395f06e8372..e6d226464838 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -39,6 +39,7 @@ struct persistent_ram_ecc_info { int ecc_size; int symsize; int poly; + uint16_t *par; }; struct persistent_ram_zone { -- cgit v1.2.3 From 581fdddee420cebe2cb781cb3c84c82676a86949 Mon Sep 17 00:00:00 2001 From: Yossi Kuperman Date: Sun, 22 Oct 2017 19:43:58 +0300 Subject: net/mlx5: IPSec, Generalize sandbox QP commands The current code assumes only SA QP commands. Refactor in order to pave the way for new QP commands: 1. Generic cmd response format. 2. SA cmd checks are in dedicated functions. 3. Aligned debug prints.
Signed-off-by: Yossi Kuperman Signed-off-by: Aviad Yehezkel Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc_fpga.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index 255a88d08078..7283fe780f93 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -429,4 +429,20 @@ struct mlx5_ifc_ipsec_counters_bits { u8 dropped_cmd[0x40]; }; +enum mlx5_ifc_fpga_ipsec_response_syndrome { + MLX5_FPGA_IPSEC_RESPONSE_SUCCESS = 0, + MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1, + MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE = 2, + MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3, +}; + +struct mlx5_ifc_fpga_ipsec_cmd_resp { + __be32 syndrome; + union { + __be32 sw_sa_handle; + __be32 flags; + }; + u8 reserved[24]; +} __packed; + #endif /* MLX5_IFC_FPGA_H */ -- cgit v1.2.3 From 788a8210764ce2977095010931959c87b60c2f51 Mon Sep 17 00:00:00 2001 From: Yossi Kuperman Date: Sun, 22 Oct 2017 19:45:45 +0300 Subject: net/mlx5e: IPSec, Add support for ESP trailer removal by hardware Current hardware decrypts and authenticates incoming ESP packets. Subsequently, the software extracts the nexthdr field, truncates the trailer and adjusts csum accordingly. With this patch and a capable device, the trailer is being removed by the hardware and the nexthdr field is conveyed via PET. This way we avoid both the need to access the trailer (cache miss) and to compute its relative checksum, which significantly improve the performance. Experiment shows that trailer removal improves the performance by 2Gbps, (netperf). Both forwarding and host-to-host configurations. 
Signed-off-by: Yossi Kuperman Signed-off-by: Aviad Yehezkel Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc_fpga.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index 7283fe780f93..643544db180b 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -373,7 +373,8 @@ struct mlx5_ifc_fpga_destroy_qp_out_bits { struct mlx5_ifc_ipsec_extended_cap_bits { u8 encapsulation[0x20]; - u8 reserved_0[0x15]; + u8 reserved_0[0x14]; + u8 rx_no_trailer[0x1]; u8 ipv4_fragment[0x1]; u8 ipv6[0x1]; u8 esn[0x1]; @@ -445,4 +446,14 @@ struct mlx5_ifc_fpga_ipsec_cmd_resp { u8 reserved[24]; } __packed; +enum mlx5_ifc_fpga_ipsec_cap { + MLX5_FPGA_IPSEC_CAP_NO_TRAILER = BIT(0), +}; + +struct mlx5_ifc_fpga_ipsec_cmd_cap { + __be32 cmd; + __be32 flags; + u8 reserved[24]; +} __packed; + #endif /* MLX5_IFC_FPGA_H */ -- cgit v1.2.3 From 65802f480008066636a43173b12388bb3fb7bd3a Mon Sep 17 00:00:00 2001 From: Aviad Yehezkel Date: Tue, 16 Jan 2018 16:12:22 +0200 Subject: net/mlx5: IPSec, Add command V2 support This patch adds V2 command support. New FPGA devices support extended features (udp encap, esn, etc.). These features require a new hardware SADB format, so we have a new version of the commands to manipulate it.
Signed-off-by: Yossef Efraim Signed-off-by: Aviad Yehezkel Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc_fpga.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index 643544db180b..dd7e4538159c 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -373,7 +373,9 @@ struct mlx5_ifc_fpga_destroy_qp_out_bits { struct mlx5_ifc_ipsec_extended_cap_bits { u8 encapsulation[0x20]; - u8 reserved_0[0x14]; + u8 reserved_0[0x12]; + u8 v2_command[0x1]; + u8 udp_encap[0x1]; u8 rx_no_trailer[0x1]; u8 ipv4_fragment[0x1]; u8 ipv6[0x1]; -- cgit v1.2.3 From 1d2005e2040b95af4c861e40cf806ff44cd7c883 Mon Sep 17 00:00:00 2001 From: Aviad Yehezkel Date: Mon, 29 Jan 2018 15:05:50 +0200 Subject: net/mlx5: Export ipsec capabilities We will need that for ipsec verbs. Signed-off-by: Aviad Yehezkel Signed-off-by: Saeed Mahameed --- include/linux/mlx5/accel.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 include/linux/mlx5/accel.h (limited to 'include/linux') diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h new file mode 100644 index 000000000000..601280c782d3 --- /dev/null +++ b/include/linux/mlx5/accel.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef __MLX5_ACCEL_H__ +#define __MLX5_ACCEL_H__ + +#include + +enum mlx5_accel_ipsec_caps { + MLX5_ACCEL_IPSEC_CAP_DEVICE = 1 << 0, + MLX5_ACCEL_IPSEC_CAP_ESP = 1 << 2, + MLX5_ACCEL_IPSEC_CAP_IPV6 = 1 << 3, + MLX5_ACCEL_IPSEC_CAP_LSO = 1 << 4, + MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER = 1 << 5, + MLX5_ACCEL_IPSEC_CAP_V2_CMD = 1 << 6, +}; + +#ifdef CONFIG_MLX5_ACCEL + +u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev); + +#else + +static inline u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev) { return 0; } + +#endif +#endif -- cgit v1.2.3 From af9fe19d660e333ca9b0a6e1506e684a1126b9e7 Mon Sep 17 00:00:00 2001 From: Aviad Yehezkel Date: Wed, 17 Jan 2018 11:20:33 +0200 Subject: net/mlx5: Added required metadata capability for ipsec Currently our device requires additional metadata in packet to perform ipsec crypto offload. Signed-off-by: Aviad Yehezkel Signed-off-by: Saeed Mahameed --- include/linux/mlx5/accel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h index 601280c782d3..b674af63689b 100644 --- a/include/linux/mlx5/accel.h +++ b/include/linux/mlx5/accel.h @@ -38,6 +38,7 @@ enum mlx5_accel_ipsec_caps { MLX5_ACCEL_IPSEC_CAP_DEVICE = 1 << 0, + MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA = 1 << 1, MLX5_ACCEL_IPSEC_CAP_ESP = 1 << 2, MLX5_ACCEL_IPSEC_CAP_IPV6 = 1 << 3, MLX5_ACCEL_IPSEC_CAP_LSO = 1 << 4, -- cgit v1.2.3 From d6c4f0298cec8c4c88d33aca17c066995e92fe91 Mon Sep 17 00:00:00 2001 From: Aviad Yehezkel Date: Thu, 18 Jan 2018 13:05:48 +0200 Subject: net/mlx5: Refactor accel IPSec code The current code has one layer that executed FPGA commands and the Ethernet part directly used this code. Since downstream patches introduces support for IPSec in mlx5_ib, we need to provide some abstractions. This patch refactors the accel code into one layer that creates a software IPSec transformation and another one which creates the actual hardware context. 
The internal command implementation is now hidden in the FPGA core layer. The code also adds the ability to share FPGA hardware contexts. If two contexts are the same, only a reference count is taken. Signed-off-by: Aviad Yehezkel Signed-off-by: Saeed Mahameed --- include/linux/mlx5/accel.h | 83 +++++++++++++++++++++++++++++++++++++- include/linux/mlx5/mlx5_ifc_fpga.h | 59 +++++++++++++++++++++++++++ 2 files changed, 140 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h index b674af63689b..da6de465ea6d 100644 --- a/include/linux/mlx5/accel.h +++ b/include/linux/mlx5/accel.h @@ -36,23 +36,102 @@ #include -enum mlx5_accel_ipsec_caps { +enum mlx5_accel_esp_aes_gcm_keymat_iv_algo { + MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ, +}; + +enum mlx5_accel_esp_flags { + MLX5_ACCEL_ESP_FLAGS_TUNNEL = 0, /* Default */ + MLX5_ACCEL_ESP_FLAGS_TRANSPORT = 1UL << 0, + MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED = 1UL << 1, + MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP = 1UL << 2, +}; + +enum mlx5_accel_esp_action { + MLX5_ACCEL_ESP_ACTION_DECRYPT, + MLX5_ACCEL_ESP_ACTION_ENCRYPT, +}; + +enum mlx5_accel_esp_keymats { + MLX5_ACCEL_ESP_KEYMAT_AES_NONE, + MLX5_ACCEL_ESP_KEYMAT_AES_GCM, +}; + +enum mlx5_accel_esp_replay { + MLX5_ACCEL_ESP_REPLAY_NONE, + MLX5_ACCEL_ESP_REPLAY_BMP, +}; + +struct aes_gcm_keymat { + u64 seq_iv; + enum mlx5_accel_esp_aes_gcm_keymat_iv_algo iv_algo; + + u32 salt; + u32 icv_len; + + u32 key_len; + u32 aes_key[256 / 32]; +}; + +struct mlx5_accel_esp_xfrm_attrs { + enum mlx5_accel_esp_action action; + u32 esn; + u32 spi; + u32 seq; + u32 tfc_pad; + u32 flags; + u32 sa_handle; + enum mlx5_accel_esp_replay replay_type; + union { + struct { + u32 size; + + } bmp; + } replay; + enum mlx5_accel_esp_keymats keymat_type; + union { + struct aes_gcm_keymat aes_gcm; + } keymat; +}; + +struct mlx5_accel_esp_xfrm { + struct mlx5_core_dev *mdev; + struct mlx5_accel_esp_xfrm_attrs attrs; +}; + +enum { + 
MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA = 1UL << 0, +}; + +enum mlx5_accel_ipsec_cap { MLX5_ACCEL_IPSEC_CAP_DEVICE = 1 << 0, MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA = 1 << 1, MLX5_ACCEL_IPSEC_CAP_ESP = 1 << 2, MLX5_ACCEL_IPSEC_CAP_IPV6 = 1 << 3, MLX5_ACCEL_IPSEC_CAP_LSO = 1 << 4, MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER = 1 << 5, - MLX5_ACCEL_IPSEC_CAP_V2_CMD = 1 << 6, }; #ifdef CONFIG_MLX5_ACCEL u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev); +struct mlx5_accel_esp_xfrm * +mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, + const struct mlx5_accel_esp_xfrm_attrs *attrs, + u32 flags); +void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm); + #else static inline u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev) { return 0; } +static inline struct mlx5_accel_esp_xfrm * +mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, + const struct mlx5_accel_esp_xfrm_attrs *attrs, + u32 flags) { return ERR_PTR(-EOPNOTSUPP); } +static inline void +mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) {} + #endif #endif diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index dd7e4538159c..debcc57de43a 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -448,6 +448,15 @@ struct mlx5_ifc_fpga_ipsec_cmd_resp { u8 reserved[24]; } __packed; +enum mlx5_ifc_fpga_ipsec_cmd_opcode { + MLX5_FPGA_IPSEC_CMD_OP_ADD_SA = 0, + MLX5_FPGA_IPSEC_CMD_OP_DEL_SA = 1, + MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 = 2, + MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 = 3, + MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2 = 4, + MLX5_FPGA_IPSEC_CMD_OP_SET_CAP = 5, +}; + enum mlx5_ifc_fpga_ipsec_cap { MLX5_FPGA_IPSEC_CAP_NO_TRAILER = BIT(0), }; @@ -458,4 +467,54 @@ struct mlx5_ifc_fpga_ipsec_cmd_cap { u8 reserved[24]; } __packed; +enum mlx5_ifc_fpga_ipsec_sa_flags { + MLX5_FPGA_IPSEC_SA_IPV6 = BIT(2), + MLX5_FPGA_IPSEC_SA_DIR_SX = BIT(3), + MLX5_FPGA_IPSEC_SA_SPI_EN = BIT(4), + MLX5_FPGA_IPSEC_SA_SA_VALID = BIT(5), + 
MLX5_FPGA_IPSEC_SA_IP_ESP = BIT(6), + MLX5_FPGA_IPSEC_SA_IP_AH = BIT(7), +}; + +enum mlx5_ifc_fpga_ipsec_sa_enc_mode { + MLX5_FPGA_IPSEC_SA_ENC_MODE_NONE = 0, + MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128 = 1, + MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128 = 3, +}; + +struct mlx5_ifc_fpga_ipsec_sa_v1 { + __be32 cmd; + u8 key_enc[32]; + u8 key_auth[32]; + __be32 sip[4]; + __be32 dip[4]; + union { + struct { + __be32 reserved; + u8 salt_iv[8]; + __be32 salt; + } __packed gcm; + struct { + u8 salt[16]; + } __packed cbc; + }; + __be32 spi; + __be32 sw_sa_handle; + __be16 tfclen; + u8 enc_mode; + u8 reserved1[2]; + u8 flags; + u8 reserved2[2]; +}; + +struct mlx5_ifc_fpga_ipsec_sa { + struct mlx5_ifc_fpga_ipsec_sa_v1 ipsec_sa_v1; + __be16 udp_sp; + __be16 udp_dp; + u8 reserved1[4]; + __be32 esn; + __be16 vid; /* only 12 bits, rest is reserved */ + __be16 reserved2; +} __packed; + #endif /* MLX5_IFC_FPGA_H */ -- cgit v1.2.3 From 05564d0ae075b7a73339eaa05296c3034e439c32 Mon Sep 17 00:00:00 2001 From: Aviad Yehezkel Date: Sun, 18 Feb 2018 15:07:20 +0200 Subject: net/mlx5: Add flow-steering commands for FPGA IPSec implementation In order to add a context to the FPGA, we need to get both the software transform context (which includes the keys, etc) and the source/destination IPs (which are included in the steering rule). Therefore, we register new set of firmware like commands for the FPGA. Each time a rule is added, the steering core infrastructure calls the FPGA command layer. If the rule is intended for the FPGA, it combines the IPs information with the software transformation context and creates the respective hardware transform. Afterwards, it calls the standard steering command layer. 
Signed-off-by: Aviad Yehezkel Signed-off-by: Saeed Mahameed --- include/linux/mlx5/accel.h | 5 +++++ include/linux/mlx5/fs.h | 3 +++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h index da6de465ea6d..6c694709b0a2 100644 --- a/include/linux/mlx5/accel.h +++ b/include/linux/mlx5/accel.h @@ -121,6 +121,8 @@ mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, const struct mlx5_accel_esp_xfrm_attrs *attrs, u32 flags); void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm); +int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, + const struct mlx5_accel_esp_xfrm_attrs *attrs); #else @@ -132,6 +134,9 @@ mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, u32 flags) { return ERR_PTR(-EOPNOTSUPP); } static inline void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) {} +static inline int +mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, + const struct mlx5_accel_esp_xfrm_attrs *attrs) { return -EOPNOTSUPP; } #endif #endif diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 744ea228acea..b957e52434f8 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -40,6 +40,8 @@ enum { MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16, + MLX5_FLOW_CONTEXT_ACTION_ENCRYPT = 1 << 17, + MLX5_FLOW_CONTEXT_ACTION_DECRYPT = 1 << 18, }; enum { @@ -146,6 +148,7 @@ struct mlx5_flow_act { u32 flow_tag; u32 encap_id; u32 modify_id; + uintptr_t esp_id; }; #define MLX5_DECLARE_FLOW_ACT(name) \ -- cgit v1.2.3 From cb01008390bb0645d4728c7f8825e32d4b540a30 Mon Sep 17 00:00:00 2001 From: Aviad Yehezkel Date: Thu, 18 Jan 2018 16:02:17 +0200 Subject: net/mlx5: IPSec, Add support for ESN Currently ESN is not supported with IPSec device offload. This patch adds ESN support to IPsec device offload. Implementing new xfrm device operation to synchronize offloading device ESN with xfrm received SN. 
New QP command to update SA state at the following: ESN 1 ESN 2 ESN 3 |-----------*-----------|-----------*-----------|-----------* ^ ^ ^ ^ ^ ^ ^ - marks where QP command invoked to update the SA ESN state machine. | - marks the start of the ESN scope (0-2^32-1). At this point move SA ESN overlap bit to zero and increment ESN. * - marks the middle of the ESN scope (2^31). At this point move SA ESN overlap bit to one. Signed-off-by: Aviad Yehezkel Signed-off-by: Yossef Efraim Signed-off-by: Saeed Mahameed --- include/linux/mlx5/accel.h | 2 ++ include/linux/mlx5/mlx5_ifc_fpga.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h index 6c694709b0a2..70e7e5673ce9 100644 --- a/include/linux/mlx5/accel.h +++ b/include/linux/mlx5/accel.h @@ -110,6 +110,8 @@ enum mlx5_accel_ipsec_cap { MLX5_ACCEL_IPSEC_CAP_IPV6 = 1 << 3, MLX5_ACCEL_IPSEC_CAP_LSO = 1 << 4, MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER = 1 << 5, + MLX5_ACCEL_IPSEC_CAP_ESN = 1 << 6, + MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN = 1 << 7, }; #ifdef CONFIG_MLX5_ACCEL diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index debcc57de43a..ec052491ba3d 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -468,6 +468,8 @@ struct mlx5_ifc_fpga_ipsec_cmd_cap { } __packed; enum mlx5_ifc_fpga_ipsec_sa_flags { + MLX5_FPGA_IPSEC_SA_ESN_EN = BIT(0), + MLX5_FPGA_IPSEC_SA_ESN_OVERLAP = BIT(1), MLX5_FPGA_IPSEC_SA_IPV6 = BIT(2), MLX5_FPGA_IPSEC_SA_DIR_SX = BIT(3), MLX5_FPGA_IPSEC_SA_SPI_EN = BIT(4), -- cgit v1.2.3 From 370ed7a9b9176d68c7b13e6cef32efa6ac5b2d97 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 27 Feb 2018 13:22:07 +0100 Subject: extcon: add possibility to get extcon device by OF node Since extcon property is not allowed in DT, extcon subsystem requires another way to get extcon device. Lets try the simplest approach - get edev by of_node. 
Signed-off-by: Andrzej Hajda Acked-by: Chanwoo Choi Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 6d94e82c8ad9..7f033b1ea568 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -230,6 +230,7 @@ extern void devm_extcon_unregister_notifier_all(struct device *dev, * Following APIs get the extcon_dev from devicetree or by through extcon name. */ extern struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name); +extern struct extcon_dev *extcon_find_edev_by_node(struct device_node *node); extern struct extcon_dev *extcon_get_edev_by_phandle(struct device *dev, int index); @@ -283,6 +284,11 @@ static inline struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name) return ERR_PTR(-ENODEV); } +static inline struct extcon_dev *extcon_find_edev_by_node(struct device_node *node) +{ + return ERR_PTR(-ENODEV); +} + static inline struct extcon_dev *extcon_get_edev_by_phandle(struct device *dev, int index) { -- cgit v1.2.3 From 3b3cd24ae61b3bbe9d3cecaff33e7cb3250ce47a Mon Sep 17 00:00:00 2001 From: Manu Gautam Date: Tue, 16 Jan 2018 16:27:09 +0530 Subject: phy: Add USB speed related PHY modes Add the following USB speed related PHY modes: LS (Low Speed), FS (Full Speed), HS (High Speed), SS (Super Speed). Speed related information is required by some QCOM PHY drivers to program the PHY to monitor resume/remote-wakeup events in suspended state. Speed is needed in order to set the correct polarity of wakeup events for detection. E.g. the QUSB2 PHY monitors DP/DM line state depending on whether the speed is LS or FS/HS to detect resume. Similarly, the QMP USB3 PHY in SS mode should monitor RX terminations attach/detach and LFPS events depending on whether the SSPHY is active or not. 
Signed-off-by: Manu Gautam Signed-off-by: Kishon Vijay Abraham I --- include/linux/phy/phy.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 4f8423a948d5..485469e6fa7f 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -25,7 +25,15 @@ struct phy; enum phy_mode { PHY_MODE_INVALID, PHY_MODE_USB_HOST, + PHY_MODE_USB_HOST_LS, + PHY_MODE_USB_HOST_FS, + PHY_MODE_USB_HOST_HS, + PHY_MODE_USB_HOST_SS, PHY_MODE_USB_DEVICE, + PHY_MODE_USB_DEVICE_LS, + PHY_MODE_USB_DEVICE_FS, + PHY_MODE_USB_DEVICE_HS, + PHY_MODE_USB_DEVICE_SS, PHY_MODE_USB_OTG, PHY_MODE_SGMII, PHY_MODE_10GKR, @@ -61,6 +69,7 @@ struct phy_ops { */ struct phy_attrs { u32 bus_width; + enum phy_mode mode; }; /** @@ -144,6 +153,10 @@ int phy_exit(struct phy *phy); int phy_power_on(struct phy *phy); int phy_power_off(struct phy *phy); int phy_set_mode(struct phy *phy, enum phy_mode mode); +static inline enum phy_mode phy_get_mode(struct phy *phy) +{ + return phy->attrs.mode; +} int phy_reset(struct phy *phy); int phy_calibrate(struct phy *phy); static inline int phy_get_bus_width(struct phy *phy) @@ -260,6 +273,11 @@ static inline int phy_set_mode(struct phy *phy, enum phy_mode mode) return -ENOSYS; } +static inline enum phy_mode phy_get_mode(struct phy *phy) +{ + return PHY_MODE_INVALID; +} + static inline int phy_reset(struct phy *phy) { if (!phy) -- cgit v1.2.3 From becaf17a58473e358e056ada2642e895aae93b0e Mon Sep 17 00:00:00 2001 From: Dov Levenglick Date: Fri, 2 Feb 2018 18:34:50 +0200 Subject: phy: fix structure documentation Add missing documentation of structure members and modify the order of documentation to match that of the structure declaration. 
Signed-off-by: Dov Levenglick Signed-off-by: Kishon Vijay Abraham I --- include/linux/phy/phy.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 485469e6fa7f..c9d14eeee7f5 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -81,7 +81,8 @@ struct phy_attrs { * @mutex: mutex to protect phy_ops * @init_count: used to protect when the PHY is used by multiple consumers * @power_count: used to protect when the PHY is used by multiple consumers - * @phy_attrs: used to specify PHY specific attributes + * @attrs: used to specify PHY specific attributes + * @pwr: power regulator associated with the phy */ struct phy { struct device dev; @@ -97,9 +98,10 @@ struct phy { /** * struct phy_provider - represents the phy provider * @dev: phy provider device + * @children: can be used to override the default (dev->of_node) child node * @owner: the module owner having of_xlate - * @of_xlate: function pointer to obtain phy instance from phy pointer * @list: to maintain a linked list of PHY providers + * @of_xlate: function pointer to obtain phy instance from phy pointer */ struct phy_provider { struct device *dev; @@ -110,6 +112,13 @@ struct phy_provider { struct of_phandle_args *args); }; +/** + * struct phy_lookup - PHY association in list of phys managed by the phy driver + * @node: list node + * @dev_id: the device of the association + * @con_id: connection ID string on device + * @phy: the phy of the association + */ struct phy_lookup { struct list_head node; const char *dev_id; -- cgit v1.2.3 From 4902c2025b8ade9c230d4bca25ec5f691e91cb1f Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Wed, 26 Jul 2017 16:47:27 +0300 Subject: clk: ti: add support for register read-modify-write low-level operation Useful for changing few bits on a register, this makes sure for example that the operation is done atomically in case of syscon. 
Signed-off-by: Tero Kristo --- include/linux/clk/ti.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index d18da839b810..9e8611470187 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -211,6 +211,7 @@ enum { * struct ti_clk_ll_ops - low-level ops for clocks * @clk_readl: pointer to register read function * @clk_writel: pointer to register write function + * @clk_rmw: pointer to register read-modify-write function * @clkdm_clk_enable: pointer to clockdomain enable function * @clkdm_clk_disable: pointer to clockdomain disable function * @clkdm_lookup: pointer to clockdomain lookup function @@ -226,6 +227,7 @@ enum { struct ti_clk_ll_ops { u32 (*clk_readl)(const struct clk_omap_reg *reg); void (*clk_writel)(u32 val, const struct clk_omap_reg *reg); + void (*clk_rmw)(u32 val, u32 mask, const struct clk_omap_reg *reg); int (*clkdm_clk_enable)(struct clockdomain *clkdm, struct clk *clk); int (*clkdm_clk_disable)(struct clockdomain *clkdm, struct clk *clk); -- cgit v1.2.3 From 63338a38db955cb4e0352c11b78732157c78d30b Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 7 Mar 2018 08:39:12 +0100 Subject: jailhouse: Provide detection for non-x86 systems Implement jailhouse_paravirt() via device tree probing on architectures != x86. Will be used by the PCI core. 
Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Cc: jailhouse-dev@googlegroups.com Cc: Mark Rutland Cc: linux-pci@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Cc: Andy Shevchenko Cc: Rob Herring Cc: Bjorn Helgaas Link: https://lkml.kernel.org/r/dae9fe0c6e63141c28ca90492fa5712b4c33ffb5.1520408357.git.jan.kiszka@siemens.com --- include/linux/hypervisor.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hypervisor.h b/include/linux/hypervisor.h index b19563f9a8eb..fc08b433c856 100644 --- a/include/linux/hypervisor.h +++ b/include/linux/hypervisor.h @@ -8,15 +8,28 @@ */ #ifdef CONFIG_X86 + +#include #include + static inline void hypervisor_pin_vcpu(int cpu) { x86_platform.hyper.pin_vcpu(cpu); } -#else + +#else /* !CONFIG_X86 */ + +#include + static inline void hypervisor_pin_vcpu(int cpu) { } -#endif + +static inline bool jailhouse_paravirt(void) +{ + return of_find_compatible_node(NULL, NULL, "jailhouse,cell"); +} + +#endif /* !CONFIG_X86 */ #endif /* __LINUX_HYPEVISOR_H */ -- cgit v1.2.3 From 5d6ae4f0da8a64a185074dabb1b2f8c148efa741 Mon Sep 17 00:00:00 2001 From: Chris Dickens Date: Sun, 31 Dec 2017 18:59:42 -0800 Subject: usb: gadget: composite: fix incorrect handling of OS desc requests When handling an OS descriptor request, one of the first operations is to zero out the request buffer using the wLength from the setup packet. There is no bounds checking, so a wLength > 4096 would clobber memory adjacent to the request buffer. Fix this by taking the min of wLength and the request buffer length prior to the memset. While at it, define the buffer length in a header file so that magic numbers don't appear throughout the code. When returning data to the host, the data length should be the min of the wLength and the valid data we have to return. 
Currently we are returning wLength, thus requests for a wLength greater than the amount of data in the OS descriptor buffer would return invalid (albeit zero'd) data following the valid descriptor data. Fix this by counting the number of bytes when constructing the data and using this when determining the length of the request. Signed-off-by: Chris Dickens Signed-off-by: Felipe Balbi --- include/linux/usb/composite.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index cef0e44601f8..4b6b9283fa7b 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -54,6 +54,9 @@ /* big enough to hold our biggest descriptor */ #define USB_COMP_EP0_BUFSIZ 1024 +/* OS feature descriptor length <= 4kB */ +#define USB_COMP_EP0_OS_DESC_BUFSIZ 4096 + #define USB_MS_TO_HS_INTERVAL(x) (ilog2((x * 1000 / 125)) + 1) struct usb_configuration; -- cgit v1.2.3 From f2b9ba871beb92fd6884b957acb14621b15fbe2b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 6 Mar 2018 17:15:32 +0000 Subject: arm64/kernel: kaslr: reduce module randomization range to 4 GB We currently have to rely on the GCC large code model for KASLR for two distinct but related reasons: - if we enable full randomization, modules will be loaded very far away from the core kernel, where they are out of range for ADRP instructions, - even without full randomization, the fact that the 128 MB module region is now no longer fully reserved for kernel modules means that there is a very low likelihood that the normal bottom-up allocation of other vmalloc regions may collide, and use up the range for other things. Large model code is suboptimal, given that each symbol reference involves a literal load that goes through the D-cache, reducing cache utilization. But more importantly, literals are not instructions but part of .text nonetheless, and hence mapped with executable permissions. 
So let's get rid of our dependency on the large model for KASLR, by: - reducing the full randomization range to 4 GB, thereby ensuring that ADRP references between modules and the kernel are always in range, - reducing the spillover range to 4 GB as well, so that we fall back to a region that is still guaranteed to be in range, - moving the randomization window of the core kernel to the middle of the VMALLOC space. Note that KASAN always uses the module region outside of the vmalloc space, so keep the kernel close to that if KASAN is enabled. Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- include/linux/sizes.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sizes.h b/include/linux/sizes.h index ce3e8150c174..fbde0bc7e882 100644 --- a/include/linux/sizes.h +++ b/include/linux/sizes.h @@ -8,6 +8,8 @@ #ifndef __LINUX_SIZES_H__ #define __LINUX_SIZES_H__ +#include <linux/const.h> + #define SZ_1 0x00000001 #define SZ_2 0x00000002 #define SZ_4 0x00000004 @@ -44,4 +46,6 @@ #define SZ_1G 0x40000000 #define SZ_2G 0x80000000 +#define SZ_4G _AC(0x100000000, ULL) + #endif /* __LINUX_SIZES_H__ */ -- cgit v1.2.3 From e403d00573431e1e3de1710a91c6090c60ec16af Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Thu, 25 Jan 2018 16:00:12 +0200 Subject: clk: tegra: MBIST work around for Tegra210 Tegra210 has a hw bug which can cause IP blocks to lock up when ungating a domain. The reason is that the logic responsible for resetting the memory built-in self test mode can come up in an undefined state because its clock is gated by a second level clock gate (SLCG). Work around this by making sure the logic will get some clock edges by ensuring the relevant clock is enabled and temporarily overriding the relevant SLCGs. Unfortunately for some IP blocks, the control bits for overriding the SLCGs are not in CAR, but in the IP block itself. This means we need to map a few extra register banks in the clock code. 
Signed-off-by: Peter De Schrijver Reviewed-by: Jon Hunter Tested-by: Jon Hunter Tested-by: Hector Martin Tested-by: Andre Heider Tested-by: Mikko Perttunen Signed-off-by: Thierry Reding fixup mbist --- include/linux/clk/tegra.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h index d23c9cf26993..afb9edfa5d58 100644 --- a/include/linux/clk/tegra.h +++ b/include/linux/clk/tegra.h @@ -128,5 +128,6 @@ extern void tegra210_sata_pll_hw_sequence_start(void); extern void tegra210_set_sata_pll_seq_sw(bool state); extern void tegra210_put_utmipll_in_iddq(void); extern void tegra210_put_utmipll_out_iddq(void); +extern int tegra210_clk_handle_mbist_war(unsigned int id); #endif /* __LINUX_CLK_TEGRA_H_ */ -- cgit v1.2.3 From 66f91322f39cd18a01524264464c2ff4c98c936e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 7 Mar 2018 17:10:02 -0800 Subject: block: Reorder the queue flag manipulation function definitions Move the definition of queue_flag_clear_unlocked() up and move the definition of queue_in_flight() down such that all queue flag manipulation function definitions become contiguous. This patch does not change any functionality. Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Ming Lei Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. 
Petersen Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 667a9b0053d9..c351aaec3ca7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -726,6 +726,12 @@ static inline void queue_flag_set_unlocked(unsigned int flag, __set_bit(flag, &q->queue_flags); } +static inline void queue_flag_clear_unlocked(unsigned int flag, + struct request_queue *q) +{ + __clear_bit(flag, &q->queue_flags); +} + static inline int queue_flag_test_and_clear(unsigned int flag, struct request_queue *q) { @@ -758,17 +764,6 @@ static inline void queue_flag_set(unsigned int flag, struct request_queue *q) __set_bit(flag, &q->queue_flags); } -static inline void queue_flag_clear_unlocked(unsigned int flag, - struct request_queue *q) -{ - __clear_bit(flag, &q->queue_flags); -} - -static inline int queue_in_flight(struct request_queue *q) -{ - return q->in_flight[0] + q->in_flight[1]; -} - static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) { queue_lockdep_assert_held(q); @@ -804,6 +799,11 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) extern int blk_set_preempt_only(struct request_queue *q); extern void blk_clear_preempt_only(struct request_queue *q); +static inline int queue_in_flight(struct request_queue *q) +{ + return q->in_flight[0] + q->in_flight[1]; +} + static inline bool blk_account_rq(struct request *rq) { return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq); -- cgit v1.2.3 From 8814ce8a0f680599a837af18aefdec774e5c7b97 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 7 Mar 2018 17:10:04 -0800 Subject: block: Introduce blk_queue_flag_{set,clear,test_and_{set,clear}}() Introduce functions that modify the queue flags and that protect these modifications with the request queue lock. 
Except for moving one wake_up_all() call from inside to outside a critical section, this patch does not change any functionality. Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Ming Lei Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. Petersen Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c351aaec3ca7..f84b3c7887b1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -707,6 +707,11 @@ struct request_queue { (1 << QUEUE_FLAG_SAME_COMP) | \ (1 << QUEUE_FLAG_POLL)) +void blk_queue_flag_set(unsigned int flag, struct request_queue *q); +void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); +bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); +bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); + /* * @q->queue_lock is set while a queue is being initialized. Since we know * that no other threads access the queue object before @q->queue_lock has -- cgit v1.2.3 From 1db2008b79a32db2ad41338c6c74c4735cf74f6d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 7 Mar 2018 17:10:11 -0800 Subject: block: Complain if queue_flag_(set|clear)_unlocked() is abused Since it is not safe to use queue_flag_(set|clear)_unlocked() without holding the queue lock after the sysfs entries for a queue have been created, complain if this happens. Cc: Mike Snitzer Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Ming Lei Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. 
Petersen Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f84b3c7887b1..888c9b25cb8f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -728,12 +728,18 @@ static inline void queue_lockdep_assert_held(struct request_queue *q) static inline void queue_flag_set_unlocked(unsigned int flag, struct request_queue *q) { + if (test_bit(QUEUE_FLAG_INIT_DONE, &q->queue_flags) && + kref_read(&q->kobj.kref)) + lockdep_assert_held(q->queue_lock); __set_bit(flag, &q->queue_flags); } static inline void queue_flag_clear_unlocked(unsigned int flag, struct request_queue *q) { + if (test_bit(QUEUE_FLAG_INIT_DONE, &q->queue_flags) && + kref_read(&q->kobj.kref)) + lockdep_assert_held(q->queue_lock); __clear_bit(flag, &q->queue_flags); } -- cgit v1.2.3 From 8a0ac14b8da9b86cfbe7aace40c8d485ed5c5b97 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 7 Mar 2018 17:10:12 -0800 Subject: block: Move the queue_flag_*() functions from a public into a private header file This patch helps to avoid that new code gets introduced in block drivers that manipulates queue flags without holding the queue lock when that lock should be held. Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Ming Lei Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. 
Petersen Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 69 -------------------------------------------------- 1 file changed, 69 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 888c9b25cb8f..19eaf8d89368 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -712,75 +712,6 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); -/* - * @q->queue_lock is set while a queue is being initialized. Since we know - * that no other threads access the queue object before @q->queue_lock has - * been set, it is safe to manipulate queue flags without holding the - * queue_lock if @q->queue_lock == NULL. See also blk_alloc_queue_node() and - * blk_init_allocated_queue(). - */ -static inline void queue_lockdep_assert_held(struct request_queue *q) -{ - if (q->queue_lock) - lockdep_assert_held(q->queue_lock); -} - -static inline void queue_flag_set_unlocked(unsigned int flag, - struct request_queue *q) -{ - if (test_bit(QUEUE_FLAG_INIT_DONE, &q->queue_flags) && - kref_read(&q->kobj.kref)) - lockdep_assert_held(q->queue_lock); - __set_bit(flag, &q->queue_flags); -} - -static inline void queue_flag_clear_unlocked(unsigned int flag, - struct request_queue *q) -{ - if (test_bit(QUEUE_FLAG_INIT_DONE, &q->queue_flags) && - kref_read(&q->kobj.kref)) - lockdep_assert_held(q->queue_lock); - __clear_bit(flag, &q->queue_flags); -} - -static inline int queue_flag_test_and_clear(unsigned int flag, - struct request_queue *q) -{ - queue_lockdep_assert_held(q); - - if (test_bit(flag, &q->queue_flags)) { - __clear_bit(flag, &q->queue_flags); - return 1; - } - - return 0; -} - -static inline int queue_flag_test_and_set(unsigned int flag, - struct request_queue *q) -{ - queue_lockdep_assert_held(q); - - if 
(!test_bit(flag, &q->queue_flags)) { - __set_bit(flag, &q->queue_flags); - return 0; - } - - return 1; -} - -static inline void queue_flag_set(unsigned int flag, struct request_queue *q) -{ - queue_lockdep_assert_held(q); - __set_bit(flag, &q->queue_flags); -} - -static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) -{ - queue_lockdep_assert_held(q); - __clear_bit(flag, &q->queue_flags); -} - #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) -- cgit v1.2.3 From 84a1d9c4820080bebcbd413a845076dcb62f45fa Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 8 Mar 2018 15:45:03 +0000 Subject: net: ethtool: extend RXNFC API to support RSS spreading of filter matches We use a two-step process to configure a filter with RSS spreading. First, the RSS context is allocated and configured using ETHTOOL_SRSSH; this returns an identifier (rss_context) which can then be passed to subsequent invocations of ETHTOOL_SRXCLSRLINS to specify that the offset from the RSS indirection table lookup should be added to the queue number (ring_cookie) when delivering the packet. Drivers for devices which can only use the indirection table entry directly (not add it to a base queue number) should reject rule insertions combining RSS with a nonzero ring_cookie. Signed-off-by: Edward Cree Signed-off-by: David S. 
Miller --- include/linux/ethtool.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 2ec41a7eb54f..ebe41811ed34 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -371,6 +371,11 @@ struct ethtool_ops { u8 *hfunc); int (*set_rxfh)(struct net_device *, const u32 *indir, const u8 *key, const u8 hfunc); + int (*get_rxfh_context)(struct net_device *, u32 *indir, u8 *key, + u8 *hfunc, u32 rss_context); + int (*set_rxfh_context)(struct net_device *, const u32 *indir, + const u8 *key, const u8 hfunc, + u32 *rss_context, bool delete); void (*get_channels)(struct net_device *, struct ethtool_channels *); int (*set_channels)(struct net_device *, struct ethtool_channels *); int (*get_dump_flag)(struct net_device *, struct ethtool_dump *); -- cgit v1.2.3 From 4042d003a0792a3b05c7c424219e4c6cf1abfe76 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 20 Dec 2017 15:37:26 +0100 Subject: cpufreq/schedutil: Remove unused CPUFREQ_DL Bitrot... Signed-off-by: Peter Zijlstra (Intel) Cc: Juri Lelli Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Rafael J. 
Wysocki Cc: Thomas Gleixner Cc: Viresh Kumar Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/cpufreq.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index 0b55834efd46..d963cfd3a0c2 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h @@ -9,8 +9,7 @@ */ #define SCHED_CPUFREQ_RT (1U << 0) -#define SCHED_CPUFREQ_DL (1U << 1) -#define SCHED_CPUFREQ_IOWAIT (1U << 2) +#define SCHED_CPUFREQ_IOWAIT (1U << 1) #ifdef CONFIG_CPU_FREQ struct update_util_data { -- cgit v1.2.3 From 8f111bc357aa811e0bb5fdfe34c4c9efdafc15b9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 20 Dec 2017 16:26:12 +0100 Subject: cpufreq/schedutil: Rewrite CPUFREQ_RT support Instead of trying to duplicate scheduler state to track if an RT task is running, directly use the scheduler runqueue state for it. This vastly simplifies things and fixes a number of bugs related to sugov and the scheduler getting out of sync wrt this state. As a consequence we now also update the remote cfs/dl state when iterating the shared mask. Signed-off-by: Peter Zijlstra (Intel) Cc: Juri Lelli Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Rafael J. 
Wysocki Cc: Thomas Gleixner Cc: Viresh Kumar Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/cpufreq.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index d963cfd3a0c2..b48f2fb3b316 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h @@ -8,8 +8,7 @@ * Interface between cpufreq drivers and the scheduler: */ -#define SCHED_CPUFREQ_RT (1U << 0) -#define SCHED_CPUFREQ_IOWAIT (1U << 1) +#define SCHED_CPUFREQ_IOWAIT (1U << 0) #ifdef CONFIG_CPU_FREQ struct update_util_data { -- cgit v1.2.3 From 00357f5ec5d67a52a175da6f29f85c2c19d59bc8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 21 Dec 2017 15:06:50 +0100 Subject: sched/nohz: Clean up nohz enter/exit The primary observation is that nohz enter/exit is always from the current CPU, therefore NOHZ_TICK_STOPPED does not in fact need to be an atomic. Secondary is that we appear to have 2 nearly identical hooks in the nohz enter code, set_cpu_sd_state_idle() and nohz_balance_enter_idle(). Fold the whole set_cpu_sd_state thing into nohz_balance_{enter,exit}_idle. Removes an atomic op from both enter and exit paths. 
Signed-off-by: Peter Zijlstra (Intel) Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/nohz.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h index 094217273ff9..b36f4cf38111 100644 --- a/include/linux/sched/nohz.h +++ b/include/linux/sched/nohz.h @@ -16,11 +16,9 @@ static inline void cpu_load_update_nohz_stop(void) { } #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void nohz_balance_enter_idle(int cpu); -extern void set_cpu_sd_state_idle(void); extern int get_nohz_timer_target(void); #else static inline void nohz_balance_enter_idle(int cpu) { } -static inline void set_cpu_sd_state_idle(void) { } #endif #ifdef CONFIG_NO_HZ_COMMON -- cgit v1.2.3 From ea14b57e8a181ac0561eba7a787e088f8c89f822 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 2 Feb 2018 10:27:00 +0100 Subject: sched/cpufreq: Provide migration hint It was suggested that a migration hint might be useful for the CPU-freq governors. Signed-off-by: Peter Zijlstra (Intel) Cc: Juri Lelli Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Rafael J. 
Wysocki Cc: Thomas Gleixner Cc: Viresh Kumar Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/cpufreq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index b48f2fb3b316..59667444669f 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h @@ -9,6 +9,7 @@ */ #define SCHED_CPUFREQ_IOWAIT (1U << 0) +#define SCHED_CPUFREQ_MIGRATION (1U << 1) #ifdef CONFIG_CPU_FREQ struct update_util_data { -- cgit v1.2.3 From 484cb153fe5ffcd0b7cf423cf29aaeadd0e862b1 Mon Sep 17 00:00:00 2001 From: Radion Mirchevsky Date: Wed, 4 Oct 2017 14:53:54 +0300 Subject: thunderbolt: Add tb_xdomain_find_by_route() This is needed by the new ICM interface to find xdomains by route string instead of link and depth. While there update existing tb_xdomain_find_* functions to use tb_xdomain_get() instead of open-coding the same. Signed-off-by: Radion Mirchevsky Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko --- include/linux/thunderbolt.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 7b69853188b1..27b9be34d4b9 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -237,6 +237,7 @@ int tb_xdomain_enable_paths(struct tb_xdomain *xd, u16 transmit_path, u16 receive_ring); int tb_xdomain_disable_paths(struct tb_xdomain *xd); struct tb_xdomain *tb_xdomain_find_by_uuid(struct tb *tb, const uuid_t *uuid); +struct tb_xdomain *tb_xdomain_find_by_route(struct tb *tb, u64 route); static inline struct tb_xdomain * tb_xdomain_find_by_uuid_locked(struct tb *tb, const uuid_t *uuid) @@ -250,6 +251,18 @@ tb_xdomain_find_by_uuid_locked(struct tb *tb, const uuid_t *uuid) return xd; } +static inline struct tb_xdomain * +tb_xdomain_find_by_route_locked(struct tb *tb, u64 route) +{ + struct tb_xdomain *xd; + + 
mutex_lock(&tb->lock); + xd = tb_xdomain_find_by_route(tb, route); + mutex_unlock(&tb->lock); + + return xd; +} + static inline struct tb_xdomain *tb_xdomain_get(struct tb_xdomain *xd) { if (xd) -- cgit v1.2.3 From 9aaa3b8b4c56d24210acef37b7c800ca218c3d40 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Sun, 21 Jan 2018 12:08:04 +0200 Subject: thunderbolt: Add support for preboot ACL Preboot ACL is a mechanism that allows connecting Thunderbolt devices at boot time in a more secure way than the legacy Thunderbolt boot support. As with the legacy boot option, this also needs to be enabled from the BIOS before booting is allowed. The difference from the legacy mode is that the userspace software explicitly adds device UUIDs by sending a special message to the ICM firmware. Only the devices listed in the boot ACL are connected automatically during the boot. This works in both "user" and "secure" security levels. We implement this in Linux by exposing a new sysfs attribute (boot_acl) below each Thunderbolt domain. The userspace software can then update the full list as needed. 
Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko --- include/linux/thunderbolt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 27b9be34d4b9..47251844d064 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -65,6 +65,7 @@ enum tb_security_level { * @cm_ops: Connection manager specific operations vector * @index: Linux assigned domain number * @security_level: Current security level + * @nboot_acl: Number of boot ACLs the domain supports * @privdata: Private connection manager specific data */ struct tb { @@ -77,6 +78,7 @@ struct tb { const struct tb_cm_ops *cm_ops; int index; enum tb_security_level security_level; + size_t nboot_acl; unsigned long privdata[0]; }; -- cgit v1.2.3 From 6fc14e1a44e53c472865252b47398346a27d600e Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 8 Dec 2017 14:11:39 +0300 Subject: thunderbolt: Introduce USB only (SL4) security level This new security level works so that it creates one PCIe tunnel to the connected Thunderbolt dock, removing PCIe links downstream of the dock. This leaves only the internal USB controller visible. Display Port tunnels are created normally. While there make sure security sysfs attribute returns "unknown" for any future security level. 
Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko --- include/linux/thunderbolt.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 47251844d064..a3ed26082bc1 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -45,12 +45,16 @@ enum tb_cfg_pkg_type { * @TB_SECURITY_USER: User approval required at minimum * @TB_SECURITY_SECURE: One time saved key required at minimum * @TB_SECURITY_DPONLY: Only tunnel Display port (and USB) + * @TB_SECURITY_USBONLY: Only tunnel USB controller of the connected + * Thunderbolt dock (and Display Port). All PCIe + * links downstream of the dock are removed. */ enum tb_security_level { TB_SECURITY_NONE, TB_SECURITY_USER, TB_SECURITY_SECURE, TB_SECURITY_DPONLY, + TB_SECURITY_USBONLY, }; /** -- cgit v1.2.3 From 79134e6ce2c9d1a00eab4d98cb48f975dd2474cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Mar 2018 12:51:41 -0800 Subject: net: do not create fallback tunnels for non-default namespaces fallback tunnels (like tunl0, gre0, gretap0, erspan0, sit0, ip6tnl0, ip6gre0) are automatically created when the corresponding module is loaded. These tunnels are also automatically created when a new network namespace is created, at a great cost. In many cases, netns are used for isolation purposes, and these extra network devices are a waste of resources. We are using thousands of netns per host, and hit the netns creation/delete bottleneck a lot. (Many thanks to Kirill for recent work on this) Add a new sysctl so that we can opt-out from this automatic creation. Note that these tunnels are still created for the initial namespace, to be the least intrusive for typical setups. 
Tested: lpk43:~# cat add_del_unshare.sh for i in `seq 1 40` do (for j in `seq 1 100` ; do unshare -n /bin/true >/dev/null ; done) & done wait lpk43:~# echo 0 >/proc/sys/net/core/fb_tunnels_only_for_init_net lpk43:~# time ./add_del_unshare.sh real 0m37.521s user 0m0.886s sys 7m7.084s lpk43:~# echo 1 >/proc/sys/net/core/fb_tunnels_only_for_init_net lpk43:~# time ./add_del_unshare.sh real 0m4.761s user 0m0.851s sys 1m8.343s lpk43:~# Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 95a613a7cc1c..9711108c3916 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -585,6 +585,13 @@ struct netdev_queue { #endif } ____cacheline_aligned_in_smp; +extern int sysctl_fb_tunnels_only_for_init_net; + +static inline bool net_has_fallback_tunnels(const struct net *net) +{ + return net == &init_net || !sysctl_fb_tunnels_only_for_init_net; +} + static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) { #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) -- cgit v1.2.3 From 3a4030761ea88ff439030ca98e3094b9900e96b7 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 9 Mar 2018 14:50:34 +0800 Subject: vhost_net: examine pointer types during un-producing After commit fc72d1d54dd9 ("tuntap: XDP transmission"), we can actually queue XDP pointers in the pointer ring, so we should examine the pointer type before freeing the pointer. Fixes: fc72d1d54dd9 ("tuntap: XDP transmission") Reported-by: Michael S. Tsirkin Acked-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. 
Miller --- include/linux/if_tun.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index c5b0a75a7812..fd00170b494f 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -25,6 +25,7 @@ struct ptr_ring *tun_get_tx_ring(struct file *file); bool tun_is_xdp_buff(void *ptr); void *tun_xdp_to_ptr(void *ptr); void *tun_ptr_to_xdp(void *ptr); +void tun_ptr_free(void *ptr); #else #include #include @@ -50,5 +51,8 @@ static inline void *tun_ptr_to_xdp(void *ptr) { return NULL; } +static inline void tun_ptr_free(void *ptr) +{ +} #endif /* CONFIG_TUN */ #endif /* __IF_TUN_H */ -- cgit v1.2.3 From ccefd976f921a280327b17b2896bc809baa7b672 Mon Sep 17 00:00:00 2001 From: Adam Thomson Date: Tue, 2 Jan 2018 15:50:49 +0000 Subject: typec: tcpm: Add PD Rev 3.0 definitions to PD header This commit adds definitions for PD Rev 3.0 messages, including APDO PPS and extended message support for TCPM. Signed-off-by: Adam Thomson Acked-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd.h | 185 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 174 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index b3d41d7409b3..ff359bdfdc7b 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -35,6 +35,13 @@ enum pd_ctrl_msg_type { PD_CTRL_WAIT = 12, PD_CTRL_SOFT_RESET = 13, /* 14-15 Reserved */ + PD_CTRL_NOT_SUPP = 16, + PD_CTRL_GET_SOURCE_CAP_EXT = 17, + PD_CTRL_GET_STATUS = 18, + PD_CTRL_FR_SWAP = 19, + PD_CTRL_GET_PPS_STATUS = 20, + PD_CTRL_GET_COUNTRY_CODES = 21, + /* 22-31 Reserved */ }; enum pd_data_msg_type { @@ -43,13 +50,39 @@ enum pd_data_msg_type { PD_DATA_REQUEST = 2, PD_DATA_BIST = 3, PD_DATA_SINK_CAP = 4, - /* 5-14 Reserved */ + PD_DATA_BATT_STATUS = 5, + PD_DATA_ALERT = 6, + PD_DATA_GET_COUNTRY_INFO = 7, + /* 8-14 Reserved */ PD_DATA_VENDOR_DEF = 15, + /* 16-31 
Reserved */ +}; + +enum pd_ext_msg_type { + /* 0 Reserved */ + PD_EXT_SOURCE_CAP_EXT = 1, + PD_EXT_STATUS = 2, + PD_EXT_GET_BATT_CAP = 3, + PD_EXT_GET_BATT_STATUS = 4, + PD_EXT_BATT_CAP = 5, + PD_EXT_GET_MANUFACTURER_INFO = 6, + PD_EXT_MANUFACTURER_INFO = 7, + PD_EXT_SECURITY_REQUEST = 8, + PD_EXT_SECURITY_RESPONSE = 9, + PD_EXT_FW_UPDATE_REQUEST = 10, + PD_EXT_FW_UPDATE_RESPONSE = 11, + PD_EXT_PPS_STATUS = 12, + PD_EXT_COUNTRY_INFO = 13, + PD_EXT_COUNTRY_CODES = 14, + /* 15-31 Reserved */ }; #define PD_REV10 0x0 #define PD_REV20 0x1 +#define PD_REV30 0x2 +#define PD_MAX_REV PD_REV30 +#define PD_HEADER_EXT_HDR BIT(15) #define PD_HEADER_CNT_SHIFT 12 #define PD_HEADER_CNT_MASK 0x7 #define PD_HEADER_ID_SHIFT 9 @@ -59,18 +92,19 @@ enum pd_data_msg_type { #define PD_HEADER_REV_MASK 0x3 #define PD_HEADER_DATA_ROLE BIT(5) #define PD_HEADER_TYPE_SHIFT 0 -#define PD_HEADER_TYPE_MASK 0xf +#define PD_HEADER_TYPE_MASK 0x1f -#define PD_HEADER(type, pwr, data, id, cnt) \ +#define PD_HEADER(type, pwr, data, rev, id, cnt, ext_hdr) \ ((((type) & PD_HEADER_TYPE_MASK) << PD_HEADER_TYPE_SHIFT) | \ ((pwr) == TYPEC_SOURCE ? PD_HEADER_PWR_ROLE : 0) | \ ((data) == TYPEC_HOST ? PD_HEADER_DATA_ROLE : 0) | \ - (PD_REV20 << PD_HEADER_REV_SHIFT) | \ + (rev << PD_HEADER_REV_SHIFT) | \ (((id) & PD_HEADER_ID_MASK) << PD_HEADER_ID_SHIFT) | \ - (((cnt) & PD_HEADER_CNT_MASK) << PD_HEADER_CNT_SHIFT)) + (((cnt) & PD_HEADER_CNT_MASK) << PD_HEADER_CNT_SHIFT) | \ + ((ext_hdr) ? 
PD_HEADER_EXT_HDR : 0)) #define PD_HEADER_LE(type, pwr, data, id, cnt) \ - cpu_to_le16(PD_HEADER((type), (pwr), (data), (id), (cnt))) + cpu_to_le16(PD_HEADER((type), (pwr), (data), PD_REV20, (id), (cnt), (0))) static inline unsigned int pd_header_cnt(u16 header) { @@ -102,16 +136,75 @@ static inline unsigned int pd_header_msgid_le(__le16 header) return pd_header_msgid(le16_to_cpu(header)); } +static inline unsigned int pd_header_rev(u16 header) +{ + return (header >> PD_HEADER_REV_SHIFT) & PD_HEADER_REV_MASK; +} + +static inline unsigned int pd_header_rev_le(__le16 header) +{ + return pd_header_rev(le16_to_cpu(header)); +} + +#define PD_EXT_HDR_CHUNKED BIT(15) +#define PD_EXT_HDR_CHUNK_NUM_SHIFT 11 +#define PD_EXT_HDR_CHUNK_NUM_MASK 0xf +#define PD_EXT_HDR_REQ_CHUNK BIT(10) +#define PD_EXT_HDR_DATA_SIZE_SHIFT 0 +#define PD_EXT_HDR_DATA_SIZE_MASK 0x1ff + +#define PD_EXT_HDR(data_size, req_chunk, chunk_num, chunked) \ + ((((data_size) & PD_EXT_HDR_DATA_SIZE_MASK) << PD_EXT_HDR_DATA_SIZE_SHIFT) | \ + ((req_chunk) ? PD_EXT_HDR_REQ_CHUNK : 0) | \ + (((chunk_num) & PD_EXT_HDR_CHUNK_NUM_MASK) << PD_EXT_HDR_CHUNK_NUM_SHIFT) | \ + ((chunked) ? 
PD_EXT_HDR_CHUNKED : 0)) + +#define PD_EXT_HDR_LE(data_size, req_chunk, chunk_num, chunked) \ + cpu_to_le16(PD_EXT_HDR((data_size), (req_chunk), (chunk_num), (chunked))) + +static inline unsigned int pd_ext_header_chunk_num(u16 ext_header) +{ + return (ext_header >> PD_EXT_HDR_CHUNK_NUM_SHIFT) & + PD_EXT_HDR_CHUNK_NUM_MASK; +} + +static inline unsigned int pd_ext_header_data_size(u16 ext_header) +{ + return (ext_header >> PD_EXT_HDR_DATA_SIZE_SHIFT) & + PD_EXT_HDR_DATA_SIZE_MASK; +} + +static inline unsigned int pd_ext_header_data_size_le(__le16 ext_header) +{ + return pd_ext_header_data_size(le16_to_cpu(ext_header)); +} + #define PD_MAX_PAYLOAD 7 +#define PD_EXT_MAX_CHUNK_DATA 26 /** - * struct pd_message - PD message as seen on wire - * @header: PD message header - * @payload: PD message payload - */ + * struct pd_chunked_ext_message_data - PD chunked extended message data as + * seen on wire + * @header: PD extended message header + * @data: PD extended message data + */ +struct pd_chunked_ext_message_data { + __le16 header; + u8 data[PD_EXT_MAX_CHUNK_DATA]; +} __packed; + +/** + * struct pd_message - PD message as seen on wire + * @header: PD message header + * @payload: PD message payload + * @ext_msg: PD message chunked extended message data + */ struct pd_message { __le16 header; - __le32 payload[PD_MAX_PAYLOAD]; + union { + __le32 payload[PD_MAX_PAYLOAD]; + struct pd_chunked_ext_message_data ext_msg; + }; } __packed; /* PDO: Power Data Object */ @@ -121,6 +214,7 @@ enum pd_pdo_type { PDO_TYPE_FIXED = 0, PDO_TYPE_BATT = 1, PDO_TYPE_VAR = 2, + PDO_TYPE_APDO = 3, }; #define PDO_TYPE_SHIFT 30 @@ -174,6 +268,34 @@ enum pd_pdo_type { (PDO_TYPE(PDO_TYPE_VAR) | PDO_VAR_MIN_VOLT(min_mv) | \ PDO_VAR_MAX_VOLT(max_mv) | PDO_VAR_MAX_CURR(max_ma)) +enum pd_apdo_type { + APDO_TYPE_PPS = 0, +}; + +#define PDO_APDO_TYPE_SHIFT 28 /* Only valid value currently is 0x0 - PPS */ +#define PDO_APDO_TYPE_MASK 0x3 + +#define PDO_APDO_TYPE(t) ((t) << PDO_APDO_TYPE_SHIFT) + +#define 
PDO_PPS_APDO_MAX_VOLT_SHIFT 17 /* 100mV units */ +#define PDO_PPS_APDO_MIN_VOLT_SHIFT 8 /* 100mV units */ +#define PDO_PPS_APDO_MAX_CURR_SHIFT 0 /* 50mA units */ + +#define PDO_PPS_APDO_VOLT_MASK 0xff +#define PDO_PPS_APDO_CURR_MASK 0x7f + +#define PDO_PPS_APDO_MIN_VOLT(mv) \ + ((((mv) / 100) & PDO_PPS_APDO_VOLT_MASK) << PDO_PPS_APDO_MIN_VOLT_SHIFT) +#define PDO_PPS_APDO_MAX_VOLT(mv) \ + ((((mv) / 100) & PDO_PPS_APDO_VOLT_MASK) << PDO_PPS_APDO_MAX_VOLT_SHIFT) +#define PDO_PPS_APDO_MAX_CURR(ma) \ + ((((ma) / 50) & PDO_PPS_APDO_CURR_MASK) << PDO_PPS_APDO_MAX_CURR_SHIFT) + +#define PDO_PPS_APDO(min_mv, max_mv, max_ma) \ + (PDO_TYPE(PDO_TYPE_APDO) | PDO_APDO_TYPE(APDO_TYPE_PPS) | \ + PDO_PPS_APDO_MIN_VOLT(min_mv) | PDO_PPS_APDO_MAX_VOLT(max_mv) | \ + PDO_PPS_APDO_MAX_CURR(max_ma)) + static inline enum pd_pdo_type pdo_type(u32 pdo) { return (pdo >> PDO_TYPE_SHIFT) & PDO_TYPE_MASK; @@ -204,6 +326,29 @@ static inline unsigned int pdo_max_power(u32 pdo) return ((pdo >> PDO_BATT_MAX_PWR_SHIFT) & PDO_PWR_MASK) * 250; } +static inline enum pd_apdo_type pdo_apdo_type(u32 pdo) +{ + return (pdo >> PDO_APDO_TYPE_SHIFT) & PDO_APDO_TYPE_MASK; +} + +static inline unsigned int pdo_pps_apdo_min_voltage(u32 pdo) +{ + return ((pdo >> PDO_PPS_APDO_MIN_VOLT_SHIFT) & + PDO_PPS_APDO_VOLT_MASK) * 100; +} + +static inline unsigned int pdo_pps_apdo_max_voltage(u32 pdo) +{ + return ((pdo >> PDO_PPS_APDO_MAX_VOLT_SHIFT) & + PDO_PPS_APDO_VOLT_MASK) * 100; +} + +static inline unsigned int pdo_pps_apdo_max_current(u32 pdo) +{ + return ((pdo >> PDO_PPS_APDO_MAX_CURR_SHIFT) & + PDO_PPS_APDO_CURR_MASK) * 50; +} + /* RDO: Request Data Object */ #define RDO_OBJ_POS_SHIFT 28 #define RDO_OBJ_POS_MASK 0x7 @@ -237,6 +382,24 @@ static inline unsigned int pdo_max_power(u32 pdo) (RDO_OBJ(idx) | (flags) | \ RDO_BATT_OP_PWR(op_mw) | RDO_BATT_MAX_PWR(max_mw)) +#define RDO_PROG_VOLT_MASK 0x7ff +#define RDO_PROG_CURR_MASK 0x7f + +#define RDO_PROG_VOLT_SHIFT 9 +#define RDO_PROG_CURR_SHIFT 0 + +#define 
RDO_PROG_VOLT_MV_STEP 20 +#define RDO_PROG_CURR_MA_STEP 50 + +#define PDO_PROG_OUT_VOLT(mv) \ + ((((mv) / RDO_PROG_VOLT_MV_STEP) & RDO_PROG_VOLT_MASK) << RDO_PROG_VOLT_SHIFT) +#define PDO_PROG_OP_CURR(ma) \ + ((((ma) / RDO_PROG_CURR_MA_STEP) & RDO_PROG_CURR_MASK) << RDO_PROG_CURR_SHIFT) + +#define RDO_PROG(idx, out_mv, op_ma, flags) \ + (RDO_OBJ(idx) | (flags) | \ + PDO_PROG_OUT_VOLT(out_mv) | PDO_PROG_OP_CURR(op_ma)) + static inline unsigned int rdo_index(u32 rdo) { return (rdo >> RDO_OBJ_POS_SHIFT) & RDO_OBJ_POS_MASK; -- cgit v1.2.3 From 456ebb4f221e98f507cc945690a670a79682c029 Mon Sep 17 00:00:00 2001 From: Adam Thomson Date: Tue, 2 Jan 2018 15:50:50 +0000 Subject: typec: tcpm: Add ADO header for Alert message handling This commit adds a header providing definitions for handling Alert messages. Currently the header only focuses on handling incoming alerts. Signed-off-by: Adam Thomson Acked-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd_ado.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 include/linux/usb/pd_ado.h (limited to 'include/linux') diff --git a/include/linux/usb/pd_ado.h b/include/linux/usb/pd_ado.h new file mode 100644 index 000000000000..9aa1cf31c93c --- /dev/null +++ b/include/linux/usb/pd_ado.h @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2017 Dialog Semiconductor + * + * Author: Adam Thomson + */ + +#ifndef __LINUX_USB_PD_ADO_H +#define __LINUX_USB_PD_ADO_H + +/* ADO : Alert Data Object */ +#define USB_PD_ADO_TYPE_SHIFT 24 +#define USB_PD_ADO_TYPE_MASK 0xff +#define USB_PD_ADO_FIXED_BATT_SHIFT 20 +#define USB_PD_ADO_FIXED_BATT_MASK 0xf +#define USB_PD_ADO_HOT_SWAP_BATT_SHIFT 16 +#define USB_PD_ADO_HOT_SWAP_BATT_MASK 0xf + +#define USB_PD_ADO_TYPE_BATT_STATUS_CHANGE BIT(1) +#define USB_PD_ADO_TYPE_OCP BIT(2) +#define USB_PD_ADO_TYPE_OTP BIT(3) +#define USB_PD_ADO_TYPE_OP_COND_CHANGE BIT(4) +#define USB_PD_ADO_TYPE_SRC_INPUT_CHANGE 
BIT(5) +#define USB_PD_ADO_TYPE_OVP BIT(6) + +static inline unsigned int usb_pd_ado_type(u32 ado) +{ + return (ado >> USB_PD_ADO_TYPE_SHIFT) & USB_PD_ADO_TYPE_MASK; +} + +static inline unsigned int usb_pd_ado_fixed_batt(u32 ado) +{ + return (ado >> USB_PD_ADO_FIXED_BATT_SHIFT) & + USB_PD_ADO_FIXED_BATT_MASK; +} + +static inline unsigned int usb_pd_ado_hot_swap_batt(u32 ado) +{ + return (ado >> USB_PD_ADO_HOT_SWAP_BATT_SHIFT) & + USB_PD_ADO_HOT_SWAP_BATT_MASK; +} +#endif /* __LINUX_USB_PD_ADO_H */ -- cgit v1.2.3 From 02cad961cae556357e4a63b11f849e80418c1ffc Mon Sep 17 00:00:00 2001 From: Adam Thomson Date: Tue, 2 Jan 2018 15:50:51 +0000 Subject: typec: tcpm: Add SDB header for Status message handling This commit adds a header providing definitions for handling Status messages. Currently the header only focuses on handling incoming Status messages. Signed-off-by: Adam Thomson Acked-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd_ext_sdb.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/linux/usb/pd_ext_sdb.h (limited to 'include/linux') diff --git a/include/linux/usb/pd_ext_sdb.h b/include/linux/usb/pd_ext_sdb.h new file mode 100644 index 000000000000..0eb83ce19597 --- /dev/null +++ b/include/linux/usb/pd_ext_sdb.h @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2017 Dialog Semiconductor + * + * Author: Adam Thomson + */ + +#ifndef __LINUX_USB_PD_EXT_SDB_H +#define __LINUX_USB_PD_EXT_SDB_H + +/* SDB : Status Data Block */ +enum usb_pd_ext_sdb_fields { + USB_PD_EXT_SDB_INTERNAL_TEMP = 0, + USB_PD_EXT_SDB_PRESENT_INPUT, + USB_PD_EXT_SDB_PRESENT_BATT_INPUT, + USB_PD_EXT_SDB_EVENT_FLAGS, + USB_PD_EXT_SDB_TEMP_STATUS, + USB_PD_EXT_SDB_DATA_SIZE, +}; + +/* Event Flags */ +#define USB_PD_EXT_SDB_EVENT_OCP BIT(1) +#define USB_PD_EXT_SDB_EVENT_OTP BIT(2) +#define USB_PD_EXT_SDB_EVENT_OVP BIT(3) +#define USB_PD_EXT_SDB_EVENT_CF_CV_MODE BIT(4) + +#define 
USB_PD_EXT_SDB_PPS_EVENTS (USB_PD_EXT_SDB_EVENT_OCP | \ + USB_PD_EXT_SDB_EVENT_OTP | \ + USB_PD_EXT_SDB_EVENT_OVP) + +#endif /* __LINUX_USB_PD_EXT_SDB_H */ -- cgit v1.2.3 From 4e88d4c083016454f179686529ae65d70b933b58 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 3 Mar 2018 22:43:03 +0100 Subject: usb: add a flag to skip PHY initialization to struct usb_hcd The USB HCD core driver parses the device-tree node for "phys" and "usb-phys" properties. It also manages the power state of these PHYs automatically. However, drivers may opt-out of this behavior by setting "phy" or "usb_phy" in struct usb_hcd to a non-null value. An example where this is required is the "Qualcomm USB2 controller", implemented by the chipidea driver. The hardware requires that the PHY is only powered on after the "reset completed" event from the controller is received. A follow-up patch will allow the USB HCD core driver to manage more than one PHY. Add a new "skip_phy_initialization" bitflag to struct usb_hcd so drivers can opt-out of any PHY management provided by the USB HCD core driver. This also updates the existing drivers so they use the new flag if they want to opt out of the PHY management provided by the USB HCD core driver. This means that for these drivers the new "multiple PHY" handling (which will be added in a follow-up patch) will be disabled as well. Signed-off-by: Martin Blumenstingl Acked-by: Peter Chen Tested-by: Neil Armstrong Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/hcd.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 176900528822..693502c84c04 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -151,6 +151,12 @@ struct usb_hcd { unsigned msix_enabled:1; /* driver has MSI-X enabled? */ unsigned msi_enabled:1; /* driver has MSI enabled? 
*/ unsigned remove_phy:1; /* auto-remove USB phy */ + /* + * do not manage the PHY state in the HCD core, instead let the driver + * handle this (for example if the PHY can only be turned on after a + * specific event) + */ + unsigned skip_phy_initialization:1; /* The next flag is a stopgap, to be removed when all the HCDs * support the new root-hub polling mechanism. */ -- cgit v1.2.3 From 178a0bce05cbc17a27f9cba78258c5d12adc980c Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 3 Mar 2018 22:43:05 +0100 Subject: usb: core: hcd: integrate the PHY wrapper into the HCD core This integrates the PHY wrapper into the core hcd infrastructure. Multiple PHYs which are part of the HCD's device tree node are now managed (= powered on/off when needed), by the new usb_phy_roothub code. Suspend and resume are also supported, however not for runtime/auto-suspend (which is triggered for example when no devices are connected to the USB bus). This is needed on some SoCs (for example Amlogic Meson GXL) because if the PHYs are disabled during auto-suspend then devices which are plugged in afterwards are not seen by the host. One example where this is required is the Amlogic GXL and GXM SoCs: They are using a dwc3 USB controller with up to three ports enabled on the internal roothub. Each port has its own PHY which must be enabled (if one of the PHYs is left disabled then none of the USB ports works at all). The new logic works on the Amlogic GXL and GXM SoCs because the dwc3 driver internally creates a xhci-hcd which then registers a HCD which then triggers our new PHY wrapper. USB controller drivers can opt out of this by setting "skip_phy_initialization" in struct usb_hcd to true. This is identical to how it works for a single USB PHY, so the "multiple PHY" handling is disabled for drivers that opted out of the management logic of a single PHY. 
Signed-off-by: Martin Blumenstingl Acked-by: Alan Stern Acked-by: Chunfeng Yun Tested-by: Yixun Lan Tested-by: Neil Armstrong Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/hcd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 693502c84c04..a042675e03ba 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -104,6 +104,7 @@ struct usb_hcd { */ struct usb_phy *usb_phy; struct phy *phy; + struct usb_phy_roothub *phy_roothub; /* Flags that need to be manipulated atomically because they can * change while the host controller is running. Always use -- cgit v1.2.3 From ad70f937e9d0bdc580e390db3a047f9e58863b6e Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 3 Mar 2018 22:43:09 +0100 Subject: usb: core: hcd: remove support for initializing a single PHY With the new PHY wrapper in place we can now handle multiple PHYs. Remove the code which handles only one generic PHY as this is now covered (with support for multiple PHYs as well as suspend/resume support) by the new PHY wrapper. Signed-off-by: Martin Blumenstingl Tested-by: Neil Armstrong Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/hcd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index a042675e03ba..aef50cb2ed1b 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -103,7 +103,6 @@ struct usb_hcd { * other external phys should be software-transparent */ struct usb_phy *usb_phy; - struct phy *phy; struct usb_phy_roothub *phy_roothub; /* Flags that need to be manipulated atomically because they can -- cgit v1.2.3 From f5426250a6ecfd1e9b2d5e0daf07565f664aa67d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 9 Mar 2018 10:39:24 +0100 Subject: net: introduce IFF_NO_RX_HANDLER Some network devices - notably ipvlan slave - are not compatible with any kind of rx_handler. 
Currently the hook can be installed but any configuration (bridge, bond, macsec, ...) is nonfunctional. This change allocates a priv_flag bit to mark such devices and explicitly forbid installing a rx_handler if such bit is set. The new bit is used by ipvlan slave device. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9711108c3916..5fbb9f1da7fd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1397,6 +1397,7 @@ struct net_device_ops { * @IFF_PHONY_HEADROOM: the headroom value is controlled by an external * entity (i.e. the master device for bridged veth) * @IFF_MACSEC: device is a MACsec device + * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1425,6 +1426,7 @@ enum netdev_priv_flags { IFF_RXFH_CONFIGURED = 1<<23, IFF_PHONY_HEADROOM = 1<<24, IFF_MACSEC = 1<<25, + IFF_NO_RX_HANDLER = 1<<26, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1452,6 +1454,7 @@ enum netdev_priv_flags { #define IFF_TEAM IFF_TEAM #define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED #define IFF_MACSEC IFF_MACSEC +#define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER /** * struct net_device - The DEVICE structure. -- cgit v1.2.3 From 739d875dd6982618020d30f58f8acf10f6076e6d Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Mar 2018 09:48:46 +0000 Subject: mn10300: Remove the architecture Remove the MN10300 arch as the hardware is defunct. 
Suggested-by: Arnd Bergmann Signed-off-by: David Howells cc: Masahiro Yamada cc: linux-am33-list@redhat.com Signed-off-by: Arnd Bergmann --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 771989d25ef8..20d42c0d9fb6 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -25,7 +25,7 @@ #include #include -#if defined(CONFIG_CRIS) || defined(CONFIG_FRV) || defined(CONFIG_MN10300) +#if defined(CONFIG_CRIS) || defined(CONFIG_FRV) # define SUPPORT_VLB_SYNC 0 #else # define SUPPORT_VLB_SYNC 1 -- cgit v1.2.3 From 78b98e3c5a66d569a53b8f57b6a698f912794a43 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Fri, 9 Mar 2018 10:42:48 -0800 Subject: timekeeping/ntp: Determine the multiplier directly from NTP tick length When the length of the NTP tick changes significantly, e.g. when an NTP/PTP application is correcting the initial offset of the clock, a large value may accumulate in the NTP error before the multiplier converges to the correct value. It may then take a very long time (hours or even days) before the error is corrected. This causes the clock to have an unstable frequency offset, which has a negative impact on the stability of synchronization with precise time sources (e.g. NTP/PTP using hardware timestamping or the PTP KVM clock). Use division to determine the correct multiplier directly from the NTP tick length and replace the iterative approach. This removes the last major source of the NTP error. The only remaining source is now limited resolution of the multiplier, which is corrected by adding 1 to the multiplier when the system clock is behind the NTP time. 
Signed-off-by: Miroslav Lichvar Signed-off-by: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1520620971-9567-3-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- include/linux/timekeeper_internal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index d315c3d6725c..7acb953298a7 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -117,6 +117,8 @@ struct timekeeper { s64 ntp_error; u32 ntp_error_shift; u32 ntp_err_mult; + /* Flag used to avoid updating NTP twice with same second */ + u32 skip_second_overflow; #ifdef CONFIG_DEBUG_TIMEKEEPING long last_warning; /* -- cgit v1.2.3 From 00b4145298aeb05a2d110117ed18148cb21ebd14 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 15 Jan 2018 20:51:39 -0600 Subject: ring-buffer: Add interface for setting absolute time stamps Define a new function, tracing_set_time_stamp_abs(), which can be used to enable or disable the use of absolute timestamps rather than time deltas for a trace array. Only the interface is added here; a subsequent patch will add the underlying implementation. 
Link: http://lkml.kernel.org/r/ce96119de44c7fe0ee44786d15254e9b493040d3.1516069914.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Signed-off-by: Baohong Liu Signed-off-by: Steven Rostedt (VMware) --- include/linux/ring_buffer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 7d9eb39fa76a..025159e17e1b 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -178,6 +178,8 @@ void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, int cpu, u64 *ts); void ring_buffer_set_clock(struct ring_buffer *buffer, u64 (*clock)(void)); +void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs); +bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer); size_t ring_buffer_page_len(void *page); -- cgit v1.2.3 From dc4e2801d400b0346fb281ce9cf010d611e2243c Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 15 Jan 2018 20:51:40 -0600 Subject: ring-buffer: Redefine the unimplemented RINGBUF_TYPE_TIME_STAMP RINGBUF_TYPE_TIME_STAMP is defined but not used, and from what I can gather was reserved for something like an absolute timestamp feature for the ring buffer, if not a complete replacement of the current time_delta scheme. This code redefines RINGBUF_TYPE_TIME_STAMP to implement absolute time stamps. Another way to look at it is that it essentially forces extended time_deltas for all events. The motivation for doing this is to enable time_deltas that aren't dependent on previous events in the ring buffer, making it feasible to use the ring_buffer_event timestamps in a more random-access way, for purposes other than serial event printing. To set/reset this mode, use tracing_set_time_stamp_abs() from the previous interface patch. 
Link: http://lkml.kernel.org/r/477b362dba1ce7fab9889a1a8e885a62c472f041.1516069914.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- include/linux/ring_buffer.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 025159e17e1b..7cb84774c20d 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -34,10 +34,12 @@ struct ring_buffer_event { * array[0] = time delta (28 .. 59) * size = 8 bytes * - * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock - * array[0] = tv_nsec - * array[1..2] = tv_sec - * size = 16 bytes + * @RINGBUF_TYPE_TIME_STAMP: Absolute timestamp + * Same format as TIME_EXTEND except that the + * value is an absolute timestamp, not a delta + * event.time_delta contains bottom 27 bits + * array[0] = top (28 .. 59) bits + * size = 8 bytes * * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX: * Data record @@ -54,12 +56,12 @@ enum ring_buffer_type { RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28, RINGBUF_TYPE_PADDING, RINGBUF_TYPE_TIME_EXTEND, - /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */ RINGBUF_TYPE_TIME_STAMP, }; unsigned ring_buffer_event_length(struct ring_buffer_event *event); void *ring_buffer_event_data(struct ring_buffer_event *event); +u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event); /* * ring_buffer_discard_commit will remove an event that has not -- cgit v1.2.3 From 1ac4f51c0eb518e04ff3455f0c7d17ad9187eb27 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 15 Jan 2018 20:51:42 -0600 Subject: tracing: Give event triggers access to ring_buffer_event The ring_buffer event can provide a timestamp that may be useful to various triggers - pass it into the handlers for that purpose. 
Link: http://lkml.kernel.org/r/6de592683b59fa70ffa5d43d0109896623fc1367.1516069914.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 8a1442c4e513..0cf48c61cc6d 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -430,11 +430,13 @@ enum event_trigger_type { extern int filter_match_preds(struct event_filter *filter, void *rec); -extern enum event_trigger_type event_triggers_call(struct trace_event_file *file, - void *rec); -extern void event_triggers_post_call(struct trace_event_file *file, - enum event_trigger_type tt, - void *rec); +extern enum event_trigger_type +event_triggers_call(struct trace_event_file *file, void *rec, + struct ring_buffer_event *event); +extern void +event_triggers_post_call(struct trace_event_file *file, + enum event_trigger_type tt, + void *rec, struct ring_buffer_event *event); bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); @@ -454,7 +456,7 @@ trace_trigger_soft_disabled(struct trace_event_file *file) if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) { if (eflags & EVENT_FILE_FL_TRIGGER_MODE) - event_triggers_call(file, NULL); + event_triggers_call(file, NULL, NULL); if (eflags & EVENT_FILE_FL_SOFT_DISABLED) return true; if (eflags & EVENT_FILE_FL_PID_FILTER) -- cgit v1.2.3 From 8e012066fe0de5ff5be606836f9075511bce5604 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 7 Feb 2018 17:26:32 -0500 Subject: ring-buffer: Add nesting for adding events within events The ring-buffer code has recursion protection in case tracing ends up tracing itself: the ring-buffer will detect that it was called at the same context (normal, softirq, interrupt or NMI), and will not continue to record the event. 
The histogram synthetic events are called while tracing another event at the same context. The recursion protection triggers because it detects tracing at the same context and stops it. Add ring_buffer_nest_start() and ring_buffer_nest_end() that will notify the ring buffer that a trace is about to happen within another trace and that it is intended, and not to trigger the recursion blocking. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ring_buffer.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 7cb84774c20d..a0233edc0718 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -117,6 +117,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, int ring_buffer_write(struct ring_buffer *buffer, unsigned long length, void *data); +void ring_buffer_nest_start(struct ring_buffer *buffer); +void ring_buffer_nest_end(struct ring_buffer *buffer); + struct ring_buffer_event * ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events); -- cgit v1.2.3 From d1ed7c558612630ce4c48e440a6fdd8d4785f6a3 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 24 Feb 2018 23:45:56 +0100 Subject: leds: Extends disk trigger for reads and writes This adds two new disk triggers for triggering on reads and writes respectively, named "disk-read" and "disk-write". The use case comes from working on the D-Link DNS-313 NAS box. This features an RGB LED for disk activity. With these two triggers I can couple the green LED to read activity and the red LED to write activity, which gives the appropriate user feedback about what is happening on the disk. When tested it gave exactly the feedback desired. The in-kernel interface is simply changed to pass a bool indicating if the activity is write activity and update each trigger (and the composite "disk-activity" trigger) depending on what is passed in. 
Signed-off-by: Linus Walleij Reviewed-by: Bartlomiej Zolnierkiewicz Acked-by: Pavel Machek Acked-by: Tejun Heo Acked-by: David S. Miller Signed-off-by: Jacek Anaszewski --- include/linux/leds.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 5579c64c8fd6..b7e82550e655 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -346,9 +346,9 @@ static inline void *led_get_trigger_data(struct led_classdev *led_cdev) /* Trigger specific functions */ #ifdef CONFIG_LEDS_TRIGGER_DISK -extern void ledtrig_disk_activity(void); +extern void ledtrig_disk_activity(bool write); #else -static inline void ledtrig_disk_activity(void) {} +static inline void ledtrig_disk_activity(bool write) {} #endif #ifdef CONFIG_LEDS_TRIGGER_MTD -- cgit v1.2.3 From b1d0a5d0cba4597c0394997b2d5fced3e3841b4e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 10 Mar 2018 01:15:45 +0100 Subject: netfilter: x_tables: add and use xt_check_proc_name recent and hashlimit both create /proc files, but only check that name is 0 terminated. This can trigger WARN() from procfs when name is "" or "/". Add helper for this and then use it for both. 
Cc: Eric Dumazet Reported-by: Eric Dumazet Reported-by: Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 1313b35c3ab7..14529511c4b8 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -285,6 +285,8 @@ unsigned int *xt_alloc_entry_offsets(unsigned int size); bool xt_find_jump_offset(const unsigned int *offsets, unsigned int target, unsigned int size); +int xt_check_proc_name(const char *name, unsigned int size); + int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, -- cgit v1.2.3 From c59c9c85e36aa09cfd901cc15a0d8d3772c18195 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 7 Feb 2018 18:22:49 +0800 Subject: soc: mediatek: avoid hardcoded value with bus_prot_mask use a meaningful definition for bus_prot_mask instead of just hardcoded for it. 
Signed-off-by: Sean Wang Reviewed-by: Ulf Hansson Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/infracfg.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index b0a507d356ef..fd25f0148566 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -21,6 +21,10 @@ #define MT8173_TOP_AXI_PROT_EN_MFG_M1 BIT(22) #define MT8173_TOP_AXI_PROT_EN_MFG_SNOOP_OUT BIT(23) +#define MT2701_TOP_AXI_PROT_EN_MM_M0 BIT(1) +#define MT2701_TOP_AXI_PROT_EN_CONN_M BIT(2) +#define MT2701_TOP_AXI_PROT_EN_CONN_S BIT(8) + #define MT7622_TOP_AXI_PROT_EN_ETHSYS (BIT(3) | BIT(17)) #define MT7622_TOP_AXI_PROT_EN_HIF0 (BIT(24) | BIT(25)) #define MT7622_TOP_AXI_PROT_EN_HIF1 (BIT(26) | BIT(27) | \ -- cgit v1.2.3 From 7e904a91bf6049071ef9d605a52f863ae774081d Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Mon, 12 Mar 2018 08:44:56 +0000 Subject: efi: Use efi_mm in x86 as well as ARM Presently, only ARM uses mm_struct to manage EFI page tables and EFI runtime region mappings. As this is the preferred approach, let's make this data structure common across architectures. Specially, for x86, using this data structure improves code maintainability and readability. Tested-by: Bhupesh Sharma [ardb: don't #include the world to get a declaration of struct mm_struct] Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Ard Biesheuvel Reviewed-by: Matt Fleming Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Lee, Chun-Yi Cc: Linus Torvalds Cc: Michael S. 
Tsirkin Cc: Peter Zijlstra Cc: Ravi Shankar Cc: Ricardo Neri Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20180312084500.10764-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index f5083aa72eae..f1b7d68ac460 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -966,6 +966,8 @@ extern struct efi { unsigned long flags; } efi; +extern struct mm_struct efi_mm; + static inline int efi_guidcmp (efi_guid_t left, efi_guid_t right) { -- cgit v1.2.3 From 9e49e2447c6385e45c6fddd70d6c0e917e21b669 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 27 Feb 2018 17:05:10 +0100 Subject: sched/core: Remove TASK_ALL It's unused: $ git grep "\" | wc -l 1 ... and it is also dangerous, kill the bugger. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Sebastian Andrzej Siewior Acked-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180227160510.10829-1-bigeasy@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b161ef8a902e..21b1168da951 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -93,7 +93,6 @@ struct task_group; /* Convenience macros for the sake of wake_up(): */ #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) -#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) /* get_task_state(): */ #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ -- cgit v1.2.3 From 8e1a2031e4b556b01ca53cd1fb2d83d811a6605b Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Fri, 8 Sep 2017 11:47:03 +0300 Subject: perf/cor: Use RB trees for pinned/flexible groups Change event groups into RB trees sorted by CPU and then by a 64bit index, so that 
the multiplexing hrtimer interrupt handler can skip to the current CPU's list and ignore groups allocated for the other CPUs. A new API for manipulating event groups in the trees is implemented, and the current implementation is adapted to use it. The pinned_group_sched_in() and flexible_group_sched_in() APIs are introduced to consolidate code enabling the whole group from pinned and flexible groups appropriately. Signed-off-by: Alexey Budankov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: David Carrillo-Cisneros Cc: Dmitri Prokhorov Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Valery Cherepennikov Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/372f9c8b-0cfe-4240-e44d-83d863d40813@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7546822a1d74..6e3f854a34d8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -558,7 +558,11 @@ struct perf_event { */ struct list_head group_entry; struct list_head sibling_list; - + /* + * Node on the pinned or flexible tree located at the event context; + */ + struct rb_node group_node; + u64 group_index; /* * We need storage to track the entries in perf_pmu_migrate_context; we * cannot use the event_entry because of RCU and we want to keep the @@ -690,6 +694,12 @@ struct perf_event { #endif /* CONFIG_PERF_EVENTS */ }; + +struct perf_event_groups { + struct rb_root tree; + u64 index; +}; + /** * struct perf_event_context - event context structure * @@ -710,8 +720,8 @@ struct perf_event_context { struct mutex mutex; struct list_head active_ctx_list; - struct list_head pinned_groups; - struct list_head flexible_groups; + struct perf_event_groups 
pinned_groups; + struct perf_event_groups flexible_groups; struct list_head event_list; int nr_events; int nr_active; -- cgit v1.2.3 From 8343aae66167df6708128a778e750d48dbe31302 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 13 Nov 2017 14:28:33 +0100 Subject: perf/core: Remove perf_event::group_entry Now that all the grouping is done with RB trees, we no longer need group_entry and can replace the whole thing with sibling_list. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Arnaldo Carvalho de Melo Cc: David Carrillo-Cisneros Cc: Dmitri Prokhorov Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Valery Cherepennikov Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6e3f854a34d8..84044ec21b31 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -549,14 +549,9 @@ struct perf_event { struct list_head event_entry; /* - * XXX: group_entry and sibling_list should be mutually exclusive; - * either you're a sibling on a group, or you're the group leader. - * Rework the code to always use the same list element. - * * Locked for modification by both ctx->mutex and ctx->lock; holding * either sufficies for read. */ - struct list_head group_entry; struct list_head sibling_list; /* * Node on the pinned or flexible tree located at the event context; -- cgit v1.2.3 From 6668128a9e25f7a11d25359e46df2541e6b43fc9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 13 Nov 2017 14:28:38 +0100 Subject: perf/core: Optimize ctx_sched_out() When an event group contains more events than can be scheduled on the hardware, iterating the full event group for ctx_sched_out is a waste of time. 
Keep track of the events that got programmed on the hardware, such that we can iterate this smaller list in order to schedule them out. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Arnaldo Carvalho de Melo Cc: David Carrillo-Cisneros Cc: Dmitri Prokhorov Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Valery Cherepennikov Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 84044ec21b31..2bb200e1bbea 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -553,6 +553,7 @@ struct perf_event { * either sufficies for read. */ struct list_head sibling_list; + struct list_head active_list; /* * Node on the pinned or flexible tree located at the event context; */ @@ -718,6 +719,10 @@ struct perf_event_context { struct perf_event_groups pinned_groups; struct perf_event_groups flexible_groups; struct list_head event_list; + + struct list_head pinned_active; + struct list_head flexible_active; + int nr_events; int nr_active; int is_active; -- cgit v1.2.3 From a2c054a896b8ac794ddcfc7c92e2dc7ec4ed4ed5 Mon Sep 17 00:00:00 2001 From: Brad Mouring Date: Thu, 8 Mar 2018 16:23:03 -0600 Subject: net: phy: Tell caller result of phy_change() In 664fcf123a30e (net: phy: Threaded interrupts allow some simplification) the phy_interrupt system was changed to use a traditional threaded interrupt scheme instead of a workqueue approach. With this change, the phy status check moved into phy_change, which did not report back to the caller whether or not the interrupt was handled. This means that, in the case of a shared phy interrupt, only the first phydev's interrupt registers are checked (since phy_interrupt() would always return IRQ_HANDLED). 
This leads to interrupt storms when it is a secondary device that's actually the interrupt source. Signed-off-by: Brad Mouring Signed-off-by: David S. Miller --- include/linux/phy.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index d7069539f351..b260fb336b25 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1012,7 +1012,6 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner); int phy_drivers_register(struct phy_driver *new_driver, int n, struct module *owner); void phy_state_machine(struct work_struct *work); -void phy_change(struct phy_device *phydev); void phy_change_work(struct work_struct *work); void phy_mac_interrupt(struct phy_device *phydev); void phy_start_machine(struct phy_device *phydev); -- cgit v1.2.3 From bf2ae2e4bf9360e07c0cdfa166bcdc0afd92f4ce Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 10 Mar 2018 18:57:50 +0800 Subject: sock_diag: request _diag module only when the family or proto has been registered Now when using 'ss' in iproute, kernel would try to load all _diag modules, which also causes corresponding family and proto modules to be loaded as well due to module dependencies. Like after running 'ss', sctp, dccp, af_packet (if it works as a module) would be loaded. For example: $ lsmod|grep sctp $ ss $ lsmod|grep sctp sctp_diag 16384 0 sctp 323584 5 sctp_diag inet_diag 24576 4 raw_diag,tcp_diag,sctp_diag,udp_diag libcrc32c 16384 3 nf_conntrack,nf_nat,sctp As these family and proto modules are loaded unintentionally, it could cause some problems, like: - Some debug tools use 'ss' to collect the socket info, which loads all those diag and family and protocol modules. It's noisy for identifying issues. - Users usually expect to drop sctp init packet silently when they have no sense of sctp protocol instead of sending abort back. 
- It wastes resources (especially with multiple netns), and the SCTP module can't be unloaded once it's loaded. ... In short, it's really inappropriate to have these family and proto modules loaded unexpectedly when just doing debugging with inet_diag. This patch introduces sock_load_diag_module(), which loads the _diag module only when its corresponding family or proto has already been registered. Note that we can't just load _diag module without the family or proto loaded, as some symbols used in _diag module are from the family or proto module. v1->v2: - move inet proto check to inet_diag to avoid a compiling err. v2->v3: - define sock_load_diag_module in sock.c and export one symbol only. - improve the changelog. Reported-by: Sabrina Dubroca Acked-by: Marcelo Ricardo Leitner Acked-by: Phil Sutter Acked-by: Sabrina Dubroca Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/net.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 91216b16feb7..2a0391eea05c 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -222,6 +222,7 @@ enum { int sock_wake_async(struct socket_wq *sk_wq, int how, int band); int sock_register(const struct net_proto_family *fam); void sock_unregister(int family); +bool sock_is_registered(int family); int __sock_create(struct net *net, int family, int type, int proto, struct socket **res, int kern); int sock_create(int family, int type, int proto, struct socket **res); -- cgit v1.2.3 From c8f4c36f81623002165dce874fa60bb0c154b10e Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Fri, 23 Feb 2018 13:45:28 +0200 Subject: direct-io: Remove unused DIO_ASYNC_EXTEND flag This flag was added by 6039257378e4 ("direct-io: add flag to allow aio writes beyond i_size") to support XFS. However, with the rework of XFS's DIO path to use iomap in acdda3aae146 ("xfs: use iomap_dio_rw") it became redundant. So let's remove it. 
Reviewed-by: Christoph Hellwig Signed-off-by: Nikolay Borisov Signed-off-by: Jens Axboe --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2a815560fda0..260c233e7375 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2977,9 +2977,6 @@ enum { /* filesystem does not support filling holes */ DIO_SKIP_HOLES = 0x02, - /* filesystem can handle aio writes beyond i_size */ - DIO_ASYNC_EXTEND = 0x04, - /* inode/fs/bdev does not need truncate protection */ DIO_SKIP_DIO_COUNT = 0x08, }; -- cgit v1.2.3 From ce3077ee80d6ac1087c06441f4c63ce5f13ef12c Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Fri, 23 Feb 2018 13:45:29 +0200 Subject: direct-io: Remove unused DIO_SKIP_DIO_COUNT logic This flag was added by fe0f07d08ee3 ("direct-io: only inc/dec inode->i_dio_count for file systems") as a means to optimise the atomic modification of the variable for block devices. However with the advent of 542ff7bf18c6 ("block: new direct I/O implementation") it became unused. So let's remove it. Reviewed-by: Christoph Hellwig Signed-off-by: Nikolay Borisov Signed-off-by: Jens Axboe --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 260c233e7375..9bee267209e5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2976,9 +2976,6 @@ enum { /* filesystem does not support filling holes */ DIO_SKIP_HOLES = 0x02, - - /* inode/fs/bdev does not need truncate protection */ - DIO_SKIP_DIO_COUNT = 0x08, }; void dio_end_io(struct bio *bio); -- cgit v1.2.3 From e6d3cc7b1fac3d7f1313faf8ac9b23830113e3ec Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 14 Feb 2018 14:43:33 +0100 Subject: clk: divider: export clk_div_mask() helper Export clk_div_mask() in clk-provider header so every clock provider derived from the generic clock divider may share the definition instead of redefining it. 
Signed-off-by: Jerome Brunet Signed-off-by: Michael Turquette Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index f711be6e8c44..d8ba26d03332 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -399,6 +399,7 @@ struct clk_divider { spinlock_t *lock; }; +#define clk_div_mask(width) ((1 << (width)) - 1) #define to_clk_divider(_hw) container_of(_hw, struct clk_divider, hw) #define CLK_DIVIDER_ONE_BASED BIT(0) -- cgit v1.2.3 From 77deb66d262f8512130ff75ec5ea8e31070b41ed Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 14 Feb 2018 14:43:34 +0100 Subject: clk: mux: add helper function for index/value translation Add helper functions for the translation between parent index and register value in the generic multiplexer function. The purpose of this change is avoid duplicating the code in other clock providers, using the same generic logic. 
Signed-off-by: Jerome Brunet Signed-off-by: Michael Turquette Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index d8ba26d03332..fe720d679c31 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -511,6 +511,10 @@ struct clk_hw *clk_hw_register_mux_table(struct device *dev, const char *name, void __iomem *reg, u8 shift, u32 mask, u8 clk_mux_flags, u32 *table, spinlock_t *lock); +int clk_mux_val_to_index(struct clk_hw *hw, u32 *table, unsigned int flags, + unsigned int val); +unsigned int clk_mux_index_to_val(u32 *table, unsigned int flags, u8 index); + void clk_unregister_mux(struct clk *clk); void clk_hw_unregister_mux(struct clk_hw *hw); -- cgit v1.2.3 From fe3f338f0cb2ed4d4f06da054c21ae2f8a36ef2d Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 14 Feb 2018 14:43:38 +0100 Subject: clk: fix mux clock documentation The mux documentation mentions the non-existing parameter width instead of mask, so just sed this. The table field is missing in the documentation of clk_mux. 
Add a small blurb explaining what it is Fixes: 9d9f78ed9af0 ("clk: basic clock hardware types") Signed-off-by: Jerome Brunet Signed-off-by: Michael Turquette Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index fe720d679c31..cb18526d69cb 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -450,8 +450,9 @@ void clk_hw_unregister_divider(struct clk_hw *hw); * * @hw: handle between common and hardware-specific interfaces * @reg: register controlling multiplexer + * @table: array of register values corresponding to the parent index * @shift: shift to multiplexer bit field - * @width: width of mutliplexer bit field + * @mask: mask of mutliplexer bit field * @flags: hardware-specific flags * @lock: register lock * -- cgit v1.2.3 From b15ee490e16324c35b51f04bad54ae45a2cefd29 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 14 Feb 2018 14:43:39 +0100 Subject: clk: divider: read-only divider can propagate rate change When a divider clock has CLK_DIVIDER_READ_ONLY set, it means that the register shall be left un-touched, but it does not mean the clock should stop rate propagation if CLK_SET_RATE_PARENT is set This is properly handled in qcom clk-regmap-divider but it was not in the generic divider To fix this situation, introduce a new helper function divider_ro_round_rate, on the same model as divider_round_rate. 
Fixes: e6d5e7d90be9 ("clk-divider: Fix READ_ONLY when divider > 1") Signed-off-by: Jerome Brunet Tested-By: David Lechner Signed-off-by: Michael Turquette Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index cb18526d69cb..210a890008f9 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -420,6 +420,10 @@ long divider_round_rate_parent(struct clk_hw *hw, struct clk_hw *parent, unsigned long rate, unsigned long *prate, const struct clk_div_table *table, u8 width, unsigned long flags); +long divider_ro_round_rate_parent(struct clk_hw *hw, struct clk_hw *parent, + unsigned long rate, unsigned long *prate, + const struct clk_div_table *table, u8 width, + unsigned long flags, unsigned int val); int divider_get_val(unsigned long rate, unsigned long parent_rate, const struct clk_div_table *table, u8 width, unsigned long flags); @@ -780,6 +784,17 @@ static inline long divider_round_rate(struct clk_hw *hw, unsigned long rate, rate, prate, table, width, flags); } +static inline long divider_ro_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate, + const struct clk_div_table *table, + u8 width, unsigned long flags, + unsigned int val) +{ + return divider_ro_round_rate_parent(hw, clk_hw_get_parent(hw), + rate, prate, table, width, flags, + val); +} + /* * FIXME clock api without lock protection */ -- cgit v1.2.3 From bdbc90fa55af632f8a883a3d93c54a08708ed80a Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 28 Feb 2018 20:31:52 +0800 Subject: f2fs: don't put dentry page in pagecache into highmem Previous dentry page uses highmem, which will cause panic in platforms using highmem (such as arm), since the address space of dentry pages from highmem directly goes into the decryption path via the function fscrypt_fname_disk_to_usr. 
But sg_init_one assumes the address is not from highmem, and then cause panic since it doesn't call kmap_high but kunmap_high is triggered at the end. To fix this problem in a simple way, this patch avoids to put dentry page in pagecache into highmem. Signed-off-by: Yunlong Song Reviewed-by: Chao Yu [Jaegeuk Kim: fix coding style] Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 58aecb60ea51..393b880afc9a 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -46,7 +46,6 @@ /* This flag is used by node and meta inodes, and by recovery */ #define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) -#define GFP_F2FS_HIGH_ZERO (GFP_NOFS | __GFP_ZERO | __GFP_HIGHMEM) /* * For further optimization on multi-head logs, on-disk layout supports maximum -- cgit v1.2.3 From 199bc3fef29cacf672e7e5cd49d296c1fdc1a891 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 25 Jan 2018 19:40:08 +0800 Subject: f2fs: support large nat bitmap Previously, we will store all nat version bitmap in checkpoint pack block, so our total node entry number has a limitation which caused total node number can not exceed (3900 * 8) block * 455 node/block = 14196000. So that once user wants to create more nodes in large size image, it becomes a bottleneck, that's unreasonable. This patch detects the new layout of nat/sit version bitmap in image in order to enable supporting large nat bitmap. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 393b880afc9a..96c9bdbace50 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -116,6 +116,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_LARGE_NAT_BITMAP_FLAG 0x00000400 #define CP_NOCRC_RECOVERY_FLAG 0x00000200 #define CP_TRIMMED_FLAG 0x00000100 #define CP_NAT_BITS_FLAG 0x00000080 -- cgit v1.2.3 From 846ae671ad368e344a2b141c0f19e1014b27a0dd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 26 Feb 2018 22:04:13 +0800 Subject: f2fs: expose extension_list sysfs entry This patch adds a sysfs entry 'extension_list' to support query/add/del item in extension list. Query: cat /sys/fs/f2fs//extension_list Add: echo 'extension' > /sys/fs/f2fs//extension_list Del: echo '!extension' > /sys/fs/f2fs//extension_list Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 96c9bdbace50..d8c241451712 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -21,6 +21,7 @@ #define F2FS_BLKSIZE 4096 /* support only 4KB block */ #define F2FS_BLKSIZE_BITS 12 /* bits for F2FS_BLKSIZE */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ +#define F2FS_EXTENSION_LEN 8 /* max size of extension */ #define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS) #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ @@ -101,7 +102,7 @@ struct f2fs_super_block { __u8 uuid[16]; /* 128-bit uuid for volume */ __le16 volume_name[MAX_VOLUME_NAME]; /* volume name */ __le32 extension_count; /* # of extensions below */ - __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ + __u8 
extension_list[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN];/* extension array */ __le32 cp_payload; __u8 version[VERSION_LEN]; /* the kernel version */ __u8 init_version[VERSION_LEN]; /* the initial kernel version */ -- cgit v1.2.3 From b6a06cbbb5f7fd03589cff9178314af04c568826 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 28 Feb 2018 17:07:27 +0800 Subject: f2fs: support hot file extension This patch adds support for recognizing hot file extensions in f2fs, so that we can allocate proper hot segment location for its data, which can lead to better hot/cold separation in the filesystem. In addition, we slightly change the query/add/del operation method for the extension_list sysfs entry as below: - Query: cat /sys/fs/f2fs//extension_list - Add: echo 'extension' > /sys/fs/f2fs//extension_list - Del: echo '!extension' > /sys/fs/f2fs//extension_list - Add: echo '[h/c]extension' > /sys/fs/f2fs//extension_list - Del: echo '[h/c]!extension' > /sys/fs/f2fs//extension_list - [h] means add/del hot file extension - [c] means add/del cold file extension Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index d8c241451712..b06ab1f04ff6 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -111,7 +111,8 @@ struct f2fs_super_block { __u8 encrypt_pw_salt[16]; /* Salt used for string2key algorithm */ struct f2fs_device devs[MAX_DEVICES]; /* device list */ __le32 qf_ino[F2FS_MAX_QUOTAS]; /* quota inode numbers */ - __u8 reserved[315]; /* valid reserved region */ + __u8 hot_ext_count; /* # of hot file extension */ + __u8 reserved[314]; /* valid reserved region */ } __packed; /* -- cgit v1.2.3 From 72199320d49dbafa1a99f94f1cd60dc90035c154 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 1 Mar 2018 17:33:32 +0100 Subject: timekeeping: Add the new CLOCK_MONOTONIC_ACTIVE clock The planned change to unify the
behaviour of the MONOTONIC and BOOTTIME clocks vs. suspend removes the ability to retrieve the active non-suspended time of a system. Provide a new CLOCK_MONOTONIC_ACTIVE clock which returns the active non-suspended time of the system via clock_gettime(). This preserves the old behaviour of CLOCK_MONOTONIC before the BOOTTIME/MONOTONIC unification. This new clock also allows applications to detect programmatically that the MONOTONIC and BOOTTIME clocks are identical. Signed-off-by: Thomas Gleixner Cc: Dmitry Torokhov Cc: John Stultz Cc: Jonathan Corbet Cc: Kevin Easton Cc: Linus Torvalds Cc: Mark Salyzyn Cc: Michael Kerrisk Cc: Peter Zijlstra Cc: Petr Mladek Cc: Prarit Bhargava Cc: Sergey Senozhatsky Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20180301165149.965235774@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/timekeeper_internal.h | 2 ++ include/linux/timekeeping.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 7acb953298a7..4b3dca173e89 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -52,6 +52,7 @@ struct tk_read_base { * @offs_real: Offset clock monotonic -> clock realtime * @offs_boot: Offset clock monotonic -> clock boottime * @offs_tai: Offset clock monotonic -> clock tai + * @time_suspended: Accumulated suspend time * @tai_offset: The current UTC to TAI offset in seconds * @clock_was_set_seq: The sequence number of clock was set events * @cs_was_changed_seq: The sequence number of clocksource change events @@ -94,6 +95,7 @@ struct timekeeper { ktime_t offs_real; ktime_t offs_boot; ktime_t offs_tai; + ktime_t time_suspended; s32 tai_offset; unsigned int clock_was_set_seq; u8 cs_was_changed_seq; diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index b17bcce58bc4..440b1935d3a5 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -32,6 
+32,7 @@ extern void getrawmonotonic64(struct timespec64 *ts); extern void ktime_get_ts64(struct timespec64 *ts); extern time64_t ktime_get_seconds(void); extern time64_t ktime_get_real_seconds(void); +extern void ktime_get_active_ts64(struct timespec64 *ts); extern int __getnstimeofday64(struct timespec64 *tv); extern void getnstimeofday64(struct timespec64 *tv); -- cgit v1.2.3 From d6c7270e913db75ca5fdc79915ba780e97ae2857 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 1 Mar 2018 17:33:35 +0100 Subject: timekeeping: Remove boot time specific code Now that the MONOTONIC and BOOTTIME clocks are the same, remove all the special handling from timekeeping. Keep wrappers for the existing users of the *boot* timekeeper interfaces. Signed-off-by: Thomas Gleixner Cc: Dmitry Torokhov Cc: John Stultz Cc: Jonathan Corbet Cc: Kevin Easton Cc: Linus Torvalds Cc: Mark Salyzyn Cc: Michael Kerrisk Cc: Peter Zijlstra Cc: Petr Mladek Cc: Prarit Bhargava Cc: Sergey Senozhatsky Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20180301165150.236279497@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/timekeeping.h | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 440b1935d3a5..abb396731332 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -38,15 +38,19 @@ extern int __getnstimeofday64(struct timespec64 *tv); extern void getnstimeofday64(struct timespec64 *tv); extern void getboottime64(struct timespec64 *ts); -#define ktime_get_real_ts64(ts) getnstimeofday64(ts) +#define ktime_get_real_ts64(ts) getnstimeofday64(ts) + +/* Clock BOOTTIME compatibility wrappers */ +static inline void get_monotonic_boottime64(struct timespec64 *ts) +{ + ktime_get_ts64(ts); +} /* * ktime_t based interfaces */ - enum tk_offsets { TK_OFFS_REAL, - TK_OFFS_BOOT, TK_OFFS_TAI, TK_OFFS_MAX, }; @@ -57,6 +61,10 @@ 
extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs); extern ktime_t ktime_get_raw(void); extern u32 ktime_get_resolution_ns(void); +/* Clock BOOTTIME compatibility wrappers */ +static inline ktime_t ktime_get_boottime(void) { return ktime_get(); } +static inline u64 ktime_get_boot_ns(void) { return ktime_get(); } + /** * ktime_get_real - get the real (wall-) time in ktime_t format */ @@ -65,17 +73,6 @@ static inline ktime_t ktime_get_real(void) return ktime_get_with_offset(TK_OFFS_REAL); } -/** - * ktime_get_boottime - Returns monotonic time since boot in ktime_t format - * - * This is similar to CLOCK_MONTONIC/ktime_get, but also includes the - * time spent in suspend. - */ -static inline ktime_t ktime_get_boottime(void) -{ - return ktime_get_with_offset(TK_OFFS_BOOT); -} - /** * ktime_get_clocktai - Returns the TAI time of day in ktime_t format */ @@ -102,11 +99,6 @@ static inline u64 ktime_get_real_ns(void) return ktime_to_ns(ktime_get_real()); } -static inline u64 ktime_get_boot_ns(void) -{ - return ktime_to_ns(ktime_get_boottime()); -} - static inline u64 ktime_get_tai_ns(void) { return ktime_to_ns(ktime_get_clocktai()); @@ -119,17 +111,17 @@ static inline u64 ktime_get_raw_ns(void) extern u64 ktime_get_mono_fast_ns(void); extern u64 ktime_get_raw_fast_ns(void); -extern u64 ktime_get_boot_fast_ns(void); extern u64 ktime_get_real_fast_ns(void); -/* - * timespec64 interfaces utilizing the ktime based ones - */ -static inline void get_monotonic_boottime64(struct timespec64 *ts) +/* Clock BOOTTIME compatibility wrappers */ +static inline u64 ktime_get_boot_fast_ns(void) { - *ts = ktime_to_timespec64(ktime_get_boottime()); + return ktime_get_mono_fast_ns(); } +/* + * timespec64 interfaces utilizing the ktime based ones + */ static inline void timekeeping_clocktai64(struct timespec64 *ts) { *ts = ktime_to_timespec64(ktime_get_clocktai()); -- cgit v1.2.3 From 127bfa5f4342e63d83a0b07ece376c2e8878e4a5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner 
Date: Thu, 1 Mar 2018 17:33:37 +0100 Subject: hrtimer: Unify MONOTONIC and BOOTTIME clock behavior Now that the MONOTONIC and BOOTTIME clocks are identical, remove all the special casing. The user space visible interfaces still support both clocks, but their behavior is identical. Signed-off-by: Thomas Gleixner Cc: Dmitry Torokhov Cc: John Stultz Cc: Jonathan Corbet Cc: Kevin Easton Cc: Linus Torvalds Cc: Mark Salyzyn Cc: Michael Kerrisk Cc: Peter Zijlstra Cc: Petr Mladek Cc: Prarit Bhargava Cc: Sergey Senozhatsky Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20180301165150.410218515@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index c7902ca7c9f4..78f456fcd242 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -161,11 +161,9 @@ struct hrtimer_clock_base { enum hrtimer_base_type { HRTIMER_BASE_MONOTONIC, HRTIMER_BASE_REALTIME, - HRTIMER_BASE_BOOTTIME, HRTIMER_BASE_TAI, HRTIMER_BASE_MONOTONIC_SOFT, HRTIMER_BASE_REALTIME_SOFT, - HRTIMER_BASE_BOOTTIME_SOFT, HRTIMER_BASE_TAI_SOFT, HRTIMER_MAX_CLOCK_BASES, }; -- cgit v1.2.3 From 92af4dcb4e1c5f58dc337bc97bdffd4e853dbc93 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 1 Mar 2018 17:33:38 +0100 Subject: tracing: Unify the "boot" and "mono" tracing clocks Unify the "boot" and "mono" tracing clocks and document the new behaviour.
Signed-off-by: Thomas Gleixner Cc: Dmitry Torokhov Cc: John Stultz Cc: Jonathan Corbet Cc: Kevin Easton Cc: Linus Torvalds Cc: Mark Salyzyn Cc: Michael Kerrisk Cc: Peter Zijlstra Cc: Petr Mladek Cc: Prarit Bhargava Cc: Sergey Senozhatsky Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20180301165150.489635255@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/timekeeping.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index abb396731332..82c219dfd3bb 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -113,12 +113,6 @@ extern u64 ktime_get_mono_fast_ns(void); extern u64 ktime_get_raw_fast_ns(void); extern u64 ktime_get_real_fast_ns(void); -/* Clock BOOTTIME compatibility wrappers */ -static inline u64 ktime_get_boot_fast_ns(void) -{ - return ktime_get_mono_fast_ns(); -} - /* * timespec64 interfaces utilizing the ktime based ones */ -- cgit v1.2.3 From 32ff77e8cc9e66cc4fb38098f64fd54cc8f54573 Mon Sep 17 00:00:00 2001 From: Milind Chabbi Date: Mon, 12 Mar 2018 14:45:47 +0100 Subject: perf/core: Implement fast breakpoint modification via _IOC_MODIFY_ATTRIBUTES Problem and motivation: Once a breakpoint perf event (PERF_TYPE_BREAKPOINT) is created, there is no flexibility to change the breakpoint type (bp_type), breakpoint address (bp_addr), or breakpoint length (bp_len). The only option is to close the perf event and configure a new breakpoint event. This inflexibility has a significant performance overhead. For example, sampling-based, lightweight performance profilers (and also concurrency bug detection tools), monitor different addresses for a short duration using PERF_TYPE_BREAKPOINT and change the address (bp_addr) to another address or change the kind of breakpoint (bp_type) from "write" to a "read" or vice-versa or change the length (bp_len) of the address being monitored. 
The cost of these modifications is prohibitive since it involves unmapping the circular buffer associated with the perf event, closing the perf event, opening another perf event and mmapping another circular buffer. Solution: The new ioctl flag for perf events, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, introduced in this patch takes a pointer to a struct perf_event_attr as an argument to update an old breakpoint event with new address, type, and size. This facility allows retaining a previous mmapped perf events ring buffer and avoids having to close and reopen another perf event. This patch supports only changing PERF_TYPE_BREAKPOINT event type; future implementations can extend this feature. The patch replicates some of the functionality of modify_user_hw_breakpoint() in kernel/events/hw_breakpoint.c. modify_user_hw_breakpoint cannot be called directly since perf_event_ctx_lock() is already held in _perf_ioctl(). Evidence: Experiments show that the baseline (not able to modify an already created breakpoint) costs an order of magnitude (~10x) more than the suggested optimization (having the ability to dynamically modify a configured breakpoint via ioctl). When the breakpoints typically do not trap, the speedup due to the suggested optimization is ~10x; even when the breakpoints always trap, the speedup is ~4x due to the suggested optimization. Testing: tests posted at https://github.com/linux-contrib/perf_event_modify_bp demonstrate the performance significance of this patch. Tests also check the functional correctness of the patch. Signed-off-by: Milind Chabbi [ Using modify_user_hw_breakpoint_check function. ] [ Reformatted PERF_EVENT_IOC_*, so the values are all in one column.
] Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Frederic Weisbecker Cc: Hari Bathini Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Michael Ellerman Cc: Namhyung Kim Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/20180312134548.31532-8-jolsa@kernel.org Signed-off-by: Ingo Molnar --- include/linux/hw_breakpoint.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index cf045885a499..6058c3844a76 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -53,6 +53,9 @@ register_user_hw_breakpoint(struct perf_event_attr *attr, /* FIXME: only change from the attr, and don't unregister */ extern int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr); +extern int +modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr, + bool check); /* * Kernel breakpoints are not associated with any particular thread. @@ -97,6 +100,10 @@ register_user_hw_breakpoint(struct perf_event_attr *attr, static inline int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) { return -ENOSYS; } +static inline int +modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr, + bool check) { return -ENOSYS; } + static inline struct perf_event * register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, perf_overflow_handler_t triggered, -- cgit v1.2.3 From 31156ec378c2ed10330c8c06bbf36fb7d7a55506 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Mar 2018 17:28:39 +0100 Subject: bsg-lib: introduce a timeout field in struct bsg_job The zfcp driver wants to know the timeout for a bsg job, so add a field to struct bsg_job for it in preparation of not exposing the request to the bsg-lib users. 
Signed-off-by: Christoph Hellwig Reviewed-by: Benjamin Block Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/bsg-lib.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index b1be0233ce35..402223c95ce1 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -44,6 +44,8 @@ struct bsg_job { struct kref kref; + unsigned int timeout; + /* Transport/driver specific request/reply structs */ void *request; void *reply; -- cgit v1.2.3 From ef6fa64f9b8e1611854077ea9213f2eef2428cd2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Mar 2018 17:28:40 +0100 Subject: bsg-lib: remove bsg_job.req Users of the bsg-lib interface should only use the bsg_job data structure and not know about implementation details of it. Signed-off-by: Christoph Hellwig Reviewed-by: Benjamin Block Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/bsg-lib.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 402223c95ce1..08762d297cbd 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -40,7 +40,6 @@ struct bsg_buffer { struct bsg_job { struct scsi_request sreq; struct device *dev; - struct request *req; struct kref kref; -- cgit v1.2.3 From 17cb960f29c29ee07bf6848ada3265f4be55972e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Mar 2018 17:28:41 +0100 Subject: bsg: split handling of SCSI CDBs vs transport requeues The current BSG design tries to shoe-horn the transport-specific passthrough commands into the overall framework for SCSI passthrough requests. This has a couple problems: - each passthrough queue has to set the QUEUE_FLAG_SCSI_PASSTHROUGH flag despite not dealing with SCSI commands at all. 
Because of that these queues could also incorrectly accept SCSI commands from in-kernel users or through the legacy SCSI_IOCTL_SEND_COMMAND ioctl. - the real SCSI bsg queues also incorrectly accept bsg requests of the BSG_SUB_PROTOCOL_SCSI_TRANSPORT type - the bsg transport code is almost unreadable because it tries to reuse different SCSI concepts for its own purpose. This patch instead adds a new bsg_ops structure to handle the two cases differently, and thus solves all of the above problems. Another side effect is that the bsg-lib queues also don't need to embed a struct scsi_request anymore. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/bsg-lib.h | 4 +++- include/linux/bsg.h | 35 ++++++++++++++++++++++------------- 2 files changed, 25 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 08762d297cbd..28a7ccc55c89 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -38,7 +38,6 @@ struct bsg_buffer { }; struct bsg_job { - struct scsi_request sreq; struct device *dev; struct kref kref; @@ -64,6 +63,9 @@ struct bsg_job { struct bsg_buffer request_payload; struct bsg_buffer reply_payload; + int result; + unsigned int reply_payload_rcv_len; + void *dd_data; /* Used for driver-specific storage */ }; diff --git a/include/linux/bsg.h b/include/linux/bsg.h index 2a202e41a3af..0c7dd9ceb139 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -1,34 +1,43 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef BSG_H -#define BSG_H +#ifndef _LINUX_BSG_H +#define _LINUX_BSG_H #include +struct request; + +#ifdef CONFIG_BLK_DEV_BSG +struct bsg_ops { + int (*check_proto)(struct sg_io_v4 *hdr); + int (*fill_hdr)(struct request *rq, struct sg_io_v4 *hdr, + fmode_t mode); + int (*complete_rq)(struct request *rq, struct sg_io_v4 *hdr); + void (*free_rq)(struct request *rq); +}; -#if
defined(CONFIG_BLK_DEV_BSG) struct bsg_class_device { struct device *class_dev; struct device *parent; int minor; struct request_queue *queue; struct kref ref; + const struct bsg_ops *ops; void (*release)(struct device *); }; -extern int bsg_register_queue(struct request_queue *q, - struct device *parent, const char *name, - void (*release)(struct device *)); -extern void bsg_unregister_queue(struct request_queue *); +int bsg_register_queue(struct request_queue *q, struct device *parent, + const char *name, const struct bsg_ops *ops, + void (*release)(struct device *)); +int bsg_scsi_register_queue(struct request_queue *q, struct device *parent); +void bsg_unregister_queue(struct request_queue *q); #else -static inline int bsg_register_queue(struct request_queue *q, - struct device *parent, const char *name, - void (*release)(struct device *)) +static inline int bsg_scsi_register_queue(struct request_queue *q, + struct device *parent) { return 0; } static inline void bsg_unregister_queue(struct request_queue *q) { } -#endif - -#endif +#endif /* CONFIG_BLK_DEV_BSG */ +#endif /* _LINUX_BSG_H */ -- cgit v1.2.3 From be9fc0971a5c27b791608cf9705a04fe96dbd395 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Mar 2018 12:44:53 +0100 Subject: net: fix sysctl_fb_tunnels_only_for_init_net link error The new variable is only available when CONFIG_SYSCTL is enabled, otherwise we get a link error: net/ipv4/ip_tunnel.o: In function `ip_tunnel_init_net': ip_tunnel.c:(.text+0x278b): undefined reference to `sysctl_fb_tunnels_only_for_init_net' net/ipv6/sit.o: In function `sit_init_net': sit.c:(.init.text+0x4c): undefined reference to `sysctl_fb_tunnels_only_for_init_net' net/ipv6/ip6_tunnel.o: In function `ip6_tnl_init_net': ip6_tunnel.c:(.init.text+0x39): undefined reference to `sysctl_fb_tunnels_only_for_init_net' This adds an extra condition, keeping the traditional behavior when CONFIG_SYSCTL is disabled. 
Fixes: 79134e6ce2c9 ("net: do not create fallback tunnels for non-default namespaces") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5fbb9f1da7fd..913b1cc882cf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -589,7 +589,9 @@ extern int sysctl_fb_tunnels_only_for_init_net; static inline bool net_has_fallback_tunnels(const struct net *net) { - return net == &init_net || !sysctl_fb_tunnels_only_for_init_net; + return net == &init_net || + !IS_ENABLED(CONFIG_SYSCTL) || + !sysctl_fb_tunnels_only_for_init_net; } static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) -- cgit v1.2.3 From 2623c7a5f2799569d8bb05eb211da524a8144cb3 Mon Sep 17 00:00:00 2001 From: Taras Kondratiuk Date: Fri, 9 Mar 2018 08:34:41 +0000 Subject: libata: add refcounting to ata_host After commit 9a6d6a2ddabb ("ata: make ata port as parent device of scsi host") manual driver unbind/remove causes use-after-free. Unbind unconditionally invokes devres_release_all() which calls ata_host_release() and frees ata_host/ata_port memory while it is still being referenced as a parent of SCSI host. When SCSI host is finally released scsi_host_dev_release() calls put_device(parent) and accesses freed ata_port memory. Add reference counting to make sure that ata_host lives long enough. 
Bug report: https://lkml.org/lkml/2017/11/1/945 Fixes: 9a6d6a2ddabb ("ata: make ata port as parent device of scsi host") Cc: Tejun Heo Cc: Lin Ming Cc: linux-ide@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Taras Kondratiuk Signed-off-by: Tejun Heo --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index ed9826b21c5e..1795fecdea17 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -617,6 +617,7 @@ struct ata_host { void *private_data; struct ata_port_operations *ops; unsigned long flags; + struct kref kref; struct mutex eh_mutex; struct task_struct *eh_owner; -- cgit v1.2.3 From c2b37f76485f073f020e60b5954b6dc4e55f693c Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Thu, 8 Mar 2018 15:51:41 +0200 Subject: IB/mlx5: Fix integer overflows in mlx5_ib_create_srq This patch validates user provided input to prevent integer overflow due to integer manipulation in the mlx5_ib_create_srq function. 
Cc: syzkaller Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Boris Pismenny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/driver.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6ed79a8a8318..9d3a03364e6e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -453,8 +453,8 @@ struct mlx5_core_srq { struct mlx5_core_rsc_common common; /* must be first */ u32 srqn; int max; - int max_gs; - int max_avail_gather; + size_t max_gs; + size_t max_avail_gather; int wqe_shift; void (*event) (struct mlx5_core_srq *, enum mlx5_event); -- cgit v1.2.3 From 6417250d3f894e66a68ba1cd93676143f2376a6f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 6 Mar 2018 19:34:42 -0800 Subject: workqueue: remove unused cancel_work() Found this by accident. There are no usages of bare cancel_work() in current kernel source. 
Signed-off-by: Stephen Hemminger Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index bc0cda180c8b..0c3301421c57 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -456,7 +456,6 @@ extern int schedule_on_each_cpu(work_func_t func); int execute_in_process_context(work_func_t fn, struct execute_work *); extern bool flush_work(struct work_struct *work); -extern bool cancel_work(struct work_struct *work); extern bool cancel_work_sync(struct work_struct *work); extern bool flush_delayed_work(struct delayed_work *dwork); -- cgit v1.2.3 From 2a4d2c4240c00e7db8fb64e377bd2180cc30b146 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 3 Mar 2018 10:53:24 +0100 Subject: PCI: Make pci_wakeup_bus() & pci_bus_set_current_state() public There are PCI devices which are power-manageable by a nonstandard means, such as a custom ACPI method. One example are discrete GPUs in hybrid graphics laptops, another are Thunderbolt controllers in Macs. Such devices can't be put into D3cold with pci_set_power_state() because pci_platform_power_transition() fails with -ENODEV. Instead they're put into D3hot by pci_set_power_state() and subsequently into D3cold by invoking the nonstandard means. However as a consequence the cached current_state is incorrectly left at D3hot. What we need to do is walk the hierarchy below such a PCI device on powerdown and update the current_state to D3cold. On powerup the PCI device itself and the hierarchy below it is in D0uninitialized, so we need to walk the hierarchy again and wake all devices, causing them to be put into D0active and then letting them autosuspend as they see fit. To this end make pci_wakeup_bus() & pci_bus_set_current_state() public so PCI drivers don't have to reinvent the wheel. Cc: Rafael J. 
Wysocki Acked-by: Bjorn Helgaas Signed-off-by: Lukas Wunner Link: https://patchwork.freedesktop.org/patch/msgid/2962443259e7faec577274b4ef8c54aad66f9a94.1520068884.git.lukas@wunner.de --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 024a1beda008..ae42289662df 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1147,6 +1147,8 @@ void pci_pme_wakeup_bus(struct pci_bus *bus); void pci_d3cold_enable(struct pci_dev *dev); void pci_d3cold_disable(struct pci_dev *dev); bool pcie_relaxed_ordering_enabled(struct pci_dev *dev); +void pci_wakeup_bus(struct pci_bus *bus); +void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state); /* PCI Virtual Channel */ int pci_save_vc_state(struct pci_dev *dev); -- cgit v1.2.3 From 07f4f97d7b4bf325d9f558c5b58230387e4e57e0 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 3 Mar 2018 10:53:24 +0100 Subject: vga_switcheroo: Use device link for HDA controller Back in 2013, runtime PM for GPUs with integrated HDA controller was introduced with commits 0d69704ae348 ("gpu/vga_switcheroo: add driver control power feature. (v3)") and 246efa4a072f ("snd/hda: add runtime suspend/resume on optimus support (v4)"). Briefly, the idea was that the HDA controller is forced on and off in unison with the GPU. The original code is mostly still in place even though it was never a 100% perfect solution: E.g. on access to the HDA controller, the GPU is powered up via vga_switcheroo_runtime_resume_hdmi_audio() but there are no provisions to keep it resumed until access to the HDA controller has ceased: The GPU autosuspends after 5 seconds, rendering the HDA controller inaccessible. Additionally, a kludge is required when hda_intel.c probes: It has to check whether the GPU is powered down (check_hdmi_disabled()) and defer probing if so. 
However in the meantime (in v4.10) the driver core has gained a feature called device links which promises to solve such issues in a clean way: It allows us to declare a dependency from the HDA controller (consumer) to the GPU (supplier). The PM core then automagically ensures that the GPU is runtime resumed as long as the HDA controller's ->probe hook is executed and whenever the HDA controller is accessed. By default, the HDA controller has a dependency on its parent, a PCIe Root Port. Adding a device link creates another dependency on its sibling: PCIe Root Port ^ ^ | | | | HDA ===> GPU The device link is not only used for runtime PM, it also guarantees that on system sleep, the HDA controller suspends before the GPU and resumes after the GPU, and on system shutdown the HDA controller's ->shutdown hook is executed before the one of the GPU. It is a complete solution. Using this functionality is as simple as calling device_link_add(), which results in a dmesg entry like this: pci 0000:01:00.1: Linked as a consumer to 0000:01:00.0 The code for the GPU-governed audio power management can thus be removed (except where it's still needed for legacy manual power control). The device link is added in a PCI quirk rather than in hda_intel.c. It is therefore legal for the GPU to runtime suspend to D3cold even if the HDA controller is not bound to a driver or if CONFIG_SND_HDA_INTEL is not enabled, for accesses to the HDA controller will cause the GPU to wake up regardless if they're occurring outside of hda_intel.c (think config space readout via sysfs). Contrary to the previous implementation, the HDA controller's power state is now self-governed, rather than GPU-governed, whereas the GPU's power state is no longer fully self-governed. (The HDA controller needs to runtime suspend before the GPU can.) 
It is thus crucial that runtime PM is always activated on the HDA controller even if CONFIG_SND_HDA_POWER_SAVE_DEFAULT is set to 0 (which is the default), lest the GPU stays awake. This is achieved by setting the auto_runtime_pm flag on every codec and the AZX_DCAPS_PM_RUNTIME flag on the HDA controller. A side effect is that power consumption might be reduced if the GPU is in use but the HDA controller is not, because the HDA controller is now allowed to go to D3hot. Before, it was forced to stay in D0 as long as the GPU was in use. (There is no reduction in power consumption on my Nvidia GK107, but there might be on other chips.) The code paths for legacy manual power control are adjusted such that runtime PM is disabled during power off, thereby preventing the PM core from resuming the HDA controller. Note that the device link is not only added on vga_switcheroo capable systems, but for *any* GPU with integrated HDA controller. The idea is that the HDA controller streams audio via connectors located on the GPU, so the GPU needs to be on for the HDA controller to do anything useful. This commit implicitly fixes an unbalanced runtime PM ref upon unbind of hda_intel.c: On ->probe, a runtime PM ref was previously released under the condition "azx_has_pm_runtime(chip) || hda->use_vga_switcheroo", but on ->remove a runtime PM ref was only acquired under the first of those conditions. Thus, binding and unbinding the driver twice on a vga_switcheroo capable system caused the runtime PM refcount to drop below zero. The issue is resolved because the AZX_DCAPS_PM_RUNTIME flag is now always set if use_vga_switcheroo is true. For more information on device links please refer to: https://www.kernel.org/doc/html/latest/driver-api/device_link.html Documentation/driver-api/device_link.rst Cc: Dave Airlie Cc: Ben Skeggs Cc: Alex Deucher Cc: Rafael J. 
Wysocki Acked-by: Bjorn Helgaas Reviewed-by: Takashi Iwai Reviewed-by: Peter Wu Tested-by: Kai Heng Feng # AMD PowerXpress Tested-by: Mike Lothian # AMD PowerXpress Tested-by: Denis Lisov # Nvidia Optimus Tested-by: Peter Wu # Nvidia Optimus Tested-by: Lukas Wunner # MacBook Pro Signed-off-by: Lukas Wunner Link: https://patchwork.freedesktop.org/patch/msgid/51bd38360ff502a8c42b1ebf4405ee1d3f27118d.1520068884.git.lukas@wunner.de --- include/linux/pci_ids.h | 1 + include/linux/vga_switcheroo.h | 6 ------ 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index a6b30667a331..a637a7d8ce5b 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -45,6 +45,7 @@ #define PCI_CLASS_MULTIMEDIA_VIDEO 0x0400 #define PCI_CLASS_MULTIMEDIA_AUDIO 0x0401 #define PCI_CLASS_MULTIMEDIA_PHONE 0x0402 +#define PCI_CLASS_MULTIMEDIA_HD_AUDIO 0x0403 #define PCI_CLASS_MULTIMEDIA_OTHER 0x0480 #define PCI_BASE_CLASS_MEMORY 0x05 diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index 960bedbdec87..77f0f0af3a71 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -168,11 +168,8 @@ int vga_switcheroo_process_delayed_switch(void); bool vga_switcheroo_client_probe_defer(struct pci_dev *pdev); enum vga_switcheroo_state vga_switcheroo_get_client_state(struct pci_dev *dev); -void vga_switcheroo_set_dynamic_switch(struct pci_dev *pdev, enum vga_switcheroo_state dynamic); - int vga_switcheroo_init_domain_pm_ops(struct device *dev, struct dev_pm_domain *domain); void vga_switcheroo_fini_domain_pm_ops(struct device *dev); -int vga_switcheroo_init_domain_pm_optimus_hdmi_audio(struct device *dev, struct dev_pm_domain *domain); #else static inline void vga_switcheroo_unregister_client(struct pci_dev *dev) {} @@ -192,11 +189,8 @@ static inline int vga_switcheroo_process_delayed_switch(void) { return 0; } static inline bool 
vga_switcheroo_client_probe_defer(struct pci_dev *pdev) { return false; } static inline enum vga_switcheroo_state vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; } -static inline void vga_switcheroo_set_dynamic_switch(struct pci_dev *pdev, enum vga_switcheroo_state dynamic) {} - static inline int vga_switcheroo_init_domain_pm_ops(struct device *dev, struct dev_pm_domain *domain) { return -EINVAL; } static inline void vga_switcheroo_fini_domain_pm_ops(struct device *dev) {} -static inline int vga_switcheroo_init_domain_pm_optimus_hdmi_audio(struct device *dev, struct dev_pm_domain *domain) { return -EINVAL; } #endif #endif /* _LINUX_VGA_SWITCHEROO_H_ */ -- cgit v1.2.3 From dba0bc7b76dcf80f82f5a7542605d4abc52808f2 Mon Sep 17 00:00:00 2001 From: Derek Basehore Date: Wed, 28 Feb 2018 21:48:18 -0800 Subject: irqchip/gic-v3-its: Add ability to save/restore ITS state Some platforms power off GIC logic in suspend, so we need to save/restore state. The distributor and redistributor registers need to be handled in firmware code due to access permissions on those registers, but the ITS registers can be restored in the kernel. We limit this to systems where the ITS collections are implemented in HW (as opposed to being backed by memory tables), as they are the only ones that cannot be dealt with by the firmware. 
Signed-off-by: Derek Basehore [maz: fixed changelog, dropped DT property, limited to HCC being >0] Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index c00c4c33e432..9aacea2aa938 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -312,7 +312,8 @@ #define GITS_TYPER_DEVBITS_SHIFT 13 #define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1) #define GITS_TYPER_PTA (1UL << 19) -#define GITS_TYPER_HWCOLLCNT_SHIFT 24 +#define GITS_TYPER_HCC_SHIFT 24 +#define GITS_TYPER_HCC(r) (((r) >> GITS_TYPER_HCC_SHIFT) & 0xff) #define GITS_TYPER_VMOVP (1ULL << 37) #define GITS_IIDR_REV_SHIFT 12 -- cgit v1.2.3 From 001f86137d3fca3c9002beaa7609c666715ebc70 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 12 Mar 2018 11:24:27 -0700 Subject: EDAC: Add new memory type for non-volatile DIMMs There are now non-volatile versions of DIMMs. Add a new entry to "enum mem_type" and a new string in edac_mem_types[]. Signed-off-by: Tony Luck Cc: "Rafael J. Wysocki" Cc: Aristeu Rozanski Cc: Dan Williams Cc: Jean Delvare Cc: Len Brown Cc: Mauro Carvalho Chehab Cc: Qiuxu Zhuo Cc: linux-acpi@vger.kernel.org Cc: linux-edac Cc: linux-nvdimm@lists.01.org Link: http://lkml.kernel.org/r/20180312182430.10335-3-tony.luck@intel.com Signed-off-by: Borislav Petkov --- include/linux/edac.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index cd75c173fd00..bffb97828ed6 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -186,6 +186,7 @@ static inline char *mc_event_error_type(const unsigned int err_type) * @MEM_RDDR4: Registered DDR4 RAM * This is a variant of the DDR4 memories. * @MEM_LRDDR4: Load-Reduced DDR4 memory. 
+ * @MEM_NVDIMM: Non-volatile RAM */ enum mem_type { MEM_EMPTY = 0, @@ -209,6 +210,7 @@ enum mem_type { MEM_DDR4, MEM_RDDR4, MEM_LRDDR4, + MEM_NVDIMM, }; #define MEM_FLAG_EMPTY BIT(MEM_EMPTY) @@ -231,6 +233,7 @@ enum mem_type { #define MEM_FLAG_DDR4 BIT(MEM_DDR4) #define MEM_FLAG_RDDR4 BIT(MEM_RDDR4) #define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4) +#define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) /** * enum edac-type - Error Detection and Correction capabilities and mode -- cgit v1.2.3 From 6deae96b42eb1fa84938088087de0bd748f53093 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 12 Mar 2018 11:24:29 -0700 Subject: firmware, DMI: Add function to look up a handle and return DIMM size When we first scan the SMBIOS table, save the size of the DIMM. Provide a function for other code (EDAC driver) to look up the size of a DIMM from its SMBIOS handle. Reviewed-by: Jean Delvare Signed-off-by: Tony Luck Cc: Aristeu Rozanski Cc: Dan Williams Cc: Len Brown Cc: Mauro Carvalho Chehab Cc: Qiuxu Zhuo Cc: "Rafael J. 
Wysocki" Cc: linux-acpi@vger.kernel.org Cc: linux-nvdimm@lists.01.org Link: http://lkml.kernel.org/r/20180312182430.10335-5-tony.luck@intel.com Signed-off-by: Borislav Petkov --- include/linux/dmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmi.h b/include/linux/dmi.h index 46e151172d95..7f5929123b69 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -113,6 +113,7 @@ extern int dmi_walk(void (*decode)(const struct dmi_header *, void *), void *private_data); extern bool dmi_match(enum dmi_field f, const char *str); extern void dmi_memdev_name(u16 handle, const char **bank, const char **device); +extern u64 dmi_memdev_size(u16 handle); #else @@ -142,6 +143,7 @@ static inline bool dmi_match(enum dmi_field f, const char *str) { return false; } static inline void dmi_memdev_name(u16 handle, const char **bank, const char **device) { } +static inline u64 dmi_memdev_size(u16 handle) { return ~0ul; } static inline const struct dmi_system_id * dmi_first_match(const struct dmi_system_id *list) { return NULL; } -- cgit v1.2.3 From 9c692d5ae7ea84d221d7504b7ebef07ea8f3ff27 Mon Sep 17 00:00:00 2001 From: Bogdan Purcareata Date: Fri, 2 Mar 2018 04:23:58 -0600 Subject: staging: fsl-mc: Move DPBP out of staging Move the source files out of staging into their final locations: - dpbp.c goes to drivers/bus/fsl-mc/, next to the core infrastructure - dpbp-cmd.h gets merged into drivers/bus/fsl-mc/fsl-mc-private.h, next to the other internally used APIs - dpbp.h gets merged into include/linux/fsl/mc.h, exposing the public API Update references in the dpaa2-eth staging driver. 
DPBP stands for Data Path Buffer Pool - you can read more about the object in Documentation/networking/dpaa2/overview.rst Signed-off-by: Bogdan Purcareata Reviewed-by: Laurentiu Tudor Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl/mc.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 765ba41f5987..66118e1fb5b9 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -451,4 +451,46 @@ static inline bool is_fsl_mc_bus_dprtc(const struct fsl_mc_device *mc_dev) return mc_dev->dev.type == &fsl_mc_bus_dprtc_type; } +/* + * Data Path Buffer Pool (DPBP) API + * Contains initialization APIs and runtime control APIs for DPBP + */ + +int dpbp_open(struct fsl_mc_io *mc_io, + u32 cmd_flags, + int dpbp_id, + u16 *token); + +int dpbp_close(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token); + +int dpbp_enable(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token); + +int dpbp_disable(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token); + +int dpbp_reset(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token); + +/** + * struct dpbp_attr - Structure representing DPBP attributes + * @id: DPBP object ID + * @bpid: Hardware buffer pool ID; should be used as an argument in + * acquire/release operations on buffers + */ +struct dpbp_attr { + int id; + u16 bpid; +}; + +int dpbp_get_attributes(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + struct dpbp_attr *attr); + #endif /* _FSL_MC_H_ */ -- cgit v1.2.3 From 70ae9cf015a165c33b63c9c7718f5a3c70e51f96 Mon Sep 17 00:00:00 2001 From: Bogdan Purcareata Date: Fri, 2 Mar 2018 04:23:59 -0600 Subject: staging: fsl-mc: Move DPCON out of staging Move the source files out of staging into their final locations: - dpcon.c goes to drivers/bus/fsl-mc/, next to the core infrastructure - dpcon-cmd.h gets merged into drivers/bus/fsl-mc/fsl-mc-private.h, next to the other internally used APIs - 
dpcon.h gets merged into include/linux/fsl/mc.h, exposing the public API Update references in the dpaa2-eth staging driver. DPCON stands for Data Path Concentrator - an interface between DPIO (Data Path IO) and its users (e.g. dpaa2-eth). You can read more about DPIO in Documentation/networking/dpaa2/overview.rst Signed-off-by: Bogdan Purcareata Reviewed-by: Laurentiu Tudor Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl/mc.h | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 66118e1fb5b9..cfb1fbf3a882 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -493,4 +493,70 @@ int dpbp_get_attributes(struct fsl_mc_io *mc_io, u16 token, struct dpbp_attr *attr); +/* Data Path Concentrator (DPCON) API + * Contains initialization APIs and runtime control APIs for DPCON + */ + +/** + * Use it to disable notifications; see dpcon_set_notification() + */ +#define DPCON_INVALID_DPIO_ID (int)(-1) + +int dpcon_open(struct fsl_mc_io *mc_io, + u32 cmd_flags, + int dpcon_id, + u16 *token); + +int dpcon_close(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token); + +int dpcon_enable(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token); + +int dpcon_disable(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token); + +int dpcon_reset(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token); + +/** + * struct dpcon_attr - Structure representing DPCON attributes + * @id: DPCON object ID + * @qbman_ch_id: Channel ID to be used by dequeue operation + * @num_priorities: Number of priorities for the DPCON channel (1-8) + */ +struct dpcon_attr { + int id; + u16 qbman_ch_id; + u8 num_priorities; +}; + +int dpcon_get_attributes(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + struct dpcon_attr *attr); + +/** + * struct dpcon_notification_cfg - Structure representing notification params + * @dpio_id: DPIO object ID; must be configured 
with a notification channel; + * to disable notifications set it to 'DPCON_INVALID_DPIO_ID'; + * @priority: Priority selection within the DPIO channel; valid values + * are 0-7, depending on the number of priorities in that channel + * @user_ctx: User context value provided with each CDAN message + */ +struct dpcon_notification_cfg { + int dpio_id; + u8 priority; + u64 user_ctx; +}; + +int dpcon_set_notification(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + struct dpcon_notification_cfg *cfg); + #endif /* _FSL_MC_H_ */ -- cgit v1.2.3 From 4dcb31d4649df36297296b819437709f5407059c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Mar 2018 09:04:16 -0700 Subject: net: use skb_to_full_sk() in skb_update_prio() Andrei Vagin reported a KASAN: slab-out-of-bounds error in skb_update_prio() Since SYNACK might be attached to a request socket, we need to get back to the listener socket. Since this listener is manipulated without locks, add const qualifiers to sock_cgroup_prioidx() so that the const can also be used in skb_update_prio() Also add the const qualifier to sock_cgroup_classid() for consistency. Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener") Signed-off-by: Eric Dumazet Reported-by: Andrei Vagin Signed-off-by: David S. Miller --- include/linux/cgroup-defs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 9f242b876fde..f8e76d01a5ad 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -755,13 +755,13 @@ struct sock_cgroup_data { * updaters and return part of the previous pointer as the prioidx or * classid. Such races are short-lived and the result isn't critical. 
*/ -static inline u16 sock_cgroup_prioidx(struct sock_cgroup_data *skcd) +static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd) { /* fallback to 1 which is always the ID of the root cgroup */ return (skcd->is_data & 1) ? skcd->prioidx : 1; } -static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd) +static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd) { /* fallback to 0 which is the unconfigured default classid */ return (skcd->is_data & 1) ? skcd->classid : 0; -- cgit v1.2.3 From 83fc580dcc2f0f36114477c4ac7adbe5c32329a3 Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Thu, 8 Mar 2018 16:03:27 -0800 Subject: Input: gpio-keys - add support for wakeup event action Add support for specifying event actions to trigger wakeup when using the gpio-keys input device as a wakeup source. This would allow the device to configure when to wakeup the system. For example a gpio-keys input device for pen insert, may only want to wakeup the system when ejecting the pen. 
Suggested-by: Brian Norris Signed-off-by: Jeffy Chen Reviewed-by: Rob Herring Signed-off-by: Dmitry Torokhov --- include/linux/gpio_keys.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h index d06bf77400f1..7160df54a6fe 100644 --- a/include/linux/gpio_keys.h +++ b/include/linux/gpio_keys.h @@ -13,6 +13,7 @@ struct device; * @desc: label that will be attached to button's gpio * @type: input event type (%EV_KEY, %EV_SW, %EV_ABS) * @wakeup: configure the button as a wake-up source + * @wakeup_event_action: event action to trigger wakeup * @debounce_interval: debounce ticks interval in msecs * @can_disable: %true indicates that userspace is allowed to * disable button via sysfs @@ -26,6 +27,7 @@ struct gpio_keys_button { const char *desc; unsigned int type; int wakeup; + int wakeup_event_action; int debounce_interval; bool can_disable; int value; -- cgit v1.2.3 From c4ccc893ce2ab819171f797e8b2c702cc87cb84a Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 12 Mar 2018 10:41:18 +0100 Subject: PCI: Add Altera vendor ID Add the Altera PCI Vendor id to pci_ids.h and remove the private definitions from xillybus_pcie.c and altera-cvp.c. 
Signed-off-by: Johannes Thumshirn Cc: Bjorn Helgaas Cc: Eli Billauer Cc: Anatolij Gustschin Acked-by: Eli Billauer Acked-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index a6b30667a331..6a96a70fb462 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1561,6 +1561,8 @@ #define PCI_DEVICE_ID_SERVERWORKS_CSB6LPC 0x0227 #define PCI_DEVICE_ID_SERVERWORKS_HT1100LD 0x0408 +#define PCI_VENDOR_ID_ALTERA 0x1172 + #define PCI_VENDOR_ID_SBE 0x1176 #define PCI_DEVICE_ID_SBE_WANXL100 0x0301 #define PCI_DEVICE_ID_SBE_WANXL200 0x0302 -- cgit v1.2.3 From 0b2ed745e76debad33410870c4850ef2ca42f5e3 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Fri, 9 Mar 2018 14:46:55 +0000 Subject: nvmem: Document struct nvmem_config Add a simple description of struct nvmem_config and its fields. Cc: Srinivas Kandagatla Cc: Heiko Stuebner Cc: Masahiro Yamada Cc: Carlo Caione Cc: Kevin Hilman Cc: Matthias Brugger Cc: cphealy@gmail.com Cc: linux-kernel@vger.kernel.org Cc: linux-mediatek@lists.infradead.org Cc: linux-rockchip@lists.infradead.org Cc: linux-amlogic@lists.infradead.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Andrey Smirnov Signed-off-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-provider.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 497706f5adca..a39f76ff2ccd 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -22,6 +22,28 @@ typedef int (*nvmem_reg_read_t)(void *priv, unsigned int offset, typedef int (*nvmem_reg_write_t)(void *priv, unsigned int offset, void *val, size_t bytes); +/** + * struct nvmem_config - NVMEM device configuration + * + * @dev: Parent device. 
+ * @name: Optional name. + * @id: Optional device ID used in full name. Ignored if name is NULL. + * @owner: Pointer to exporter module. Used for refcounting. + * @cells: Optional array of pre-defined NVMEM cells. + * @ncells: Number of elements in cells. + * @read_only: Device is read-only. + * @root_only: Device is accessible to root only. + * @reg_read: Callback to read data. + * @reg_write: Callback to write data. + * @size: Device size. + * @word_size: Minimum read/write access granularity. + * @stride: Minimum read/write access stride. + * @priv: User context passed to read/write callbacks. + * + * Note: A default "nvmem<id>" name will be assigned to the device if + * no name is specified in its configuration. In such case "<id>" is + * generated with ida_simple_get() and provided id field is ignored. + */ struct nvmem_config { struct device *dev; const char *name; -- cgit v1.2.3 From fd0f4906a3cdf2fedc980764a073f2313bdf1f47 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Fri, 9 Mar 2018 14:46:56 +0000 Subject: nvmem: core: Allow specifying device name verbatim Add code to allow callers to avoid having the nvmem core append a numeric suffix to the end of the name by passing a config->id of -1. 
Cc: Srinivas Kandagatla Cc: Heiko Stuebner Cc: Masahiro Yamada Cc: Carlo Caione Cc: Kevin Hilman Cc: Matthias Brugger Cc: cphealy@gmail.com Cc: linux-kernel@vger.kernel.org Cc: linux-mediatek@lists.infradead.org Cc: linux-rockchip@lists.infradead.org Cc: linux-amlogic@lists.infradead.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Andrey Smirnov Signed-off-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-provider.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index a39f76ff2ccd..b00567a07496 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -43,6 +43,9 @@ typedef int (*nvmem_reg_write_t)(void *priv, unsigned int offset, * Note: A default "nvmem<id>" name will be assigned to the device if * no name is specified in its configuration. In such case "<id>" is * generated with ida_simple_get() and provided id field is ignored. + * + * Note: Specifying name and setting id to -1 implies a unique device + * whose name is provided as-is (kept unaltered). */ struct nvmem_config { struct device *dev; const char *name; -- cgit v1.2.3 From f1f50eca5f90527d2cca3479cda08883958777f6 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Fri, 9 Mar 2018 14:46:57 +0000 Subject: nvmem: Introduce devm_nvmem_(un)register() Introduce devm_nvmem_register()/devm_nvmem_unregister() to make .remove() unnecessary in trivial drivers. 
Cc: Srinivas Kandagatla Cc: Heiko Stuebner Cc: Masahiro Yamada Cc: Carlo Caione Cc: Kevin Hilman Cc: Matthias Brugger Cc: cphealy@gmail.com Cc: linux-kernel@vger.kernel.org Cc: linux-mediatek@lists.infradead.org Cc: linux-rockchip@lists.infradead.org Cc: linux-amlogic@lists.infradead.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Andrey Smirnov Signed-off-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-provider.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index b00567a07496..f89598bc4e1c 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -72,6 +72,11 @@ struct nvmem_config { struct nvmem_device *nvmem_register(const struct nvmem_config *cfg); int nvmem_unregister(struct nvmem_device *nvmem); +struct nvmem_device *devm_nvmem_register(struct device *dev, + const struct nvmem_config *cfg); + +int devm_nvmem_unregister(struct device *dev, struct nvmem_device *nvmem); + #else static inline struct nvmem_device *nvmem_register(const struct nvmem_config *c) @@ -84,5 +89,17 @@ static inline int nvmem_unregister(struct nvmem_device *nvmem) return -ENOSYS; } +static inline struct nvmem_device * +devm_nvmem_register(struct device *dev, const struct nvmem_config *c) +{ + return nvmem_register(c); +} + +static inline int +devm_nvmem_unregister(struct device *dev, struct nvmem_device *nvmem) +{ + return nvmem_unregister(nvmem); +} + #endif /* CONFIG_NVMEM */ #endif /* ifndef _LINUX_NVMEM_PROVIDER_H */ -- cgit v1.2.3 From 16ca6a607d84bef0129698d8d808f501afd08d43 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Mar 2018 21:48:01 +0000 Subject: KVM: arm/arm64: vgic: Don't populate multiple LRs with the same vintid The vgic code is trying to be clever when injecting GICv2 SGIs, and will happily populate LRs with the same interrupt number if they come from multiple vcpus (after 
all, they are distinct interrupt sources). Unfortunately, this is against the letter of the architecture, and the GICv2 architecture spec says "Each valid interrupt stored in the List registers must have a unique VirtualID for that virtual CPU interface.". GICv3 has similar (although slightly ambiguous) restrictions. This results in guests locking up when using GICv2-on-GICv3, for example. The obvious fix is to stop trying so hard, and inject a single vcpu per SGI per guest entry. After all, pending SGIs with multiple source vcpus are pretty rare, and are mostly seen in scenarios where the physical CPUs are severely overcommitted. But as we now only inject a single instance of a multi-source SGI per vcpu entry, we may delay those interrupts for longer than strictly necessary, and run the risk of injecting lower priority interrupts in the meantime. In order to address this, we adopt a three stage strategy: - If we encounter a multi-source SGI in the AP list while computing its depth, we force the list to be sorted - When populating the LRs, we prevent the injection of any interrupt of lower priority than that of the first multi-source SGI we've injected. - Finally, the injection of a multi-source SGI triggers the request of a maintenance interrupt when there will be no pending interrupt in the LRs (HCR_NPIE). At the point where the last pending interrupt in the LRs switches from Pending to Active, the maintenance interrupt will be delivered, allowing us to add the remaining SGIs using the same process. 
Cc: stable@vger.kernel.org Fixes: 0919e84c0fc1 ("KVM: arm/arm64: vgic-new: Add IRQ sync/flush framework") Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 1 + include/linux/irqchip/arm-gic.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index c00c4c33e432..b26eccc78fb1 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -503,6 +503,7 @@ #define ICH_HCR_EN (1 << 0) #define ICH_HCR_UIE (1 << 1) +#define ICH_HCR_NPIE (1 << 3) #define ICH_HCR_TC (1 << 10) #define ICH_HCR_TALL0 (1 << 11) #define ICH_HCR_TALL1 (1 << 12) diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index d3453ee072fc..68d8b1f73682 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -84,6 +84,7 @@ #define GICH_HCR_EN (1 << 0) #define GICH_HCR_UIE (1 << 1) +#define GICH_HCR_NPIE (1 << 3) #define GICH_LR_VIRTUALID (0x3ff << 0) #define GICH_LR_PHYSID_CPUID_SHIFT (10) -- cgit v1.2.3 From 72f7cc09b143cf972c8c7571fc95d1017ba76c3d Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 13 Mar 2018 15:18:46 +0200 Subject: IB/mlx5: Expose more priorities for bypass namespace BYPASS namespace is used by the RDMA side to insert flow rules into the vport RX flow tables. Currently only 8 priorities are exposed; increase this to 16 to allow more flexibility. This change will also cause the BYPASS namespace to use 32 levels (as opposed to 16 today) of flow tables, 16 levels for regular rules and 16 for don't trap rules. 
Reviewed-by: Maor Gottlieb Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index e5258ee4e38b..413df3c11a46 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1204,8 +1204,8 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; } -#define MLX5_BY_PASS_NUM_REGULAR_PRIOS 8 -#define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 8 +#define MLX5_BY_PASS_NUM_REGULAR_PRIOS 16 +#define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 16 #define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1 #define MLX5_BY_PASS_NUM_PRIOS (MLX5_BY_PASS_NUM_REGULAR_PRIOS +\ MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS +\ -- cgit v1.2.3 From caacdbf4aa567ab5e8de1a4070195c5d3e8f1340 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Wed, 7 Mar 2018 15:57:27 -0800 Subject: genirq: Add CONFIG_GENERIC_IRQ_MULTI_HANDLER The arm multi irq handler registration mechanism has been copied into a handful of architectures, including arm64 and openrisc. RISC-V needs the same mechanism. Instead of adding yet another copy for RISC-V, copy the arm implementation into the core code depending on a new Kconfig symbol: CONFIG_GENERIC_IRQ_MULTI_HANDLER. Subsequent patches will convert the various architectures. 
Signed-off-by: Palmer Dabbelt Signed-off-by: Thomas Gleixner Cc: jonas@southpole.se Cc: catalin.marinas@arm.com Cc: Will Deacon Cc: linux@armlinux.org.uk Cc: stefan.kristiansson@saunalahti.fi Cc: openrisc@lists.librecores.org Cc: shorne@gmail.com Cc: linux-riscv@lists.infradead.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lkml.kernel.org/r/20180307235731.22627-2-palmer@sifive.com --- include/linux/irq.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 979eed1b2654..65916a305f3d 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1165,4 +1165,22 @@ int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest); int ipi_send_single(unsigned int virq, unsigned int cpu); int ipi_send_mask(unsigned int virq, const struct cpumask *dest); +#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER +/* + * Registers a generic IRQ handling function as the top-level IRQ handler in + * the system, which is generally the first C code called from an assembly + * architecture-specific interrupt handler. + * + * Returns 0 on success, or -EBUSY if an IRQ handler has already been + * registered. + */ +int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)); + +/* + * Allows interrupt handlers to find the irqchip that's been registered as the + * top-level IRQ handler. + */ +extern void (*handle_arch_irq)(struct pt_regs *) __ro_after_init; +#endif + #endif /* _LINUX_IRQ_H */ -- cgit v1.2.3 From e36df28f532f882965404d58e240f2e058b61f45 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Tue, 13 Feb 2018 15:28:34 +0800 Subject: printk: move dump stack related code to lib/dump_stack.c dump_stack related stuff should belong to lib/dump_stack.c thus move them there. Also conditionally compile lib/dump_stack.c since dump_stack code does not make sense if printk is disabled. 
Link: http://lkml.kernel.org/r/20180213072834.GA24784@dhcp-128-65.nay.redhat.com To: Steven Rostedt Cc: linux-kernel@vger.kernel.org Cc: akpm@linux-foundation.org Cc: Andi Kleen Signed-off-by: Dave Young Suggested-by: Steven Rostedt Suggested-by: Sergey Senozhatsky Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- include/linux/printk.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index e9b603ee9953..6d7e800affd8 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -201,6 +201,7 @@ void __init setup_log_buf(int early); __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); +extern asmlinkage void dump_stack(void) __cold; extern void printk_safe_init(void); extern void printk_safe_flush(void); extern void printk_safe_flush_on_panic(void); @@ -264,6 +265,10 @@ static inline void show_regs_print_info(const char *log_lvl) { } +static inline asmlinkage void dump_stack(void) +{ +} + static inline void printk_safe_init(void) { } @@ -279,8 +284,6 @@ static inline void printk_safe_flush_on_panic(void) extern int kptr_restrict; -extern asmlinkage void dump_stack(void) __cold; - #ifndef pr_fmt #define pr_fmt(fmt) fmt #endif -- cgit v1.2.3 From 71cfdd0bad3ad91680e6b82cac634154cf56376e Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 14 Mar 2018 19:25:06 +0100 Subject: libnvdimm: provide module_nd_driver wrapper Provide a module_nd_driver() wrapper over simple nd_driver_register() nd_driver_unregister() combinations in module_init() and module_exit() respectively. Note an explicit nd_driver_unregister() had to be implemented as nd bus drivers did call device_unregister() directly in the module_exit() function. 
Signed-off-by: Johannes Thumshirn Signed-off-by: Dan Williams --- include/linux/nd.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nd.h b/include/linux/nd.h index 5dc6b695437d..43c181a6add5 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -180,6 +180,12 @@ struct nd_region; void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event); int __must_check __nd_driver_register(struct nd_device_driver *nd_drv, struct module *module, const char *mod_name); +static inline void nd_driver_unregister(struct nd_device_driver *drv) +{ + driver_unregister(&drv->drv); +} #define nd_driver_register(driver) \ __nd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) +#define module_nd_driver(driver) \ + module_driver(driver, nd_driver_register, nd_driver_unregister) #endif /* __LINUX_ND_H__ */ -- cgit v1.2.3 From dcba51bbb9e0cc7f80d36eb20a033a4dff2ce9cc Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 12 Feb 2018 22:03:08 +0100 Subject: mtd: Get rid of unused fields in struct erase_info Some fields are not used by MTD drivers, users or core code. Moreover, those fields are not documented, so get rid of them to avoid any confusion. 
Signed-off-by: Boris Brezillon Reviewed-by: Richard Weinberger --- include/linux/mtd/mtd.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 205ededccc60..2a407dc9beaa 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -48,14 +48,9 @@ struct erase_info { uint64_t addr; uint64_t len; uint64_t fail_addr; - u_long time; - u_long retries; - unsigned dev; - unsigned cell; void (*callback) (struct erase_info *self); u_long priv; u_char state; - struct erase_info *next; }; struct mtd_erase_region_info { -- cgit v1.2.3 From 219c7b06f3da9ac2b51ed671881b20f1b127daef Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Sat, 10 Mar 2018 19:06:45 +0100 Subject: powerpc: Mark the variable earlycon_acpi_spcr_enable maybe_unused MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-use the object-like macro EARLYCON_USED_OR_UNUSED to mark `earlycon_acpi_spcr_enable` as maybe_unused. 
Fix the following warning (treated as error in W=1) CC arch/powerpc/kernel/setup-common.o In file included from ./include/linux/serial_8250.h:14:0, from arch/powerpc/kernel/setup-common.c:33: ./include/linux/serial_core.h:382:19: error: ‘earlycon_acpi_spcr_enable’ defined but not used [-Werror=unused-const-variable=] static const bool earlycon_acpi_spcr_enable; ^~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors Signed-off-by: Mathieu Malaterre Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index b32df49a3bd5..1d356105f25a 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -379,7 +379,7 @@ extern int of_setup_earlycon(const struct earlycon_id *match, extern bool earlycon_acpi_spcr_enable __initdata; int setup_earlycon(char *buf); #else -static const bool earlycon_acpi_spcr_enable; +static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED; static inline int setup_earlycon(char *buf) { return 0; } #endif -- cgit v1.2.3 From 884cfd9023ce6afe8bcf181ec988d8516eb32bf0 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 12 Feb 2018 22:03:09 +0100 Subject: mtd: Stop assuming mtd_erase() is asynchronous None of the mtd->_erase() implementations work in an asynchronous manner, so let's simplify MTD users that call mtd_erase(). All they need to do is check the value returned by mtd_erase() and assume that != 0 means failure. 
Signed-off-by: Boris Brezillon Reviewed-by: Richard Weinberger --- include/linux/mtd/mtd.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 2a407dc9beaa..5018437d7999 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -48,8 +48,6 @@ struct erase_info { uint64_t addr; uint64_t len; uint64_t fail_addr; - void (*callback) (struct erase_info *self); - u_long priv; u_char state; }; -- cgit v1.2.3 From 8ddfb47294c0f45a476a92ecf1e56cb5990c5468 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Fri, 9 Feb 2018 10:13:57 +0100 Subject: drivers: base: add description for .coredump() callback Commit 3c47d19ff4dc ("drivers: base: add coredump driver ops") added a new callback in struct device_driver, but not a kerneldoc description so here it is. Fixes: 3c47d19ff4dc ("drivers: base: add coredump driver ops") Signed-off-by: Arend van Spriel Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index b093405ed525..0b32a42db4ae 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -256,6 +256,7 @@ enum probe_type { * automatically. * @pm: Power management operations of the device which matched * this driver. + * @coredump: Called through sysfs to initiate a device coredump. * @p: Driver core's private data, no one other than the driver * core can touch this. * -- cgit v1.2.3 From 8f347c4232d5fc097599b711a3385722a6834005 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 12 Feb 2018 22:03:10 +0100 Subject: mtd: Unconditionally update ->fail_addr and ->addr in part_erase() ->fail_addr and ->addr can be updated no matter the result of parent->_erase(), we just need to remove the code doing the same thing in mtd_erase_callback() to avoid adjusting those fields twice. 
Note that this can be done because all MTD users have been converted to not pass an erase_info->callback() and are thus only taking the ->addr_fail and ->addr fields into account after part_erase() has returned. While we're at it, get rid of the erase_info->mtd field which was only needed to let mtd_erase_callback() get the partition device back. Signed-off-by: Boris Brezillon Reviewed-by: Richard Weinberger --- include/linux/mtd/mtd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 5018437d7999..4cbb7f555244 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -38,13 +38,14 @@ #define MTD_FAIL_ADDR_UNKNOWN -1LL +struct mtd_info; + /* * If the erase fails, fail_addr might indicate exactly which block failed. If * fail_addr = MTD_FAIL_ADDR_UNKNOWN, the failure was not at the device level * or was not specific to any particular block. */ struct erase_info { - struct mtd_info *mtd; uint64_t addr; uint64_t len; uint64_t fail_addr; -- cgit v1.2.3 From 48962f5c6fffcb676dd6ebd70f7869cfc6cc8356 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 13 Mar 2018 16:26:46 -0600 Subject: RDMA/mlx4: Move flag constants to uapi header MLX4_USER_DEV_CAP_LARGE_CQE (via mlx4_ib_alloc_ucontext_resp.dev_caps) and MLX4_IB_QUERY_DEV_RESP_MASK_CORE_CLOCK_OFFSET (via mlx4_uverbs_ex_query_device_resp.comp_mask) are copied directly to userspace and form part of the uAPI. Move them to the uapi header where they belong. 
Signed-off-by: Jason Gunthorpe --- include/linux/mlx4/device.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index a9b5fed8f7c6..81d0799b6091 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -256,10 +256,6 @@ enum { MLX4_DEV_CAP_EQE_STRIDE_ENABLED = 1LL << 3 }; -enum { - MLX4_USER_DEV_CAP_LARGE_CQE = 1L << 0 -}; - enum { MLX4_FUNC_CAP_64B_EQE_CQE = 1L << 0, MLX4_FUNC_CAP_EQE_CQE_STRIDE = 1L << 1, -- cgit v1.2.3 From 95dd77580ccd66a0da96e6d4696945b8cea39431 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 14 Mar 2018 18:20:29 -0500 Subject: fs: Teach path_connected to handle nfs filesystems with multiple roots. On nfsv2 and nfsv3 the nfs server can export subsets of the same filesystem and report the same filesystem identifier, so that the nfs client can know they are the same filesystem. The subsets can be from disjoint directory trees. The nfsv2 and nfsv3 filesystems provide no way to find the common root of all directory trees exported from the server with the same filesystem identifier. The practical result is that in struct super s_root for nfs s_root is not necessarily the root of the filesystem. The nfs mount code sets s_root to the root of the first subset of the nfs filesystem that the kernel mounts. This affects the dcache invalidation code in generic_shutdown_super currently called shrink_dcache_for_umount and that code for years has gone through an additional list of dentries that might be dentry trees that need to be freed to accommodate nfs. When I wrote path_connected I did not realize nfs was so special, and its heuristic for avoiding calling is_subdir can fail. The practical case where this fails is when there is a move of a directory from the subtree exposed by one nfs mount to the subtree exposed by another nfs mount. This move can happen either locally or remotely. 
With the remote case requiring that the move directory be cached before the move and that after the move someone walks the path to where the move directory now exists and in so doing causes the already cached directory to be moved in the dcache through the magic of d_splice_alias. If someone whose working directory is in the move directory or a subdirectory and now starts calling .. from the initial mount of nfs (where s_root == mnt_root), then path_connected as a heuristic will not bother with the is_subdir check. As s_root really is not the root of the nfs filesystem this heuristic is wrong, and the path may actually not be connected and path_connected can fail. The is_subdir function might be cheap enough that we can call it unconditionally. Verifying that will take some benchmarking and the result may not be the same on all kernels this fix needs to be backported to. So I am avoiding that for now. Filesystems with snapshots such as nilfs and btrfs do something similar. But as the directory tree of the snapshots are disjoint from one another and from the main directory tree rename won't move things between them and this problem will not occur. Cc: stable@vger.kernel.org Reported-by: Al Viro Fixes: 397d425dc26d ("vfs: Test for and handle paths that are unreachable from their mnt_root") Signed-off-by: "Eric W. 
Biederman" Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2a815560fda0..0430e03febaa 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1317,6 +1317,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ #define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */ #define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */ +#define SB_I_MULTIROOT 0x00000008 /* Multiple roots to the dentry tree */ /* sb->s_iflags to limit user namespace mounts */ #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ -- cgit v1.2.3 From 4ba66a9760722ccbb691b8f7116cad2f791cca7b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 7 Mar 2018 22:23:24 +0100 Subject: arch: remove blackfin port The Analog Devices Blackfin port was added in 2007 and was rather active for a while, but all work on it has come to a standstill over time, as Analog have changed their product line-up. Aaron Wu confirmed that the architecture port is no longer relevant, and multiple people suggested removing blackfin independently because of some of its oddities like a non-working SMP port, and the amount of duplication between the chip variants, which cause extra work when doing cross-architecture changes. 
Link: https://docs.blackfin.uclinux.org/ Acked-by: Aaron Wu Acked-by: Bryan Wu Cc: Steven Miao Cc: Mike Frysinger Signed-off-by: Arnd Bergmann --- include/linux/cpuhotplug.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 5b211fe295f0..8796ba387152 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -29,7 +29,6 @@ enum cpuhp_state { CPUHP_PERF_PREPARE, CPUHP_PERF_X86_PREPARE, CPUHP_PERF_X86_AMD_UNCORE_PREP, - CPUHP_PERF_BFIN, CPUHP_PERF_POWER, CPUHP_PERF_SUPERH, CPUHP_X86_HPET_DEAD, -- cgit v1.2.3 From 79375ea3ec527f746d5beae8c8f6e8a58740d4a8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Mar 2018 23:14:56 +0100 Subject: mm: remove obsolete alloc_remap() Tile was the only remaining architecture to implement alloc_remap(), and since that is being removed, there is no point in keeping this function. Removing all callers simplifies the mem_map handling. Reviewed-by: Pavel Tatashin Signed-off-by: Arnd Bergmann --- include/linux/bootmem.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index a53063e9d7d8..7942a96b1a9d 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -364,15 +364,6 @@ static inline void __init memblock_free_late( } #endif /* defined(CONFIG_HAVE_MEMBLOCK) && defined(CONFIG_NO_BOOTMEM) */ -#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP -extern void *alloc_remap(int nid, unsigned long size); -#else -static inline void *alloc_remap(int nid, unsigned long size) -{ - return NULL; -} -#endif /* CONFIG_HAVE_ARCH_ALLOC_REMAP */ - extern void *alloc_large_system_hash(const char *tablename, unsigned long bucketsize, unsigned long numentries, -- cgit v1.2.3 From cbe7128c4b92e2004984f477fd38dfa81662f02e Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 13 Mar 2018 14:51:28 +0900 Subject: vlan: Fix out of order vlan headers with reorder 
header off With reorder header off, received packets are untagged in skb_vlan_untag() called from within __netif_receive_skb_core(), and later the tag will be inserted back in vlan_do_receive(). This caused out of order vlan headers when we create a vlan device on top of another vlan device, because vlan_do_receive() inserts a tag as the outermost vlan tag. E.g. the outer tag is first removed in skb_vlan_untag() and inserted back in vlan_do_receive(), then the inner tag is next removed and inserted back as the outermost tag. This patch fixes the behaviour by inserting the inner tag at the right position. Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 66 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 5e6a2d4dc366..c4a1cff9c768 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -300,30 +300,34 @@ static inline bool vlan_hw_offload_capable(netdev_features_t features, } /** - * __vlan_insert_tag - regular VLAN tag inserting + * __vlan_insert_inner_tag - inner VLAN tag inserting * @skb: skbuff to tag * @vlan_proto: VLAN encapsulation protocol * @vlan_tci: VLAN TCI to insert + * @mac_len: MAC header length including outer vlan headers * - * Inserts the VLAN tag into @skb as part of the payload + * Inserts the VLAN tag into @skb as part of the payload at offset mac_len * Returns error if skb_cow_head failes. * * Does not change skb->protocol so this function can be used during receive. 
*/ -static inline int __vlan_insert_tag(struct sk_buff *skb, - __be16 vlan_proto, u16 vlan_tci) +static inline int __vlan_insert_inner_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci, + unsigned int mac_len) { struct vlan_ethhdr *veth; if (skb_cow_head(skb, VLAN_HLEN) < 0) return -ENOMEM; - veth = skb_push(skb, VLAN_HLEN); + skb_push(skb, VLAN_HLEN); - /* Move the mac addresses to the beginning of the new header. */ - memmove(skb->data, skb->data + VLAN_HLEN, 2 * ETH_ALEN); + /* Move the mac header sans proto to the beginning of the new header. */ + memmove(skb->data, skb->data + VLAN_HLEN, mac_len - ETH_TLEN); skb->mac_header -= VLAN_HLEN; + veth = (struct vlan_ethhdr *)(skb->data + mac_len - ETH_HLEN); + /* first, the ethernet type */ veth->h_vlan_proto = vlan_proto; @@ -334,12 +338,30 @@ static inline int __vlan_insert_tag(struct sk_buff *skb, } /** - * vlan_insert_tag - regular VLAN tag inserting + * __vlan_insert_tag - regular VLAN tag inserting * @skb: skbuff to tag * @vlan_proto: VLAN encapsulation protocol * @vlan_tci: VLAN TCI to insert * * Inserts the VLAN tag into @skb as part of the payload + * Returns error if skb_cow_head failes. + * + * Does not change skb->protocol so this function can be used during receive. + */ +static inline int __vlan_insert_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci) +{ + return __vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, ETH_HLEN); +} + +/** + * vlan_insert_inner_tag - inner VLAN tag inserting + * @skb: skbuff to tag + * @vlan_proto: VLAN encapsulation protocol + * @vlan_tci: VLAN TCI to insert + * @mac_len: MAC header length including outer vlan headers + * + * Inserts the VLAN tag into @skb as part of the payload at offset mac_len * Returns a VLAN tagged skb. If a new skb is created, @skb is freed. 
* * Following the skb_unshare() example, in case of error, the calling function @@ -347,12 +369,14 @@ static inline int __vlan_insert_tag(struct sk_buff *skb, * * Does not change skb->protocol so this function can be used during receive. */ -static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, - __be16 vlan_proto, u16 vlan_tci) +static inline struct sk_buff *vlan_insert_inner_tag(struct sk_buff *skb, + __be16 vlan_proto, + u16 vlan_tci, + unsigned int mac_len) { int err; - err = __vlan_insert_tag(skb, vlan_proto, vlan_tci); + err = __vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, mac_len); if (err) { dev_kfree_skb_any(skb); return NULL; @@ -360,6 +384,26 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, return skb; } +/** + * vlan_insert_tag - regular VLAN tag inserting + * @skb: skbuff to tag + * @vlan_proto: VLAN encapsulation protocol + * @vlan_tci: VLAN TCI to insert + * + * Inserts the VLAN tag into @skb as part of the payload + * Returns a VLAN tagged skb. If a new skb is created, @skb is freed. + * + * Following the skb_unshare() example, in case of error, the calling function + * doesn't have to worry about freeing the original skb. + * + * Does not change skb->protocol so this function can be used during receive. + */ +static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, + __be16 vlan_proto, u16 vlan_tci) +{ + return vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, ETH_HLEN); +} + /** * vlan_insert_tag_set_proto - regular VLAN tag inserting * @skb: skbuff to tag -- cgit v1.2.3 From 79ffdfc6522ae33d8a33e971070c08ee5f27439b Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Wed, 14 Mar 2018 22:17:20 +0300 Subject: net: Add rtnl_lock_killable() rtnl_lock() is widely used mutex in kernel. Some of kernel code does memory allocations under it. In case of memory deficit this may invoke OOM killer, but the problem is a killed task can't exit if it's waiting for the mutex. This may be a reason of deadlock and panic. 
This patch adds a new primitive, which responds to SIGKILL, and it allows to use it in the places, where we don't want to sleep forever. Signed-off-by: Kirill Tkhai Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 3573b4bf2fdf..562a175c35a9 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -33,6 +33,7 @@ extern void rtnl_lock(void); extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); +extern int rtnl_lock_killable(void); extern wait_queue_head_t netdev_unregistering_wq; extern struct rw_semaphore net_sem; -- cgit v1.2.3 From 4c6994806f708559c2812b73501406e21ae5dcd0 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Fri, 16 Mar 2018 14:51:27 +0800 Subject: blk-throttle: fix race between blkcg_bio_issue_check() and cgroup_rmdir() We've triggered a WARNING in blk_throtl_bio() when throttling writeback io, which complains blkg->refcnt is already 0 when calling blkg_get(), and then kernel crashes with invalid page request. After investigating this issue, we've found it is caused by a race between blkcg_bio_issue_check() and cgroup_rmdir(), which is described below: writeback kworker cgroup_rmdir cgroup_destroy_locked kill_css css_killed_ref_fn css_killed_work_fn offline_css blkcg_css_offline blkcg_bio_issue_check rcu_read_lock blkg_lookup spin_trylock(q->queue_lock) blkg_destroy spin_unlock(q->queue_lock) blk_throtl_bio spin_lock_irq(q->queue_lock) ... spin_unlock_irq(q->queue_lock) rcu_read_unlock Since rcu can only prevent blkg from releasing when it is being used, the blkg->refcnt can be decreased to 0 during blkg_destroy() and schedule blkg release. Then trying to blkg_get() in blk_throtl_bio() will trigger the WARNING. And then the corresponding blkg_put() will schedule blkg release again, which results in a double free. 
This race is introduced by commit ae1188963611 ("blkcg: consolidate blkg creation in blkcg_bio_issue_check()"). Before this commit, it will lookup first and then try to lookup/create again with queue_lock. Since reviving this logic is a bit drastic, fix it by only offlining pd during blkcg_css_offline(), and move the rest of the destruction (especially blkg_put()) into blkcg_css_free(), which should be the right way as discussed. Fixes: ae1188963611 ("blkcg: consolidate blkg creation in blkcg_bio_issue_check()") Reported-by: Jiufei Xue Signed-off-by: Joseph Qi Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 69bea82ebeb1..6c666fd7de3c 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -88,6 +88,7 @@ struct blkg_policy_data { /* the blkg and policy id this per-policy data belongs to */ struct blkcg_gq *blkg; int plid; + bool offline; }; /* -- cgit v1.2.3 From f29ab49b5388b2f829cf99859bc5f8ad8ec4d06a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 16 Mar 2018 14:25:40 +0100 Subject: dma-mapping: Convert NO_DMA get_dma_ops() into a real dummy If NO_DMA=y, get_dma_ops() returns a reference to the non-existing symbol bad_dma_ops, thus causing a link failure if it is ever used. Make get_dma_ops() return NULL instead, to avoid the link failure. This allows to improve compile-testing, and limits the need to keep on sprinkling dependencies on HAS_DMA all over the place. 
Signed-off-by: Geert Uytterhoeven Reviewed-by: Mark Brown Acked-by: Robin Murphy Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index eb9eab4ecd6d..5ea7eec83c0f 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -212,14 +212,14 @@ static inline void set_dma_ops(struct device *dev, } #else /* - * Define the dma api to allow compilation but not linking of - * dma dependent code. Code that depends on the dma-mapping - * API needs to set 'depends on HAS_DMA' in its Kconfig + * Define the dma api to allow compilation of dma dependent code. + * Code that depends on the dma-mapping API needs to set 'depends on HAS_DMA' + * in its Kconfig, unless it already depends on || COMPILE_TEST, + * where guarantuees the availability of the dma-mapping API. */ -extern const struct dma_map_ops bad_dma_ops; static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { - return &bad_dma_ops; + return NULL; } #endif -- cgit v1.2.3 From ab642e952f80c66c5592f0e2c35588843a813df8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 16 Mar 2018 14:25:41 +0100 Subject: dma-coherent: Add NO_DMA dummies for managed DMA API Add dummies for dmam_{alloc,free}_coherent(), to allow compile-testing if NO_DMA=y. This prevents the following from showing up later: ERROR: "dmam_alloc_coherent" [drivers/net/ethernet/arc/arc_emac.ko] undefined! ERROR: "dmam_free_coherent" [drivers/net/ethernet/apm/xgene/xgene-enet.ko] undefined! ERROR: "dmam_alloc_coherent" [drivers/net/ethernet/apm/xgene/xgene-enet.ko] undefined! ERROR: "dmam_alloc_coherent" [drivers/mtd/nand/hisi504_nand.ko] undefined! ERROR: "dmam_alloc_coherent" [drivers/mmc/host/dw_mmc.ko] undefined! 
Signed-off-by: Geert Uytterhoeven Reviewed-by: Mark Brown Acked-by: Robin Murphy Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 5ea7eec83c0f..94f41846b933 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -776,10 +776,19 @@ static inline void dma_deconfigure(struct device *dev) {} /* * Managed DMA API */ +#ifdef CONFIG_HAS_DMA extern void *dmam_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp); extern void dmam_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle); +#else /* !CONFIG_HAS_DMA */ +static inline void *dmam_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) +{ return NULL; } +static inline void dmam_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle) { } +#endif /* !CONFIG_HAS_DMA */ + extern void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); -- cgit v1.2.3 From c1ce6c2beea38171a57c56e55875318cef9a2ad5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 16 Mar 2018 14:25:43 +0100 Subject: mm: Add NO_DMA dummies for DMA pool API Add dummies for dma{,m}_pool_{create,destroy,alloc,free}(), to allow compile-testing if NO_DMA=y. This prevents the following from showing up later: ERROR: "dma_pool_destroy" [drivers/usb/mtu3/mtu3.ko] undefined! ERROR: "dma_pool_free" [drivers/usb/mtu3/mtu3.ko] undefined! ERROR: "dma_pool_alloc" [drivers/usb/mtu3/mtu3.ko] undefined! ERROR: "dma_pool_create" [drivers/usb/mtu3/mtu3.ko] undefined! ERROR: "dma_pool_destroy" [drivers/scsi/hisi_sas/hisi_sas_main.ko] undefined! ERROR: "dma_pool_free" [drivers/scsi/hisi_sas/hisi_sas_main.ko] undefined! ERROR: "dma_pool_alloc" [drivers/scsi/hisi_sas/hisi_sas_main.ko] undefined! 
ERROR: "dma_pool_create" [drivers/scsi/hisi_sas/hisi_sas_main.ko] undefined! ERROR: "dma_pool_alloc" [drivers/mailbox/bcm-pdc-mailbox.ko] undefined! ERROR: "dma_pool_free" [drivers/mailbox/bcm-pdc-mailbox.ko] undefined! ERROR: "dma_pool_create" [drivers/mailbox/bcm-pdc-mailbox.ko] undefined! ERROR: "dma_pool_destroy" [drivers/mailbox/bcm-pdc-mailbox.ko] undefined! Signed-off-by: Geert Uytterhoeven Reviewed-by: Mark Brown Acked-by: Robin Murphy Signed-off-by: Christoph Hellwig --- include/linux/dmapool.h | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h index 53ba737505df..f632ecfb4238 100644 --- a/include/linux/dmapool.h +++ b/include/linux/dmapool.h @@ -16,6 +16,8 @@ struct device; +#ifdef CONFIG_HAS_DMA + struct dma_pool *dma_pool_create(const char *name, struct device *dev, size_t size, size_t align, size_t allocation); @@ -23,13 +25,6 @@ void dma_pool_destroy(struct dma_pool *pool); void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle); - -static inline void *dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags, - dma_addr_t *handle) -{ - return dma_pool_alloc(pool, mem_flags | __GFP_ZERO, handle); -} - void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t addr); /* @@ -39,5 +34,26 @@ struct dma_pool *dmam_pool_create(const char *name, struct device *dev, size_t size, size_t align, size_t allocation); void dmam_pool_destroy(struct dma_pool *pool); +#else /* !CONFIG_HAS_DMA */ +static inline struct dma_pool *dma_pool_create(const char *name, + struct device *dev, size_t size, size_t align, size_t allocation) +{ return NULL; } +static inline void dma_pool_destroy(struct dma_pool *pool) { } +static inline void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, + dma_addr_t *handle) { return NULL; } +static inline void dma_pool_free(struct dma_pool *pool, void *vaddr, + dma_addr_t addr) { } 
+static inline struct dma_pool *dmam_pool_create(const char *name, + struct device *dev, size_t size, size_t align, size_t allocation) +{ return NULL; } +static inline void dmam_pool_destroy(struct dma_pool *pool) { } +#endif /* !CONFIG_HAS_DMA */ + +static inline void *dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags, + dma_addr_t *handle) +{ + return dma_pool_alloc(pool, mem_flags | __GFP_ZERO, handle); +} + #endif -- cgit v1.2.3 From 1f674e16f9ce6eb20ee2e81ae7514737376874de Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 16 Mar 2018 14:25:42 +0100 Subject: usb: gadget: Add NO_DMA dummies for DMA mapping API Add dummies for usb_gadget_{,un}map_request{,_by_dev}(), to allow compile-testing if NO_DMA=y. This prevents the following from showing up later: ERROR: "usb_gadget_unmap_request_by_dev" [drivers/usb/renesas_usbhs/renesas_usbhs.ko] undefined! ERROR: "usb_gadget_map_request_by_dev" [drivers/usb/renesas_usbhs/renesas_usbhs.ko] undefined! ERROR: "usb_gadget_map_request" [drivers/usb/mtu3/mtu3.ko] undefined! ERROR: "usb_gadget_unmap_request" [drivers/usb/mtu3/mtu3.ko] undefined! ERROR: "usb_gadget_map_request" [drivers/usb/gadget/udc/renesas_usb3.ko] undefined! ERROR: "usb_gadget_unmap_request" [drivers/usb/gadget/udc/renesas_usb3.ko] undefined! 
Signed-off-by: Geert Uytterhoeven Reviewed-by: Mark Brown Acked-by: Felipe Balbi Acked-by: Greg Kroah-Hartman Acked-by: Robin Murphy Signed-off-by: Christoph Hellwig --- include/linux/usb/gadget.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 66a5cff7ee14..b68e7f9b210b 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -805,6 +805,7 @@ int usb_otg_descriptor_init(struct usb_gadget *gadget, /* utility to simplify map/unmap of usb_requests to/from DMA */ +#ifdef CONFIG_HAS_DMA extern int usb_gadget_map_request_by_dev(struct device *dev, struct usb_request *req, int is_in); extern int usb_gadget_map_request(struct usb_gadget *gadget, @@ -814,6 +815,17 @@ extern void usb_gadget_unmap_request_by_dev(struct device *dev, struct usb_request *req, int is_in); extern void usb_gadget_unmap_request(struct usb_gadget *gadget, struct usb_request *req, int is_in); +#else /* !CONFIG_HAS_DMA */ +static inline int usb_gadget_map_request_by_dev(struct device *dev, + struct usb_request *req, int is_in) { return -ENOSYS; } +static inline int usb_gadget_map_request(struct usb_gadget *gadget, + struct usb_request *req, int is_in) { return -ENOSYS; } + +static inline void usb_gadget_unmap_request_by_dev(struct device *dev, + struct usb_request *req, int is_in) { } +static inline void usb_gadget_unmap_request(struct usb_gadget *gadget, + struct usb_request *req, int is_in) { } +#endif /* !CONFIG_HAS_DMA */ /*-------------------------------------------------------------------------*/ -- cgit v1.2.3 From edb39592a5877bd91b2e6ee15194268f35b04892 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Mar 2018 17:36:56 +0100 Subject: perf: Fix sibling iteration Mark noticed that the change to sibling_list changed some iteration semantics; because previously we used group_list as list entry, sibling events would always have an empty sibling_list. 
But because we now use sibling_list for both list head and list entry, siblings will report as having siblings. Fix this with a custom for_each_sibling_event() iterator. Fixes: 8343aae66167 ("perf/core: Remove perf_event::group_entry") Reported-by: Mark Rutland Suggested-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: vincent.weaver@maine.edu Cc: alexander.shishkin@linux.intel.com Cc: torvalds@linux-foundation.org Cc: alexey.budankov@linux.intel.com Cc: valery.cherepennikov@intel.com Cc: eranian@google.com Cc: acme@redhat.com Cc: linux-tip-commits@vger.kernel.org Cc: davidcc@google.com Cc: kan.liang@intel.com Cc: Dmitry.Prohorov@intel.com Cc: jolsa@redhat.com Link: https://lkml.kernel.org/r/20180315170129.GX4043@hirez.programming.kicks-ass.net --- include/linux/perf_event.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2bb200e1bbea..ff39ab011376 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -536,6 +536,10 @@ struct pmu_event_list { struct list_head list; }; +#define for_each_sibling_event(sibling, event) \ + if ((event)->group_leader == (event)) \ + list_for_each_entry((sibling), &(event)->sibling_list, sibling_list) + /** * struct perf_event - performance event kernel representation: */ -- cgit v1.2.3 From 6e0d4ff4580c1272f4e4860bf22841ef31fd31ba Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Tue, 23 Jan 2018 20:24:45 +0800 Subject: clk: add more __must_check for bulk APIs we need it even when !CONFIG_HAVE_CLK because it allows us to catch missing checking return values in the non-clk compile configurations too. More test coverage. 
Cc: Stephen Boyd Suggested-by: Stephen Boyd Signed-off-by: Dong Aisheng Signed-off-by: Stephen Boyd --- include/linux/clk.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk.h b/include/linux/clk.h index 4c4ef9f34db3..0dbd0885b2c2 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -209,7 +209,7 @@ static inline int clk_prepare(struct clk *clk) return 0; } -static inline int clk_bulk_prepare(int num_clks, struct clk_bulk_data *clks) +static inline int __must_check clk_bulk_prepare(int num_clks, struct clk_bulk_data *clks) { might_sleep(); return 0; @@ -603,8 +603,8 @@ static inline struct clk *clk_get(struct device *dev, const char *id) return NULL; } -static inline int clk_bulk_get(struct device *dev, int num_clks, - struct clk_bulk_data *clks) +static inline int __must_check clk_bulk_get(struct device *dev, int num_clks, + struct clk_bulk_data *clks) { return 0; } @@ -614,8 +614,8 @@ static inline struct clk *devm_clk_get(struct device *dev, const char *id) return NULL; } -static inline int devm_clk_bulk_get(struct device *dev, int num_clks, - struct clk_bulk_data *clks) +static inline int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, + struct clk_bulk_data *clks) { return 0; } @@ -645,7 +645,7 @@ static inline int clk_enable(struct clk *clk) return 0; } -static inline int clk_bulk_enable(int num_clks, struct clk_bulk_data *clks) +static inline int __must_check clk_bulk_enable(int num_clks, struct clk_bulk_data *clks) { return 0; } @@ -719,8 +719,8 @@ static inline void clk_disable_unprepare(struct clk *clk) clk_unprepare(clk); } -static inline int clk_bulk_prepare_enable(int num_clks, - struct clk_bulk_data *clks) +static inline int __must_check clk_bulk_prepare_enable(int num_clks, + struct clk_bulk_data *clks) { int ret; -- cgit v1.2.3 From 63189b785960c3346d1af347516b7438f7ada8ec Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 8 Mar 2018 14:22:56 
+0800 Subject: f2fs: wrap all options with f2fs_sb_info.mount_opt This patch merges miscellaneous mount options into struct f2fs_mount_info, After this patch, once we add new mount option, we don't need to worry about recovery of it in remount_fs(), since we will recover the f2fs_sb_info.mount_opt including all options. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index b06ab1f04ff6..124787e8db58 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -39,10 +39,10 @@ #define F2FS_MAX_QUOTAS 3 -#define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */ -#define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */ -#define F2FS_IO_SIZE_BYTES(sbi) (1 << ((sbi)->write_io_size_bits + 12)) /* B */ -#define F2FS_IO_SIZE_BITS(sbi) ((sbi)->write_io_size_bits) /* power of 2 */ +#define F2FS_IO_SIZE(sbi) (1 << F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */ +#define F2FS_IO_SIZE_KB(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 2)) /* KB */ +#define F2FS_IO_SIZE_BYTES(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 12)) /* B */ +#define F2FS_IO_SIZE_BITS(sbi) (F2FS_OPTION(sbi).write_io_size_bits) /* power of 2 */ #define F2FS_IO_SIZE_MASK(sbi) (F2FS_IO_SIZE(sbi) - 1) /* This flag is used by node and meta inodes, and by recovery */ -- cgit v1.2.3 From bb1105e479fbb8b0edc6f35affec71b75e31c8c0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 9 Mar 2018 17:42:28 -0800 Subject: f2fs: align memory boundary for bitops For example, in arm64, free_nid_bitmap should be aligned to word size in order to use bit operations. 
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 124787e8db58..aa5db8b5521a 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -305,6 +305,10 @@ struct f2fs_node { */ #define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry)) #define NAT_ENTRY_BITMAP_SIZE ((NAT_ENTRY_PER_BLOCK + 7) / 8) +#define NAT_ENTRY_BITMAP_SIZE_ALIGNED \ + ((NAT_ENTRY_BITMAP_SIZE + BITS_PER_LONG - 1) / \ + BITS_PER_LONG * BITS_PER_LONG) + struct f2fs_nat_entry { __u8 version; /* latest version of cached nat entry */ -- cgit v1.2.3 From 71db049e7355f31604e2c04b6cabb71d02bd487d Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sat, 17 Feb 2018 14:58:40 +0100 Subject: rtc: Add RTC range Add a way for drivers to inform the core of the supported date/time range. The core can then check whether the date/time or alarm is in the range before calling ->set_time, ->set_mmss or ->set_alarm. It returns -ERANGE when the time is out of range. 
Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 3b65b201169c..c78528c394e5 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -150,6 +150,9 @@ struct rtc_device { bool nvram_old_abi; struct bin_attribute *nvram; + time64_t range_min; + timeu64_t range_max; + #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL struct work_struct uie_task; struct timer_list uie_timer; -- cgit v1.2.3 From 989515647e783221f9737ed1cf519573d26ce99b Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 8 Jan 2018 14:04:50 +0800 Subject: rtc: Add one offset seconds to expand RTC range From our investigation for all RTC drivers, 1 driver will be expired before year 2017, 7 drivers will be expired before year 2038, 23 drivers will be expired before year 2069, 72 drivers will be expired before 2100 and 104 drivers will be expired before 2106. Especially for these early expired drivers, we need to expand the RTC range to make the RTC can still work after the expired year. So we can expand the RTC range by adding one offset to the time when reading from hardware, and subtracting it when writing back. For example, if you have an RTC that can do 100 years, and currently is configured to be based in Jan 1 1970, so it can represents times from 1970 to 2069. Then if you change the start year from 1970 to 2000, which means it can represents times from 2000 to 2099. By adding or subtracting the offset produced by moving the wrap point, all times between 1970 and 1999 from RTC hardware could get interpreted as times from 2070 to 2099, but the interpretation of dates between 2000 and 2069 would not change. 
Signed-off-by: Baolin Wang Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index c78528c394e5..82a3038f16ab 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -152,6 +152,9 @@ struct rtc_device { time64_t range_min; timeu64_t range_max; + time64_t start_secs; + time64_t offset_secs; + bool set_start_time; #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL struct work_struct uie_task; -- cgit v1.2.3 From 83bbc5ac63326433755592829caf02920b3d8dc0 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 8 Mar 2018 00:13:52 +0100 Subject: rtc: Add useful timestamp definitions Add commonly used timestamps for range definition. Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 82a3038f16ab..4c007f69082f 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -169,6 +169,11 @@ struct rtc_device { }; #define to_rtc_device(d) container_of(d, struct rtc_device, dev) +/* useful timestamps */ +#define RTC_TIMESTAMP_BEGIN_1900 -2208989361LL /* 1900-01-01 00:00:00 */ +#define RTC_TIMESTAMP_BEGIN_2000 946684800LL /* 2000-01-01 00:00:00 */ +#define RTC_TIMESTAMP_END_2099 4102444799LL /* 2099-12-31 23:59:59 */ + extern struct rtc_device *rtc_device_register(const char *name, struct device *dev, const struct rtc_class_ops *ops, -- cgit v1.2.3 From 4a681243cc2d2cea98c6b5e57224f3bcb08bce6c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 10 Mar 2018 00:27:15 -0600 Subject: rtc: s5m: Move enum from rtc.h to rtc-s5m.c Move this enum to rtc-s5m.c once it is meaningless to others drivers [1]. [1] https://marc.info/?l=linux-rtc&m=152060068925948&w=2 Suggested-by: Krzysztof Kozlowski Signed-off-by: Gustavo A. R. 
Silva Reviewed-by: Krzysztof Kozlowski Acked-by: Lee Jones Signed-off-by: Alexandre Belloni --- include/linux/mfd/samsung/rtc.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/samsung/rtc.h b/include/linux/mfd/samsung/rtc.h index 48c3c5be7eb1..9ed2871ea335 100644 --- a/include/linux/mfd/samsung/rtc.h +++ b/include/linux/mfd/samsung/rtc.h @@ -141,15 +141,4 @@ enum s2mps_rtc_reg { #define WTSR_ENABLE_SHIFT 6 #define WTSR_ENABLE_MASK (1 << WTSR_ENABLE_SHIFT) -enum { - RTC_SEC = 0, - RTC_MIN, - RTC_HOUR, - RTC_WEEKDAY, - RTC_DATE, - RTC_MONTH, - RTC_YEAR1, - RTC_YEAR2, -}; - #endif /* __LINUX_MFD_SEC_RTC_H */ -- cgit v1.2.3 From 233bde21aa43516baa013ef7ac33f3427056db3e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 14 Mar 2018 15:48:06 -0700 Subject: block: Move SECTOR_SIZE and SECTOR_SHIFT definitions into <linux/blkdev.h> It happens often while I'm preparing a patch for a block driver that I'm wondering: is a definition of SECTOR_SIZE and/or SECTOR_SHIFT available for this driver? Do I have to introduce definitions of these constants before I can use these constants? To avoid this confusion, move the existing definitions of SECTOR_SIZE and SECTOR_SHIFT into the <linux/blkdev.h> header file such that these become available for all block drivers. Make the SECTOR_SIZE definition in the uapi msdos_fs.h header file conditional to avoid that including that header file after <linux/blkdev.h> causes the compiler to complain about a SECTOR_SIZE redefinition. Note: the SECTOR_SIZE / SECTOR_SHIFT / SECTOR_BITS definitions have not been removed from uapi header files nor from NAND drivers in which these constants are used for another purpose than converting block layer offsets and sizes into a number of sectors. Cc: David S. Miller Cc: Mike Snitzer Cc: Dan Williams Cc: Minchan Kim Cc: Nitin Gupta Reviewed-by: Sergey Senozhatsky Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. 
Petersen Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 42 +++++++++++++++++++++++++++++++----------- include/linux/device-mapper.h | 2 -- include/linux/ide.h | 1 - 3 files changed, 31 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 19eaf8d89368..9af3e0f430bc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1021,6 +1021,19 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev) return bdev->bd_disk->queue; /* this is never NULL */ } +/* + * The basic unit of block I/O is a sector. It is used in a number of contexts + * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9 + * bytes. Variables of type sector_t represent an offset or size that is a + * multiple of 512 bytes. Hence these two constants. + */ +#ifndef SECTOR_SHIFT +#define SECTOR_SHIFT 9 +#endif +#ifndef SECTOR_SIZE +#define SECTOR_SIZE (1 << SECTOR_SHIFT) +#endif + /* * blk_rq_pos() : the current sector * blk_rq_bytes() : bytes left in the entire request @@ -1048,12 +1061,12 @@ extern unsigned int blk_rq_err_bytes(const struct request *rq); static inline unsigned int blk_rq_sectors(const struct request *rq) { - return blk_rq_bytes(rq) >> 9; + return blk_rq_bytes(rq) >> SECTOR_SHIFT; } static inline unsigned int blk_rq_cur_sectors(const struct request *rq) { - return blk_rq_cur_bytes(rq) >> 9; + return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT; } static inline unsigned int blk_rq_zone_no(struct request *rq) @@ -1083,7 +1096,8 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, int op) { if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)) - return min(q->limits.max_discard_sectors, UINT_MAX >> 9); + return min(q->limits.max_discard_sectors, + UINT_MAX >> SECTOR_SHIFT); if (unlikely(op == REQ_OP_WRITE_SAME)) return q->limits.max_write_same_sectors; @@ -1395,16 +1409,21 @@ extern int 
blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, static inline int sb_issue_discard(struct super_block *sb, sector_t block, sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) { - return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9), - nr_blocks << (sb->s_blocksize_bits - 9), + return blkdev_issue_discard(sb->s_bdev, + block << (sb->s_blocksize_bits - + SECTOR_SHIFT), + nr_blocks << (sb->s_blocksize_bits - + SECTOR_SHIFT), gfp_mask, flags); } static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, sector_t nr_blocks, gfp_t gfp_mask) { return blkdev_issue_zeroout(sb->s_bdev, - block << (sb->s_blocksize_bits - 9), - nr_blocks << (sb->s_blocksize_bits - 9), + block << (sb->s_blocksize_bits - + SECTOR_SHIFT), + nr_blocks << (sb->s_blocksize_bits - + SECTOR_SHIFT), gfp_mask, 0); } @@ -1511,7 +1530,8 @@ static inline int queue_alignment_offset(struct request_queue *q) static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector) { unsigned int granularity = max(lim->physical_block_size, lim->io_min); - unsigned int alignment = sector_div(sector, granularity >> 9) << 9; + unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT) + << SECTOR_SHIFT; return (granularity + lim->alignment_offset - alignment) % granularity; } @@ -1545,8 +1565,8 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector return 0; /* Why are these in bytes, not sectors? 
*/ - alignment = lim->discard_alignment >> 9; - granularity = lim->discard_granularity >> 9; + alignment = lim->discard_alignment >> SECTOR_SHIFT; + granularity = lim->discard_granularity >> SECTOR_SHIFT; if (!granularity) return 0; @@ -1557,7 +1577,7 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector offset = (granularity + alignment - offset) % granularity; /* Turn it back into bytes, gaah */ - return offset << 9; + return offset << SECTOR_SHIFT; } static inline int bdev_discard_alignment(struct block_device *bdev) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index da83f64952e7..4384433b50e7 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -542,8 +542,6 @@ do { \ #define DMEMIT(x...) sz += ((sz >= maxlen) ? \ 0 : scnprintf(result + sz, maxlen - sz, x)) -#define SECTOR_SHIFT 9 - /* * Definitions of return values from target end_io function. */ diff --git a/include/linux/ide.h b/include/linux/ide.h index 771989d25ef8..0acfa62b1d44 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -165,7 +165,6 @@ struct ide_io_ports { */ #define PARTN_BITS 6 /* number of minor dev bits for partitions */ #define MAX_DRIVES 2 /* per interface; 2 assumed by lots of code */ -#define SECTOR_SIZE 512 /* * Timeouts for various operations: -- cgit v1.2.3 From 9035cf9a97e429e6b5291841da81c433879f5658 Mon Sep 17 00:00:00 2001 From: Khalid Aziz Date: Wed, 21 Feb 2018 10:15:49 -0700 Subject: mm: Add address parameter to arch_validate_prot() A protection flag may not be valid across entire address space and hence arch_validate_prot() might need the address a protection bit is being set on to ensure it is a valid protection flag. For example, sparc processors support memory corruption detection (as part of ADI feature) flag on memory addresses mapped on to physical RAM but not on PFN mapped pages or addresses mapped on to devices. 
This patch adds address to the parameters being passed to arch_validate_prot() so protection bits can be validated in the relevant context. Signed-off-by: Khalid Aziz Cc: Khalid Aziz Reviewed-by: Anthony Yznaga Acked-by: Michael Ellerman (powerpc) Acked-by: Andrew Morton Signed-off-by: David S. Miller --- include/linux/mman.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mman.h b/include/linux/mman.h index 6a4d1caaff5c..4b08e9c9c538 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -92,7 +92,7 @@ static inline void vm_unacct_memory(long pages) * * Returns true if the prot flags are valid */ -static inline bool arch_validate_prot(unsigned long prot) +static inline bool arch_validate_prot(unsigned long prot, unsigned long addr) { return (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM)) == 0; } -- cgit v1.2.3 From 2c2d57b5e769956fb36581e0d3cccdb5ea68038f Mon Sep 17 00:00:00 2001 From: Khalid Aziz Date: Wed, 21 Feb 2018 10:15:50 -0700 Subject: mm: Clear arch specific VM flags on protection change When protection bits are changed on a VMA, some of the architecture specific flags should be cleared as well. An example of this is the PKEY flags on x86. This patch expands the current code that clears PKEY flags for x86, to support similar functionality for other architectures as well. Signed-off-by: Khalid Aziz Cc: Khalid Aziz Reviewed-by: Anthony Yznaga Acked-by: Andrew Morton Signed-off-by: David S. 
Miller --- include/linux/mm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ad06d42adb1a..ae806dbc63ee 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -287,6 +287,12 @@ extern unsigned int kobjsize(const void *objp); /* This mask is used to clear all the VMA flags used by mlock */ #define VM_LOCKED_CLEAR_MASK (~(VM_LOCKED | VM_LOCKONFAULT)) +/* Arch-specific flags to clear when updating VM flags on protection change */ +#ifndef VM_ARCH_CLEAR +# define VM_ARCH_CLEAR VM_NONE +#endif +#define VM_FLAGS_CLEAR (ARCH_VM_PKEY_FLAGS | VM_ARCH_CLEAR) + /* * mapping from the currently active vm_flags protection bits (the * low four bits) to a page protection mask.. -- cgit v1.2.3 From a4602b62d9fdea41412ba765bbf32ecfc2b6a94c Mon Sep 17 00:00:00 2001 From: Khalid Aziz Date: Wed, 21 Feb 2018 10:15:51 -0700 Subject: mm: Allow arch code to override copy_highpage() Some architectures can support metadata for memory pages and when a page is copied, its metadata must also be copied. Sparc processors from M7 onwards support metadata for memory pages. This metadata provides tag based protection for access to memory pages. To maintain this protection, the tag data must be copied to the new page when a page is migrated across NUMA nodes. This patch allows arch specific code to override default copy_highpage() and copy metadata along with page data upon migration. Signed-off-by: Khalid Aziz Cc: Khalid Aziz Reviewed-by: Anthony Yznaga Acked-by: Andrew Morton Signed-off-by: David S. 
Miller --- include/linux/highmem.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 776f90f3a1cd..0690679832d4 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -237,6 +237,8 @@ static inline void copy_user_highpage(struct page *to, struct page *from, #endif +#ifndef __HAVE_ARCH_COPY_HIGHPAGE + static inline void copy_highpage(struct page *to, struct page *from) { char *vfrom, *vto; @@ -248,4 +250,6 @@ static inline void copy_highpage(struct page *to, struct page *from) kunmap_atomic(vfrom); } +#endif + #endif /* _LINUX_HIGHMEM_H */ -- cgit v1.2.3 From 74a04967482faa7144b93dae3b2e913870dd421c Mon Sep 17 00:00:00 2001 From: Khalid Aziz Date: Fri, 23 Feb 2018 15:46:41 -0700 Subject: sparc64: Add support for ADI (Application Data Integrity) ADI is a new feature supported on SPARC M7 and newer processors to allow hardware to catch rogue accesses to memory. ADI is supported for data fetches only and not instruction fetches. An app can enable ADI on its data pages, set version tags on them and use versioned addresses to access the data pages. Upper bits of the address contain the version tag. On M7 processors, upper four bits (bits 63-60) contain the version tag. If a rogue app attempts to access ADI enabled data pages, its access is blocked and processor generates an exception. Please see Documentation/sparc/adi.txt for further details. This patch extends mprotect to enable ADI (TSTATE.mcde), enable/disable MCD (Memory Corruption Detection) on selected memory ranges, enable TTE.mcd in PTEs, return ADI parameters to userspace and save/restore ADI version tags on page swap out/in or migration. ADI is not enabled by default for any task. A task must explicitly enable ADI on a memory range and set version tag for ADI to be effective for the task. Signed-off-by: Khalid Aziz Cc: Khalid Aziz Reviewed-by: Anthony Yznaga Signed-off-by: David S. 
Miller --- include/linux/mm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ae806dbc63ee..32fe6919a11b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -245,6 +245,9 @@ extern unsigned int kobjsize(const void *objp); # define VM_GROWSUP VM_ARCH_1 #elif defined(CONFIG_IA64) # define VM_GROWSUP VM_ARCH_1 +#elif defined(CONFIG_SPARC64) +# define VM_SPARC_ADI VM_ARCH_1 /* Uses ADI tag for access control */ +# define VM_ARCH_CLEAR VM_SPARC_ADI #elif !defined(CONFIG_MMU) # define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */ #endif -- cgit v1.2.3 From f63109f0cb40bca848eef9bf096dfdb7def5e20d Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 4 Mar 2018 23:37:22 +0100 Subject: gpio: htc-gpio: Include the right header This driver is a pure GPIO driver and should only include <linux/gpio/driver.h>. Drop the include of <linux/gpio.h> from the platform data header as well, it serves no purpose. Signed-off-by: Linus Walleij --- include/linux/platform_data/gpio-htc-egpio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/gpio-htc-egpio.h b/include/linux/platform_data/gpio-htc-egpio.h index b7baf1e42c55..9a3e78082883 100644 --- a/include/linux/platform_data/gpio-htc-egpio.h +++ b/include/linux/platform_data/gpio-htc-egpio.h @@ -6,8 +6,6 @@ #ifndef __HTC_EGPIO_H__ #define __HTC_EGPIO_H__ -#include <linux/gpio.h> - /* Descriptive values for all-in or all-out htc_egpio_chip descriptors. */ #define HTC_EGPIO_OUTPUT (~0) #define HTC_EGPIO_INPUT 0 -- cgit v1.2.3 From 1c94984396dc7bc40b4f6899674eaa41f29a4f6e Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 1 Mar 2018 00:19:21 +0100 Subject: vfs: make sure struct filename->iname is word-aligned I noticed that offsetof(struct filename, iname) is actually 28 on 64 bit platforms, so we always pass an unaligned pointer to strncpy_from_user. 
This is mostly a problem for those 64 bit platforms without HAVE_EFFICIENT_UNALIGNED_ACCESS, but even on x86_64, unaligned accesses carry a penalty. A user-space microbenchmark doing nothing but strncpy_from_user from the same (aligned) source string runs about 5% faster when the destination is aligned. That number increases to 20% when the string is long enough (~32 bytes) that we cross a cache line boundary - that's for example the case for about half the files a "git status" in a kernel tree ends up stat'ing. This won't make any real-life workloads 5%, or even 1%, faster, but path lookup is common enough that cutting even a few cycles should be worthwhile. So ensure we always pass an aligned destination pointer to strncpy_from_user. Instead of explicit padding, simply swap the refcnt and aname members, as suggested by Al Viro. Signed-off-by: Rasmus Villemoes Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2a815560fda0..d7b2caadb292 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2380,8 +2380,8 @@ struct audit_names; struct filename { const char *name; /* pointer to actual string */ const __user char *uptr; /* original userland pointer */ - struct audit_names *aname; int refcnt; + struct audit_names *aname; const char iname[]; }; -- cgit v1.2.3 From 08fdc8a0138afaf324296a342f32ad26ec465e43 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 3 Oct 2017 18:17:41 +0200 Subject: buffer.c: call thaw_super during emergency thaw There are 2 distinct freezing mechanisms - one operates on block devices and another one directly on super blocks. Both end up with the same result, but thaw of only one of these does not thaw the other. In particular fsfreeze --freeze uses the ioctl variant going to the super block. 
Since prior to this patch emergency thaw was not doing a relevant thaw, filesystems frozen with this method remained unaffected. The patch is a hack which adds blind unfreezing. In order to keep the super block write-locked the whole time the code is shuffled around and the newly introduced __iterate_supers is employed. Signed-off-by: Mateusz Guzik Signed-off-by: Al Viro --- include/linux/fs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 339e73742e73..b864fcb3b5aa 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2428,6 +2428,7 @@ extern int sync_blockdev(struct block_device *bdev); extern void kill_bdev(struct block_device *); extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); +extern void emergency_thaw_bdev(struct super_block *sb); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); @@ -2453,6 +2454,11 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) return 0; } +static inline int emergency_thaw_bdev(struct super_block *sb) +{ + return 0; +} + static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg) { } -- cgit v1.2.3 From a84d1169164b274f13b97a23ff235c000efe3b49 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Mar 2018 17:12:40 +0100 Subject: y2038: Introduce struct __kernel_old_timeval Dealing with 'struct timeval' users in the y2038 series is a bit tricky: We have two definitions of timeval that are visible to user space, one comes from glibc (or some other C library), the other comes from linux/time.h. The kernel copy is what we want to be used for a number of structures defined by the kernel itself, e.g. elf_prstatus (used in core dumps), sysinfo and rusage (used in system calls). 
These generally tend to be used for passing time intervals rather than absolute (epoch-based) times, so they do not suffer from the y2038 overflow. Some of them could be changed to use 64-bit timestamps by creating new system calls, others like the core files cannot easily be changed. An application using these interfaces likely also uses gettimeofday() or other interfaces that use absolute times, and passes 'struct timeval' pointers directly into kernel interfaces, so glibc must redefine their timeval based on a 64-bit time_t when they introduce their y2038-safe interfaces. The only reasonable way forward I see is to remove the 'timeval' definition from the kernel's uapi headers, and change the interfaces that we do not want to (or cannot) duplicate for 64-bit times to use a new __kernel_old_timeval definition instead. This type should be avoided for all new interfaces (those can use 64-bit nanoseconds, or the 64-bit version of timespec instead), and should be used with great care when converting existing interfaces from timeval, to be sure they don't suffer from the y2038 overflow, and only with consensus for the particular user that using __kernel_old_timeval is better than moving to a 64-bit based interface. The structure name is intentionally chosen to not conflict with user space types, and to be ugly enough to discourage its use. Note that ioctl based interfaces that pass a bare 'timeval' pointer cannot change to '__kernel_old_timeval' because the user space source code refers to 'timeval' instead, and we don't want to modify the user space sources if possible. However, any application that relies on a structure to contain an embedded 'timeval' (e.g. by passing a pointer to the member into a function call that expects a timeval pointer) is broken when that structure gets converted to __kernel_old_timeval. 
I don't see any way around that, and we have to rely on the compiler to produce a warning or compile failure that will alert users when they recompile their sources against a new libc. Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Cc: Stephen Boyd Cc: John Stultz Cc: Al Viro Link: https://lkml.kernel.org/r/20180315161739.576085-1-arnd@arndb.de --- include/linux/time32.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/time32.h b/include/linux/time32.h index 65b1de25198d..d2bcd4377b56 100644 --- a/include/linux/time32.h +++ b/include/linux/time32.h @@ -217,5 +217,6 @@ static inline s64 timeval_to_ns(const struct timeval *tv) * Returns the timeval representation of the nsec parameter. */ extern struct timeval ns_to_timeval(const s64 nsec); +extern struct __kernel_old_timeval ns_to_kernel_old_timeval(s64 nsec); #endif -- cgit v1.2.3 From e5878732a521dd31ea6377875e49adc424503e5b Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Mon, 19 Mar 2018 10:02:18 +0800 Subject: ahci: imx: add the imx6qp ahci sata support - Regarding to imx6q ahci sata, imx6qp ahci sata has the reset mechanism. Add the imx6qp ahci sata support in this commit. - Use the specific reset callback for imx53 sata, and use the default ahci_ops.softreset for the others. 
Signed-off-by: Richard Zhu Signed-off-by: Tejun Heo --- include/linux/mfd/syscon/imx6q-iomuxc-gpr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h index c8e0164c5423..e06f5f79eaef 100644 --- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h +++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h @@ -243,6 +243,8 @@ #define IMX6Q_GPR4_IPU_RD_CACHE_CTL BIT(0) #define IMX6Q_GPR5_L2_CLK_STOP BIT(8) +#define IMX6Q_GPR5_SATA_SW_PD BIT(10) +#define IMX6Q_GPR5_SATA_SW_RST BIT(11) #define IMX6Q_GPR6_IPU1_ID00_WR_QOS_MASK (0xf << 0) #define IMX6Q_GPR6_IPU1_ID01_WR_QOS_MASK (0xf << 4) -- cgit v1.2.3 From b3a5d111994450909158929560906f2c1c6c1d85 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 Mar 2018 12:45:12 -0700 Subject: percpu_ref: Update doc to dissuade users from depending on internal RCU grace periods percpu_ref internally uses sched-RCU to implement the percpu -> atomic mode switching and the documentation suggested that this could be depended upon. This doesn't seem like a good idea. * percpu_ref uses sched-RCU which has different grace periods from regular RCU. Users may combine percpu_ref with regular RCU usage and incorrectly believe that regular RCU grace periods are performed by percpu_ref. This can lead to, for example, use-after-free due to premature freeing. * percpu_ref has a grace period when switching from percpu to atomic mode. It doesn't have one between the last put and release. This distinction is subtle and can lead to surprising bugs. * percpu_ref allows starting in and switching to atomic mode manually for debugging and other purposes. This means that there may not be any grace periods from kill to release. This patch makes it clear that the grace periods are percpu_ref's internal implementation detail and can't be depended upon by the users. 
Signed-off-by: Tejun Heo Cc: Kent Overstreet Cc: Linus Torvalds Signed-off-by: Tejun Heo --- include/linux/percpu-refcount.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 864d167a1073..009cdf3d65b6 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -30,10 +30,14 @@ * calls io_destroy() or the process exits. * * In the aio code, kill_ioctx() is called when we wish to destroy a kioctx; it - * calls percpu_ref_kill(), then hlist_del_rcu() and synchronize_rcu() to remove - * the kioctx from the proccess's list of kioctxs - after that, there can't be - * any new users of the kioctx (from lookup_ioctx()) and it's then safe to drop - * the initial ref with percpu_ref_put(). + * removes the kioctx from the proccess's table of kioctxs and kills percpu_ref. + * After that, there can't be any new users of the kioctx (from lookup_ioctx()) + * and it's then safe to drop the initial ref with percpu_ref_put(). + * + * Note that the free path, free_ioctx(), needs to go through explicit call_rcu() + * to synchronize with RCU protected lookup_ioctx(). percpu_ref operations don't + * imply RCU grace periods of any kind and if a user wants to combine percpu_ref + * with RCU protection, it must be done explicitly. * * Code that does a two stage shutdown like this often needs some kind of * explicit synchronization to ensure the initial refcount can only be dropped @@ -113,8 +117,10 @@ void percpu_ref_reinit(struct percpu_ref *ref); * Must be used to drop the initial ref on a percpu refcount; must be called * precisely once before shutdown. * - * Puts @ref in non percpu mode, then does a call_rcu() before gathering up the - * percpu counters and dropping the initial ref. + * Switches @ref into atomic mode before gathering up the percpu counters + * and dropping the initial ref. 
+ * + * There are no implied RCU grace periods between kill and release. */ static inline void percpu_ref_kill(struct percpu_ref *ref) { -- cgit v1.2.3 From 05f0fe6b74dbd7690a4cbd61810948b7d575576a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 Mar 2018 12:45:13 -0700 Subject: RCU, workqueue: Implement rcu_work There are cases where RCU callback needs to be bounced to a sleepable context. This is currently done by the RCU callback queueing a work item, which can be cumbersome to write and confusing to read. This patch introduces rcu_work, a workqueue work variant which gets executed after a RCU grace period, and converts the open coded bouncing in fs/aio and kernel/cgroup. v3: Dropped queue_rcu_work_on(). Documented rcu grace period behavior after queue_rcu_work(). v2: Use rcu_barrier() instead of synchronize_rcu() to wait for completion of previously queued rcu callback as per Paul. Signed-off-by: Tejun Heo Acked-by: "Paul E. McKenney" Cc: Linus Torvalds --- include/linux/workqueue.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index bc0cda180c8b..d026f8f818cc 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -13,6 +13,7 @@ #include #include #include +#include struct workqueue_struct; @@ -120,6 +121,14 @@ struct delayed_work { int cpu; }; +struct rcu_work { + struct work_struct work; + struct rcu_head rcu; + + /* target workqueue ->rcu uses to queue ->work */ + struct workqueue_struct *wq; +}; + /** * struct workqueue_attrs - A struct for workqueue attributes. 
* @@ -151,6 +160,11 @@ static inline struct delayed_work *to_delayed_work(struct work_struct *work) return container_of(work, struct delayed_work, work); } +static inline struct rcu_work *to_rcu_work(struct work_struct *work) +{ + return container_of(work, struct rcu_work, work); +} + struct execute_work { struct work_struct work; }; @@ -266,6 +280,12 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } #define INIT_DEFERRABLE_WORK_ONSTACK(_work, _func) \ __INIT_DELAYED_WORK_ONSTACK(_work, _func, TIMER_DEFERRABLE) +#define INIT_RCU_WORK(_work, _func) \ + INIT_WORK(&(_work)->work, (_func)) + +#define INIT_RCU_WORK_ONSTACK(_work, _func) \ + INIT_WORK_ONSTACK(&(_work)->work, (_func)) + /** * work_pending - Find out whether a work item is currently pending * @work: The work item in question @@ -447,6 +467,7 @@ extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay); extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay); +extern bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork); extern void flush_workqueue(struct workqueue_struct *wq); extern void drain_workqueue(struct workqueue_struct *wq); @@ -463,6 +484,8 @@ extern bool flush_delayed_work(struct delayed_work *dwork); extern bool cancel_delayed_work(struct delayed_work *dwork); extern bool cancel_delayed_work_sync(struct delayed_work *dwork); +extern bool flush_rcu_work(struct rcu_work *rwork); + extern void workqueue_set_max_active(struct workqueue_struct *wq, int max_active); extern struct work_struct *current_work(void); -- cgit v1.2.3 From 8f36aaec9c929f2864196b0799203491f6a67dc6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 Mar 2018 12:45:14 -0700 Subject: cgroup: Use rcu_work instead of explicit rcu and work item Workqueue now has rcu_work. Use it instead of open-coding rcu -> work item bouncing. 
Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 9f242b876fde..92d7640632ef 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -151,8 +151,8 @@ struct cgroup_subsys_state { atomic_t online_cnt; /* percpu_ref killing and RCU release */ - struct rcu_head rcu_head; struct work_struct destroy_work; + struct rcu_work destroy_rwork; /* * PI: the parent css. Placed here for cache proximity to following -- cgit v1.2.3 From 05d3ac978ed25b753bfe34fe76c50c31ee506a82 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Mon, 19 Mar 2018 15:10:29 +0200 Subject: net/mlx5: Packet pacing enhancement Add two new parameters: max_burst_sz and typical_pkt_size (both in bytes) to rate limit configurations. max_burst_sz: The device will schedule bursts of packets for an SQ connected to this rate, smaller than or equal to this value. Value 0x0 indicates packet bursts will be limited to the device defaults. This field should be used if bursts of packets must be strictly kept under a certain value. typical_pkt_size: When the rate limit is intended for a stream of similar packets, stating the typical packet size can improve the accuracy of the rate limiter. The expected packet size will be the same for all SQs associated with the same rate limit index. Ethernet driver is updated according to this change, but these two parameters will be kept as 0 due to lacking of proper way to get the configurations from user space which requires to change ndo_set_tx_maxrate interface. 
Signed-off-by: Bodong Wang Reviewed-by: Daniel Jurgens Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/driver.h | 15 ++++++++++++--- include/linux/mlx5/mlx5_ifc.h | 12 ++++++++++-- 2 files changed, 22 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index cded85ab6fe4..767d193c269a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -591,8 +591,14 @@ struct mlx5_eswitch; struct mlx5_lag; struct mlx5_pagefault; +struct mlx5_rate_limit { + u32 rate; + u32 max_burst_sz; + u16 typical_pkt_sz; +}; + struct mlx5_rl_entry { - u32 rate; + struct mlx5_rate_limit rl; u16 index; u16 refcount; }; @@ -1107,9 +1113,12 @@ int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token, int mlx5_init_rl_table(struct mlx5_core_dev *dev); void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev); -int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index); -void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate); +int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index, + struct mlx5_rate_limit *rl); +void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl); bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate); +bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0, + struct mlx5_rate_limit *rl_1); int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, bool map_wc, bool fast_path); void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 14ad84afe8ba..c63bbdc35503 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -571,7 +571,10 @@ struct mlx5_ifc_qos_cap_bits { u8 esw_scheduling[0x1]; u8 esw_bw_share[0x1]; u8 esw_rate_limit[0x1]; - u8 reserved_at_4[0x1c]; + u8 reserved_at_4[0x1]; + u8 
packet_pacing_burst_bound[0x1]; + u8 packet_pacing_typical_size[0x1]; + u8 reserved_at_7[0x19]; u8 reserved_at_20[0x20]; @@ -7313,7 +7316,12 @@ struct mlx5_ifc_set_pp_rate_limit_in_bits { u8 rate_limit[0x20]; - u8 reserved_at_a0[0x160]; + u8 burst_upper_bound[0x20]; + + u8 reserved_at_c0[0x10]; + u8 typical_packet_size[0x10]; + + u8 reserved_at_e0[0x120]; }; struct mlx5_ifc_access_register_out_bits { -- cgit v1.2.3 From 33c4c8a588e6cccf3832b84b7792f02153e0ccda Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 12 Mar 2018 10:41:18 +0100 Subject: PCI: Add Altera vendor ID Add the Altera PCI Vendor id to pci_ids.h and remove the private definitions from xillybus_pcie.c and altera-cvp.c. Signed-off-by: Johannes Thumshirn Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Acked-by: Eli Billauer Acked-by: Bjorn Helgaas Cc: Anatolij Gustschin --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index a6b30667a331..6a96a70fb462 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1561,6 +1561,8 @@ #define PCI_DEVICE_ID_SERVERWORKS_CSB6LPC 0x0227 #define PCI_DEVICE_ID_SERVERWORKS_HT1100LD 0x0408 +#define PCI_VENDOR_ID_ALTERA 0x1172 + #define PCI_VENDOR_ID_SBE 0x1176 #define PCI_DEVICE_ID_SBE_WANXL100 0x0301 #define PCI_DEVICE_ID_SBE_WANXL200 0x0302 -- cgit v1.2.3 From 312fc2b4c82e96a48cb2d0da2bd4816eb253c499 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 18 Mar 2018 12:57:00 -0700 Subject: net: do_tcp_sendpages flag to avoid SKBTX_SHARED_FRAG When calling do_tcp_sendpages() from in kernel and we know the data has no references from user side we can omit SKBTX_SHARED_FRAG flag. This patch adds an internal flag, NO_SKBTX_SHARED_FRAG that can be used to omit setting SKBTX_SHARED_FRAG. The flag is not exposed to userspace because the sendpage call from the splice logic masks out all bits except MSG_MORE. 
Signed-off-by: John Fastabend Acked-by: David S. Miller Signed-off-by: Daniel Borkmann --- include/linux/socket.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 1ce1f768a58c..60e01482a9c4 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -287,6 +287,7 @@ struct ucred { #define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */ #define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */ #define MSG_EOF MSG_FIN +#define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */ #define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ -- cgit v1.2.3 From 4f738adba30a7cfc006f605707e7aee847ffefa0 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 18 Mar 2018 12:57:10 -0700 Subject: bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data This implements a BPF ULP layer to allow policy enforcement and monitoring at the socket layer. In order to support this a new program type BPF_PROG_TYPE_SK_MSG is used to run the policy at the sendmsg/sendpage hook. To attach the policy to sockets a sockmap is used with a new program attach type BPF_SK_MSG_VERDICT. Similar to previous sockmap usages when a sock is added to a sockmap, via a map update, if the map contains a BPF_SK_MSG_VERDICT program type attached then the BPF ULP layer is created on the socket and the attached BPF_PROG_TYPE_SK_MSG program is run for every msg in sendmsg case and page/offset in sendpage case. BPF_PROG_TYPE_SK_MSG Semantics/API: BPF_PROG_TYPE_SK_MSG supports only two return codes SK_PASS and SK_DROP. Returning SK_DROP frees the copied data in the sendmsg case and in the sendpage case leaves the data untouched. Both cases return -EACCES to the user. Returning SK_PASS will allow the msg to be sent. 
In the sendmsg case data is copied into kernel space buffers before running the BPF program. The kernel space buffers are stored in a scatterlist object where each element is a kernel memory buffer. Some effort is made to coalesce data from the sendmsg call here. For example a sendmsg call with many one byte iov entries will likely be pushed into a single entry. The BPF program is run with data pointers (start/end) pointing to the first sg element. In the sendpage case data is not copied. We opt not to copy the data by default here, because the BPF infrastructure does not know what bytes will be needed nor when they will be needed. So copying all bytes may be wasteful. Because of this the initial start/end data pointers are (0,0). Meaning no data can be read or written. This avoids reading data that may be modified by the user. A new helper is added later in this series if reading and writing the data is needed. The helper call will do a copy by default so that the page is exclusively owned by the BPF call. The verdict from the BPF_PROG_TYPE_SK_MSG applies to the entire msg in the sendmsg() case and the entire page/offset in the sendpage case. This avoids ambiguity on how to handle mixed return codes in the sendmsg case. Again a helper is added later in the series if a verdict needs to apply to multiple system calls and/or only a subpart of the currently being processed message. The helper msg_redirect_map() can be used to select the socket to send the data on. This is used similar to existing redirect use cases. This allows policy to redirect msgs. 
Pseudo code simple example: The basic logic to attach a program to a socket is as follows, // load the programs bpf_prog_load(SOCKMAP_TCP_MSG_PROG, BPF_PROG_TYPE_SK_MSG, &obj, &msg_prog); // lookup the sockmap bpf_map_msg = bpf_object__find_map_by_name(obj, "my_sock_map"); // get fd for sockmap map_fd_msg = bpf_map__fd(bpf_map_msg); // attach program to sockmap bpf_prog_attach(msg_prog, map_fd_msg, BPF_SK_MSG_VERDICT, 0); Adding sockets to the map is done in the normal way, // Add a socket 'fd' to sockmap at location 'i' bpf_map_update_elem(map_fd_msg, &i, fd, BPF_ANY); After the above any socket attached to "my_sock_map", in this case 'fd', will run the BPF msg verdict program (msg_prog) on every sendmsg and sendpage system call. For a complete example see BPF selftests or sockmap samples. Implementation notes: It seemed the simplest, to me at least, to use a refcnt to ensure psock is not lost across the sendmsg copy into the sg, the bpf program running on the data in sg_data, and the final pass to the TCP stack. Some performance testing may show a better method to do this and avoid the refcnt cost, but for now use the simpler method. Another item that will come after basic support is in place is supporting MSG_MORE flag. At the moment we call sendpages even if the MSG_MORE flag is set. An enhancement would be to collect the pages into a larger scatterlist and pass down the stack. Notice that bpf_tcp_sendmsg() could support this with some additional state saved across sendmsg calls. I built the code to support this without having to do refactoring work. Other features TBD include ZEROCOPY and the TCP_RECV_QUEUE/TCP_NO_QUEUE support. This will follow initial series shortly. Future work could improve size limits on the scatterlist rings used here. Currently, we use MAX_SKB_FRAGS simply because this was being used already in the TLS case. Future work could extend the kernel sk APIs to tune this depending on workload. 
This is a trade-off between memory usage and throughput performance. Signed-off-by: John Fastabend Acked-by: David S. Miller Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 1 + include/linux/bpf_types.h | 1 + include/linux/filter.h | 17 +++++++++++++++++ 3 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 66df387106de..819229c80eca 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -21,6 +21,7 @@ struct bpf_verifier_env; struct perf_event; struct bpf_prog; struct bpf_map; +struct sock; /* map is generic key/value storage optionally accesible by eBPF programs */ struct bpf_map_ops { diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 19b8349a3809..5e2e8a49fb21 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -13,6 +13,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout) BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit) BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops) BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb) +BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg) #endif #ifdef CONFIG_BPF_EVENTS BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) diff --git a/include/linux/filter.h b/include/linux/filter.h index fdb691b520c0..109d05ccea9a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -507,6 +507,22 @@ struct xdp_buff { struct xdp_rxq_info *rxq; }; +struct sk_msg_buff { + void *data; + void *data_end; + __u32 apply_bytes; + __u32 cork_bytes; + int sg_copybreak; + int sg_start; + int sg_curr; + int sg_end; + struct scatterlist sg_data[MAX_SKB_FRAGS]; + bool sg_copy[MAX_SKB_FRAGS]; + __u32 key; + __u32 flags; + struct bpf_map *map; +}; + /* Compute the linear packet data range [data, data_end) which * will be accessed by various program types (cls_bpf, act_bpf, * lwt, ...). Subsystems allowing direct data access must (!) 
@@ -771,6 +787,7 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp) void bpf_warn_invalid_xdp_action(u32 act); struct sock *do_sk_redirect_map(struct sk_buff *skb); +struct sock *do_msg_redirect_map(struct sk_msg_buff *md); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; -- cgit v1.2.3 From 751ba79cc552c146595cd439b21c4ff8998c3b69 Mon Sep 17 00:00:00 2001 From: Matt Brown Date: Fri, 4 Aug 2017 13:42:32 +1000 Subject: lib/raid6/altivec: Add vpermxor implementation for raid6 Q syndrome This patch uses the vpermxor instruction to optimise the raid6 Q syndrome. This instruction was made available with POWER8, ISA version 2.07. It allows for both vperm and vxor instructions to be done in a single instruction. This has been tested for correctness on a ppc64le vm with a basic RAID6 setup containing 5 drives. The performance benchmarks are from the raid6test in the /lib/raid6/test directory. These results are from an IBM Firestone machine with ppc64le architecture. The benchmark results show a 35% speed increase over the best existing algorithm for powerpc (altivec). The raid6test has also been run on a big-endian ppc64 vm to ensure it also works for big-endian architectures. 
Performance benchmarks: raid6: altivecx4 gen() 18773 MB/s raid6: altivecx8 gen() 19438 MB/s raid6: vpermxor4 gen() 25112 MB/s raid6: vpermxor8 gen() 26279 MB/s Signed-off-by: Matt Brown Reviewed-by: Daniel Axtens [mpe: Add VPERMXOR macro so we can build with old binutils] Signed-off-by: Michael Ellerman --- include/linux/raid/pq.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 583cdd3d49ca..fd2e02461e41 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -107,6 +107,10 @@ extern const struct raid6_calls raid6_avx512x2; extern const struct raid6_calls raid6_avx512x4; extern const struct raid6_calls raid6_tilegx8; extern const struct raid6_calls raid6_s390vx8; +extern const struct raid6_calls raid6_vpermxor1; +extern const struct raid6_calls raid6_vpermxor2; +extern const struct raid6_calls raid6_vpermxor4; +extern const struct raid6_calls raid6_vpermxor8; struct raid6_recov_calls { void (*data2)(int, size_t, int, int, void **); -- cgit v1.2.3 From 7f65ea42eb00bc902f1c37a71e984e4f4064cfa9 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Fri, 9 Mar 2018 09:52:42 +0000 Subject: sched/fair: Add util_est on top of PELT The util_avg signal computed by PELT is too variable for some use-cases. For example, a big task waking up after a long sleep period will have its utilization almost completely decayed. This introduces some latency before schedutil will be able to pick the best frequency to run a task. The same issue can affect task placement. Indeed, since the task utilization is already decayed at wakeup, when the task is enqueued in a CPU, this can result in a CPU running a big task as being temporarily represented as being almost empty. This leads to a race condition where other tasks can be potentially allocated on a CPU which just started to run a big task which slept for a relatively long period. 
Moreover, the PELT utilization of a task can be updated every [ms], thus making it a continuously changing value for certain longer running tasks. This means that the instantaneous PELT utilization of a RUNNING task is not really meaningful to properly support scheduler decisions. For all these reasons, a more stable signal can do a better job of representing the expected/estimated utilization of a task/cfs_rq. Such a signal can be easily created on top of PELT by still using it as an estimator which produces values to be aggregated on meaningful events. This patch adds a simple implementation of util_est, a new signal built on top of PELT's util_avg where: util_est(task) = max(task::util_avg, f(task::util_avg@dequeue)) This allows to remember how big a task has been reported by PELT in its previous activations via f(task::util_avg@dequeue), which is the new _task_util_est(struct task_struct*) function added by this patch. If a task should change its behavior and it runs longer in a new activation, after a certain time its util_est will just track the original PELT signal (i.e. task::util_avg). The estimated utilization of cfs_rq is defined only for root ones. That's because the only sensible consumer of this signal are the scheduler and schedutil when looking for the overall CPU utilization due to FAIR tasks. 
For this reason, the estimated utilization of a root cfs_rq is simply defined as: util_est(cfs_rq) = max(cfs_rq::util_avg, cfs_rq::util_est::enqueued) where: cfs_rq::util_est::enqueued = sum(_task_util_est(task)) for each RUNNABLE task on that root cfs_rq It's worth noting that the estimated utilization is tracked only for objects of interests, specifically: - Tasks: to better support tasks placement decisions - root cfs_rqs: to better support both tasks placement decisions as well as frequencies selection Signed-off-by: Patrick Bellasi Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dietmar Eggemann Cc: Joel Fernandes Cc: Juri Lelli Cc: Linus Torvalds Cc: Morten Rasmussen Cc: Paul Turner Cc: Rafael J . Wysocki Cc: Steve Muckle Cc: Thomas Gleixner Cc: Todd Kjos Cc: Vincent Guittot Cc: Viresh Kumar Link: http://lkml.kernel.org/r/20180309095245.11071-2-patrick.bellasi@arm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 21b1168da951..f228c6033832 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -274,6 +274,34 @@ struct load_weight { u32 inv_weight; }; +/** + * struct util_est - Estimation utilization of FAIR tasks + * @enqueued: instantaneous estimated utilization of a task/cpu + * @ewma: the Exponential Weighted Moving Average (EWMA) + * utilization of a task + * + * Support data structure to track an Exponential Weighted Moving Average + * (EWMA) of a FAIR task's utilization. New samples are added to the moving + * average each time a task completes an activation. Sample's weight is chosen + * so that the EWMA will be relatively insensitive to transient changes to the + * task's workload. 
+ * + * The enqueued attribute has a slightly different meaning for tasks and cpus: + * - task: the task's util_avg at last task dequeue time + * - cfs_rq: the sum of util_est.enqueued for each RUNNABLE task on that CPU + * Thus, the util_est.enqueued of a task represents the contribution on the + * estimated utilization of the CPU where that task is currently enqueued. + * + * Only for tasks we track a moving average of the past instantaneous + * estimated utilization. This allows to absorb sporadic drops in utilization + * of an otherwise almost periodic task. + */ +struct util_est { + unsigned int enqueued; + unsigned int ewma; +#define UTIL_EST_WEIGHT_SHIFT 2 +}; + /* * The load_avg/util_avg accumulates an infinite geometric series * (see __update_load_avg() in kernel/sched/fair.c). @@ -335,6 +363,7 @@ struct sched_avg { unsigned long load_avg; unsigned long runnable_load_avg; unsigned long util_avg; + struct util_est util_est; }; struct sched_statistics { -- cgit v1.2.3 From 6b2bb7265f0b62605e8caee3613449ed0db270b9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Mar 2018 11:40:33 +0100 Subject: sched/wait: Introduce wait_var_event() As a replacement for the wait_on_atomic_t() API provide the wait_var_event() API. The wait_var_event() API is based on the very same hashed-waitqueue idea, but doesn't care about the type (atomic_t) or the specific condition (atomic_read() == 0). IOW. it's much more widely applicable/flexible. It shares all the benefits/disadvantages of a hashed-waitqueue approach with the existing wait_on_atomic_t/wait_on_bit() APIs. The API is modeled after the existing wait_event() API, but instead of taking a wait_queue_head, it takes an address. This address is hashed to obtain a wait_queue_head from the bit_wait_table. Similar to the wait_event() API, it takes a condition expression as second argument and will wait until this expression becomes true. 
The following are (mostly) identical replacements: wait_on_atomic_t(&my_atomic, atomic_t_wait, TASK_UNINTERRUPTIBLE); wake_up_atomic_t(&my_atomic); wait_var_event(&my_atomic, !atomic_read(&my_atomic)); wake_up_var(&my_atomic); The only difference is that wake_up_var() is an unconditional wakeup and doesn't check the previously hard-coded (atomic_read() == 0) condition here. This is of little consequence, since most callers are already conditional on atomic_dec_and_test() and the ones that are not, are trivial to make so. Tested-by: Dan Williams Signed-off-by: Peter Zijlstra (Intel) Cc: David Howells Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/wait_bit.h | 70 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'include/linux') diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 61b39eaf7cad..3fcdb75d69cf 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -262,4 +262,74 @@ int wait_on_atomic_t(atomic_t *val, wait_atomic_t_action_f action, unsigned mode return out_of_line_wait_on_atomic_t(val, action, mode); } +extern void init_wait_var_entry(struct wait_bit_queue_entry *wbq_entry, void *var, int flags); +extern void wake_up_var(void *var); +extern wait_queue_head_t *__var_waitqueue(void *p); + +#define ___wait_var_event(var, condition, state, exclusive, ret, cmd) \ +({ \ + __label__ __out; \ + struct wait_queue_head *__wq_head = __var_waitqueue(var); \ + struct wait_bit_queue_entry __wbq_entry; \ + long __ret = ret; /* explicit shadow */ \ + \ + init_wait_var_entry(&__wbq_entry, var, \ + exclusive ? 
WQ_FLAG_EXCLUSIVE : 0); \ + for (;;) { \ + long __int = prepare_to_wait_event(__wq_head, \ + &__wbq_entry.wq_entry, \ + state); \ + if (condition) \ + break; \ + \ + if (___wait_is_interruptible(state) && __int) { \ + __ret = __int; \ + goto __out; \ + } \ + \ + cmd; \ + } \ + finish_wait(__wq_head, &__wbq_entry.wq_entry); \ +__out: __ret; \ +}) + +#define __wait_var_event(var, condition) \ + ___wait_var_event(var, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ + schedule()) + +#define wait_var_event(var, condition) \ +do { \ + might_sleep(); \ + if (condition) \ + break; \ + __wait_var_event(var, condition); \ +} while (0) + +#define __wait_var_event_killable(var, condition) \ + ___wait_var_event(var, condition, TASK_KILLABLE, 0, 0, \ + schedule()) + +#define wait_var_event_killable(var, condition) \ +({ \ + int __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_var_event_killable(var, condition); \ + __ret; \ +}) + +#define __wait_var_event_timeout(var, condition, timeout) \ + ___wait_var_event(var, ___wait_cond_timeout(condition), \ + TASK_UNINTERRUPTIBLE, 0, timeout, \ + __ret = schedule_timeout(__ret)) + +#define wait_var_event_timeout(var, condition, timeout) \ +({ \ + long __ret = timeout; \ + might_sleep(); \ + if (!___wait_cond_timeout(condition)) \ + __ret = __wait_var_event_timeout(var, condition, timeout); \ + __ret; \ +}) + #endif /* _LINUX_WAIT_BIT_H */ -- cgit v1.2.3 From dc5d4afbb0bf7b7746ff5e56e1a5688ad7f29b32 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Mar 2018 11:43:43 +0100 Subject: sched/wait, fs/fscache: Convert wait_on_atomic_t() usage to the new wait_var_event() API The old wait_on_atomic_t() is going to get removed, use the more flexible wait_var_event() API instead. No change in functionality. 
Signed-off-by: Peter Zijlstra (Intel) Cc: David Howells Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/fscache-cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 4c467ef50159..3b03e29e2f1a 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -496,7 +496,7 @@ static inline bool __fscache_unuse_cookie(struct fscache_cookie *cookie) static inline void __fscache_wake_unused_cookie(struct fscache_cookie *cookie) { - wake_up_atomic_t(&cookie->n_active); + wake_up_var(&cookie->n_active); } /** -- cgit v1.2.3 From 9b8cce52c4b5c08297900bfdcafc6b08d9bc4a27 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Mar 2018 11:46:30 +0100 Subject: sched/wait: Remove the wait_on_atomic_t() API There are no users left (everyone got converted to wait_var_event()), remove it. 
Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/wait_bit.h | 27 --------------------------- 1 file changed, 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 3fcdb75d69cf..9318b2166439 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -10,7 +10,6 @@ struct wait_bit_key { void *flags; int bit_nr; -#define WAIT_ATOMIC_T_BIT_NR -1 unsigned long timeout; }; @@ -22,21 +21,15 @@ struct wait_bit_queue_entry { #define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ { .flags = word, .bit_nr = bit, } -#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p) \ - { .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, } - typedef int wait_bit_action_f(struct wait_bit_key *key, int mode); -typedef int wait_atomic_t_action_f(atomic_t *counter, unsigned int mode); void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit); int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); void wake_up_bit(void *word, int bit); -void wake_up_atomic_t(atomic_t *p); int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode); int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout); int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode); -int out_of_line_wait_on_atomic_t(atomic_t *p, wait_atomic_t_action_f action, unsigned int mode); struct wait_queue_head *bit_waitqueue(void *word, int bit); extern void __init wait_bit_init(void); @@ -57,7 +50,6 @@ extern int bit_wait(struct wait_bit_key *key, int mode); extern int 
bit_wait_io(struct wait_bit_key *key, int mode); extern int bit_wait_timeout(struct wait_bit_key *key, int mode); extern int bit_wait_io_timeout(struct wait_bit_key *key, int mode); -extern int atomic_t_wait(atomic_t *counter, unsigned int mode); /** * wait_on_bit - wait for a bit to be cleared @@ -243,25 +235,6 @@ wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action, return out_of_line_wait_on_bit_lock(word, bit, action, mode); } -/** - * wait_on_atomic_t - Wait for an atomic_t to become 0 - * @val: The atomic value being waited on, a kernel virtual address - * @action: the function used to sleep, which may take special actions - * @mode: the task state to sleep in - * - * Wait for an atomic_t to become 0. We abuse the bit-wait waitqueue table for - * the purpose of getting a waitqueue, but we set the key to a bit number - * outside of the target 'word'. - */ -static inline -int wait_on_atomic_t(atomic_t *val, wait_atomic_t_action_f action, unsigned mode) -{ - might_sleep(); - if (atomic_read(val) == 0) - return 0; - return out_of_line_wait_on_atomic_t(val, action, mode); -} - extern void init_wait_var_entry(struct wait_bit_queue_entry *wbq_entry, void *var, int flags); extern void wake_up_var(void *var); extern wait_queue_head_t *__var_waitqueue(void *p); -- cgit v1.2.3 From 578ae447e7e5d78c90ac40a06406c1741f79ba96 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 19 Mar 2018 13:18:57 -0500 Subject: jump_label: Disable jump labels in __exit code With the following commit: 333522447063 ("jump_label: Explicitly disable jump labels in __init code") ... we explicitly disabled jump labels in __init code, so they could be detected and not warned about in the following commit: dc1dd184c2f0 ("jump_label: Warn on failed jump_label patching attempt") In-kernel __exit code has the same issue. It's never used, so it's freed along with the rest of initmem. 
But jump label entries in __exit code aren't explicitly disabled, so we get the following warning when enabling pr_debug() in __exit code: can't patch jump_label at dmi_sysfs_exit+0x0/0x2d WARNING: CPU: 0 PID: 22572 at kernel/jump_label.c:376 __jump_label_update+0x9d/0xb0 Fix the warning by disabling all jump labels in initmem (which includes both __init and __exit code). Reported-and-tested-by: Li Wang Signed-off-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Jason Baron Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: dc1dd184c2f0 ("jump_label: Warn on failed jump_label patching attempt") Link: http://lkml.kernel.org/r/7121e6e595374f06616c505b6e690e275c0054d1.1521483452.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 2168cc6b8b30..b46b541c67c4 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -151,7 +151,7 @@ extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; extern void jump_label_init(void); -extern void jump_label_invalidate_init(void); +extern void jump_label_invalidate_initmem(void); extern void jump_label_lock(void); extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, @@ -199,7 +199,7 @@ static __always_inline void jump_label_init(void) static_key_initialized = true; } -static inline void jump_label_invalidate_init(void) {} +static inline void jump_label_invalidate_initmem(void) {} static __always_inline bool static_key_false(struct static_key *key) { -- cgit v1.2.3 From b958758e686aebe84672acc8871aca87d04f13a3 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 19 Mar 2018 14:47:19 +0100 Subject: mtd: rawnand: rename SET/GET FEATURES related functions SET/GET FEATURES are flagged ONFI-compliant because of their name. 
This is not accurate as non-ONFI NAND chips support it and use it. Rename the hooks and helpers to remove the "onfi" prefix. Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon --- include/linux/mtd/rawnand.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 56c5570aadbe..fb2e288ef8b1 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1170,8 +1170,8 @@ int nand_op_parser_exec_op(struct nand_chip *chip, * @blocks_per_die: [INTERN] The number of PEBs in a die * @data_interface: [INTERN] NAND interface timing information * @read_retries: [INTERN] the number of read retry modes supported - * @onfi_set_features: [REPLACEABLE] set the features for ONFI nand - * @onfi_get_features: [REPLACEABLE] get the features for ONFI nand + * @set_features: [REPLACEABLE] set the NAND chip features + * @get_features: [REPLACEABLE] get the NAND chip features * @setup_data_interface: [OPTIONAL] setup the data interface and timing. 
If * chipnr is set to %NAND_DATA_IFACE_CHECK_ONLY this * means the configuration should not be applied but @@ -1212,10 +1212,10 @@ struct nand_chip { bool check_only); int (*erase)(struct mtd_info *mtd, int page); int (*scan_bbt)(struct mtd_info *mtd); - int (*onfi_set_features)(struct mtd_info *mtd, struct nand_chip *chip, - int feature_addr, uint8_t *subfeature_para); - int (*onfi_get_features)(struct mtd_info *mtd, struct nand_chip *chip, - int feature_addr, uint8_t *subfeature_para); + int (*set_features)(struct mtd_info *mtd, struct nand_chip *chip, + int feature_addr, uint8_t *subfeature_para); + int (*get_features)(struct mtd_info *mtd, struct nand_chip *chip, + int feature_addr, uint8_t *subfeature_para); int (*setup_read_retry)(struct mtd_info *mtd, int retry_mode); int (*setup_data_interface)(struct mtd_info *mtd, int chipnr, const struct nand_data_interface *conf); @@ -1630,9 +1630,8 @@ int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, int page); /* Stub used by drivers that do not support GET/SET FEATURES operations */ -int nand_onfi_get_set_features_notsupp(struct mtd_info *mtd, - struct nand_chip *chip, int addr, - u8 *subfeature_param); +int nand_get_set_features_notsupp(struct mtd_info *mtd, struct nand_chip *chip, + int addr, u8 *subfeature_param); /* Default read_page_raw implementation */ int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, -- cgit v1.2.3 From 97baea1e6b74c73973fa0922252f880ab15450ea Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 19 Mar 2018 14:47:20 +0100 Subject: mtd: rawnand: use wrappers to call onfi GET/SET_FEATURES Prepare the fact that some features managed by GET/SET_FEATURES could be overloaded by vendor code. To handle this logic, use new wrappers instead of directly call the ->get/set_features() hooks. 
Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon --- include/linux/mtd/rawnand.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index fb2e288ef8b1..3cc2a3435b20 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1629,6 +1629,9 @@ int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page); int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, int page); +/* Wrapper to use in order for controllers/vendors to GET/SET FEATURES */ +int nand_get_features(struct nand_chip *chip, int addr, u8 *subfeature_param); +int nand_set_features(struct nand_chip *chip, int addr, u8 *subfeature_param); /* Stub used by drivers that do not support GET/SET FEATURES operations */ int nand_get_set_features_notsupp(struct mtd_info *mtd, struct nand_chip *chip, int addr, u8 *subfeature_param); -- cgit v1.2.3 From b7fa07460b0f0e9fbe6d9319a0864c145bd59bcb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Mar 2018 11:38:22 +0100 Subject: set_memory.h: Provide set_memory_{en,de}crypted() stubs ... to make these APIs more universally available. 
Tested-by: Tom Lendacky Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Tom Lendacky Cc: David Woodhouse Cc: Joerg Roedel Cc: Jon Mason Cc: Linus Torvalds Cc: Muli Ben-Yehuda Cc: Peter Zijlstra Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-11-hch@lst.de Signed-off-by: Ingo Molnar --- include/linux/set_memory.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h index e5140648f638..da5178216da5 100644 --- a/include/linux/set_memory.h +++ b/include/linux/set_memory.h @@ -17,4 +17,16 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } #endif +#ifndef CONFIG_ARCH_HAS_MEM_ENCRYPT +static inline int set_memory_encrypted(unsigned long addr, int numpages) +{ + return 0; +} + +static inline int set_memory_decrypted(unsigned long addr, int numpages) +{ + return 0; +} +#endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */ + #endif /* _LINUX_SET_MEMORY_H_ */ -- cgit v1.2.3 From b6e05477c10c12e36141558fc14f04b00ea634d4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Mar 2018 11:38:24 +0100 Subject: dma/direct: Handle the memory encryption bit in common code Give the basic phys_to_dma() and dma_to_phys() helpers a __-prefix and add the memory encryption mask to the non-prefixed versions. Use the __-prefixed versions directly instead of clearing the mask again in various places. 
Tested-by: Tom Lendacky Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Cc: David Woodhouse Cc: Joerg Roedel Cc: Jon Mason Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Muli Ben-Yehuda Cc: Peter Zijlstra Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-13-hch@lst.de Signed-off-by: Ingo Molnar --- include/linux/dma-direct.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index bcdb1a3e4b1f..53ad6a47f513 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -3,18 +3,19 @@ #define _LINUX_DMA_DIRECT_H 1 #include +#include #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA #include #else -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) { dma_addr_t dev_addr = (dma_addr_t)paddr; return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); } -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) +static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr) { phys_addr_t paddr = (phys_addr_t)dev_addr; @@ -30,6 +31,22 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) } #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ +/* + * If memory encryption is supported, phys_to_dma will set the memory encryption + * bit in the DMA address, and dma_to_phys will clear it. The raw __phys_to_dma + * and __dma_to_phys versions should only be used on non-encrypted memory for + * special occasions like DMA coherent buffers. 
+ */ +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return __sme_set(__phys_to_dma(dev, paddr)); +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return __sme_clr(__dma_to_phys(dev, daddr)); +} + #ifdef CONFIG_ARCH_HAS_DMA_MARK_CLEAN void dma_mark_clean(void *addr, size_t size); #else -- cgit v1.2.3 From 16e73adbca76fd18733278cb688b0ddb4cad162c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Mar 2018 11:38:26 +0100 Subject: dma/swiotlb: Remove swiotlb_{alloc,free}_coherent() Unused now that everyone uses swiotlb_{alloc,free}(). Tested-by: Tom Lendacky Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Cc: David Woodhouse Cc: Joerg Roedel Cc: Jon Mason Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Muli Ben-Yehuda Cc: Peter Zijlstra Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-15-hch@lst.de Signed-off-by: Ingo Molnar --- include/linux/swiotlb.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 5b1f2a00491c..965be92c33b5 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -72,14 +72,6 @@ void *swiotlb_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_handle, void swiotlb_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, unsigned long attrs); -extern void -*swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags); - -extern void -swiotlb_free_coherent(struct device *hwdev, size_t size, - void *vaddr, dma_addr_t dma_handle); - extern dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, -- cgit v1.2.3 From f4531b2b1929806d2bec1a2f19805031d8bc0806 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 19 Mar 2018 14:47:26 +0100 Subject: mtd: rawnand: prepare the removal of 
ONFI/JEDEC parameter pages The NAND chip parameter page is statically allocated within the nand_chip structure, which reserves a lot of space. Even not ONFI nor JEDEC chips have it embedded. Also, only a few parameters are actually read from the parameter page after the detection. To prepare to the removal of such huge structure, a small NAND parameter structure is allocated statically and contains only very few members that are generic to all chips and actually used elsewhere in the code. Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon --- include/linux/mtd/rawnand.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 3cc2a3435b20..a24591411d78 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -429,6 +429,16 @@ struct nand_jedec_params { __le16 crc; } __packed; +/** + * struct nand_parameters - NAND generic parameters from the parameter page + * @model: Model name + * @supports_set_get_features: The NAND chip supports setting/getting features + */ +struct nand_parameters { + char model[100]; + bool supports_set_get_features; +}; + /* The maximum expected count of bytes in the NAND ID sequence */ #define NAND_MAX_ID_LEN 8 @@ -1165,6 +1175,8 @@ int nand_op_parser_exec_op(struct nand_chip *chip, * supported, 0 otherwise. * @jedec_params: [INTERN] holds the JEDEC parameter page when JEDEC is * supported, 0 otherwise. + * @parameters: [INTERN] holds generic parameters under an easily + * readable form. * @max_bb_per_die: [INTERN] the max number of bad blocks each die of a * this nand device will encounter their life times. 
* @blocks_per_die: [INTERN] The number of PEBs in a die @@ -1249,6 +1261,7 @@ struct nand_chip { struct nand_onfi_params onfi_params; struct nand_jedec_params jedec_params; }; + struct nand_parameters parameters; u16 max_bb_per_die; u32 blocks_per_die; -- cgit v1.2.3 From a97421c7532d382ab560ca153bdf9450f97c7e41 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 19 Mar 2018 14:47:27 +0100 Subject: mtd: rawnand: prepare the removal of the ONFI parameter page The NAND chip parameter page is statically allocated within the nand_chip structure, which reserves a lot of space. Even not ONFI nor JEDEC chips have it embedded. Also, only a few parameters are actually read from the parameter page after the detection. ONFI-related parameters that will be used outside from the identification function are stored in a separate onfi_parameters structure embedded in nand_parameters, this small structure that already holds generic parameters. For now, the onfi_parameters structure is allocated statically. However, after some deep rework in the NAND framework, it will be possible to do dynamic allocations from the NAND identification phase, and this structure will then be dynamically allocated when needed. 
Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon --- include/linux/mtd/rawnand.h | 47 ++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index a24591411d78..7b5afa6ef5a9 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -429,14 +429,41 @@ struct nand_jedec_params { __le16 crc; } __packed; +/** + * struct onfi_params - ONFI specific parameters that will be reused + * @version: ONFI version (BCD encoded), 0 if ONFI is not supported + * @tPROG: Page program time + * @tBERS: Block erase time + * @tR: Page read time + * @tCCS: Change column setup time + * @async_timing_mode: Supported asynchronous timing mode + * @vendor_revision: Vendor specific revision number + * @vendor: Vendor specific data + */ +struct onfi_params { + int version; + u16 tPROG; + u16 tBERS; + u16 tR; + u16 tCCS; + u16 async_timing_mode; + u16 vendor_revision; + u8 vendor[88]; +}; + /** * struct nand_parameters - NAND generic parameters from the parameter page * @model: Model name * @supports_set_get_features: The NAND chip supports setting/getting features + * @onfi: ONFI specific parameters */ struct nand_parameters { + /* Generic parameters */ char model[100]; bool supports_set_get_features; + + /* ONFI parameters */ + struct onfi_params onfi; }; /* The maximum expected count of bytes in the NAND ID sequence */ @@ -1167,8 +1194,6 @@ int nand_op_parser_exec_op(struct nand_chip *chip, * currently in data_buf. * @subpagesize: [INTERN] holds the subpagesize * @id: [INTERN] holds NAND ID - * @onfi_version: [INTERN] holds the chip ONFI version (BCD encoded), - * non 0 if ONFI supported. * @jedec_version: [INTERN] holds the chip JEDEC version (BCD encoded), * non 0 if JEDEC supported. 
* @onfi_params: [INTERN] holds the ONFI page parameter when ONFI is @@ -1255,7 +1280,6 @@ struct nand_chip { int badblockbits; struct nand_id id; - int onfi_version; int jedec_version; union { struct nand_onfi_params onfi_params; @@ -1548,26 +1572,13 @@ struct platform_nand_data { struct platform_nand_ctrl ctrl; }; -/* return the supported features. */ -static inline int onfi_feature(struct nand_chip *chip) -{ - return chip->onfi_version ? le16_to_cpu(chip->onfi_params.features) : 0; -} - /* return the supported asynchronous timing mode. */ static inline int onfi_get_async_timing_mode(struct nand_chip *chip) { - if (!chip->onfi_version) + if (!chip->parameters.onfi.version) return ONFI_TIMING_MODE_UNKNOWN; - return le16_to_cpu(chip->onfi_params.async_timing_mode); -} -/* return the supported synchronous timing mode. */ -static inline int onfi_get_sync_timing_mode(struct nand_chip *chip) -{ - if (!chip->onfi_version) - return ONFI_TIMING_MODE_UNKNOWN; - return le16_to_cpu(chip->onfi_params.src_sync_timing_mode); + return chip->parameters.onfi.async_timing_mode; } int onfi_fill_data_interface(struct nand_chip *chip, -- cgit v1.2.3 From 789157e41a0694e70bf80bceecd79438c3de98d6 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 19 Mar 2018 14:47:28 +0100 Subject: mtd: rawnand: allow vendors to declare (un)supported features If SET/GET_FEATURES is available (from the parameter page), use a bitmap to declare what feature is actually supported. Initialize the bitmap in the core to support timing changes (only feature used by the core), also add support for Micron specific features used in Micron initialization code (in the init routine). 
Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon --- include/linux/mtd/rawnand.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 7b5afa6ef5a9..d9f417719d36 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -21,6 +21,7 @@ #include #include #include +#include struct mtd_info; struct nand_flash_dev; @@ -235,7 +236,8 @@ struct nand_chip; #define ONFI_TIMING_MODE_5 (1 << 5) #define ONFI_TIMING_MODE_UNKNOWN (1 << 6) -/* ONFI feature address */ +/* ONFI feature number/address */ +#define ONFI_FEATURE_NUMBER 256 #define ONFI_FEATURE_ADDR_TIMING_MODE 0x1 /* Vendor-specific feature address (Micron) */ @@ -455,12 +457,16 @@ struct onfi_params { * struct nand_parameters - NAND generic parameters from the parameter page * @model: Model name * @supports_set_get_features: The NAND chip supports setting/getting features + * @set_feature_list: Bitmap of features that can be set + * @get_feature_list: Bitmap of features that can be get * @onfi: ONFI specific parameters */ struct nand_parameters { /* Generic parameters */ char model[100]; bool supports_set_get_features; + DECLARE_BITMAP(set_feature_list, ONFI_FEATURE_NUMBER); + DECLARE_BITMAP(get_feature_list, ONFI_FEATURE_NUMBER); /* ONFI parameters */ struct onfi_params onfi; -- cgit v1.2.3 From 480139d9229e3be0530bc548da208b5f49b1ab90 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 19 Mar 2018 14:47:30 +0100 Subject: mtd: rawnand: get rid of the JEDEC parameter page in nand_chip The NAND chip parameter page is statically allocated within the nand_chip structure, which reserves a lot of space. Even not ONFI nor JEDEC chips have it embedded. Also, only a few parameters are actually read from the parameter page after the detection. 
Now that there is a small nand_parameters structure that can hold generic parameters, remove the JEDEC page from the nand_chip structure by just allocating it during the identification phase and removing it right after. Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon --- include/linux/mtd/rawnand.h | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index d9f417719d36..cf82a959b0f3 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1200,12 +1200,8 @@ int nand_op_parser_exec_op(struct nand_chip *chip, * currently in data_buf. * @subpagesize: [INTERN] holds the subpagesize * @id: [INTERN] holds NAND ID - * @jedec_version: [INTERN] holds the chip JEDEC version (BCD encoded), - * non 0 if JEDEC supported. * @onfi_params: [INTERN] holds the ONFI page parameter when ONFI is * supported, 0 otherwise. - * @jedec_params: [INTERN] holds the JEDEC parameter page when JEDEC is - * supported, 0 otherwise. * @parameters: [INTERN] holds generic parameters under an easily * readable form. * @max_bb_per_die: [INTERN] the max number of bad blocks each die of a @@ -1286,11 +1282,7 @@ struct nand_chip { int badblockbits; struct nand_id id; - int jedec_version; - union { - struct nand_onfi_params onfi_params; - struct nand_jedec_params jedec_params; - }; + struct nand_onfi_params onfi_params; struct nand_parameters parameters; u16 max_bb_per_die; u32 blocks_per_die; @@ -1621,13 +1613,6 @@ static inline int nand_opcode_8bits(unsigned int command) return 0; } -/* return the supported JEDEC features. */ -static inline int jedec_feature(struct nand_chip *chip) -{ - return chip->jedec_version ? le16_to_cpu(chip->jedec_params.features) - : 0; -} - /* get timing characteristics from ONFI timing mode. 
*/ const struct nand_sdr_timings *onfi_async_timing_mode_to_sdr_timings(int mode); -- cgit v1.2.3 From bd0b64340c2d66c0fe1aa99b0b23159d7e0c21f2 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 19 Mar 2018 14:47:31 +0100 Subject: mtd: rawnand: get rid of the ONFI parameter page in nand_chip The NAND chip parameter page is statically allocated within the nand_chip structure, which reserves a lot of space. Even not ONFI nor JEDEC chips have it embedded. Also, only a few parameters are actually read from the parameter page after the detection. Now that there is a small nand_parameters structure that holds all needed ONFI parameters, remove the ONFI page from the nand_chip structure by just allocating it during the identification phase and removing it right after. Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon --- include/linux/mtd/rawnand.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index cf82a959b0f3..5dad59b31244 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1200,8 +1200,6 @@ int nand_op_parser_exec_op(struct nand_chip *chip, * currently in data_buf. * @subpagesize: [INTERN] holds the subpagesize * @id: [INTERN] holds NAND ID - * @onfi_params: [INTERN] holds the ONFI page parameter when ONFI is - * supported, 0 otherwise. * @parameters: [INTERN] holds generic parameters under an easily * readable form. 
* @max_bb_per_die: [INTERN] the max number of bad blocks each die of a @@ -1282,7 +1280,6 @@ struct nand_chip { int badblockbits; struct nand_id id; - struct nand_onfi_params onfi_params; struct nand_parameters parameters; u16 max_bb_per_die; u32 blocks_per_die; -- cgit v1.2.3 From 04dfac09068766550e3173aac88ff70d70958050 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Wed, 14 Mar 2018 15:53:10 +0200 Subject: ARM: omap2+: control: add support for auxiliary control module instances Control module can have multiple instances in a system, each with separate address space and features. Add base support for these auxiliary instances, with support for syscon and clock mappings under them. Signed-off-by: Tero Kristo Tested-by: Peter Ujfalusi Signed-off-by: Tony Lindgren --- include/linux/clk/ti.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index d18da839b810..7e3bceee3489 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -203,6 +203,7 @@ enum { TI_CLKM_PRM, TI_CLKM_SCRM, TI_CLKM_CTRL, + TI_CLKM_CTRL_AUX, TI_CLKM_PLLSS, CLK_MAX_MEMMAPS }; -- cgit v1.2.3 From 2d172691515961cad2abb4bf1b15d187bf2106cf Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 15 Mar 2018 21:52:18 -0500 Subject: clk: davinci: New driver for davinci PLL clocks This adds a new driver for mach-davinci PLL clocks. This is porting the code from arch/arm/mach-davinci/clock.c to the common clock framework. Additionally, it adds device tree support for these clocks. The ifeq ($(CONFIG_COMMON_CLK), y) in the Makefile is needed to prevent compile errors until the clock code in arch/arm/mach-davinci is removed. Note: although there are similar clocks for TI Keystone we are not able to share the code for a few reasons. The keystone clocks are device tree only and use legacy one-node-per-clock bindings. 
Also the register layouts are a bit different, which would add even more if/else mess to the keystone clocks. And the keystone PLL driver doesn't support setting clock rates. Signed-off-by: David Lechner Signed-off-by: Stephen Boyd --- include/linux/platform_data/clk-davinci-pll.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 include/linux/platform_data/clk-davinci-pll.h (limited to 'include/linux') diff --git a/include/linux/platform_data/clk-davinci-pll.h b/include/linux/platform_data/clk-davinci-pll.h new file mode 100644 index 000000000000..e55dab1d578b --- /dev/null +++ b/include/linux/platform_data/clk-davinci-pll.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PLL clock driver for TI Davinci SoCs + * + * Copyright (C) 2018 David Lechner + */ + +#ifndef __LINUX_PLATFORM_DATA_CLK_DAVINCI_PLL_H__ +#define __LINUX_PLATFORM_DATA_CLK_DAVINCI_PLL_H__ + +#include + +/** + * davinci_pll_platform_data + * @cfgchip: CFGCHIP syscon regmap + */ +struct davinci_pll_platform_data { + struct regmap *cfgchip; +}; + +#endif /* __LINUX_PLATFORM_DATA_CLK_DAVINCI_PLL_H__ */ -- cgit v1.2.3 From 1e88a8d64f221208801bb279ee7452df0b6d609f Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 15 Mar 2018 21:52:34 -0500 Subject: clk: davinci: New driver for TI DA8XX CFGCHIP clocks This adds a new driver for the gate and multiplexer clocks in the CFGCHIPn syscon registers on TI DA8XX-type SoCs. 
Signed-off-by: David Lechner Signed-off-by: Stephen Boyd --- include/linux/platform_data/clk-da8xx-cfgchip.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 include/linux/platform_data/clk-da8xx-cfgchip.h (limited to 'include/linux') diff --git a/include/linux/platform_data/clk-da8xx-cfgchip.h b/include/linux/platform_data/clk-da8xx-cfgchip.h new file mode 100644 index 000000000000..de0f77d38669 --- /dev/null +++ b/include/linux/platform_data/clk-da8xx-cfgchip.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * clk-da8xx-cfgchip - TI DaVinci DA8xx CFGCHIP clock driver + * + * Copyright (C) 2018 David Lechner + */ + +#ifndef __LINUX_PLATFORM_DATA_CLK_DA8XX_CFGCHIP_H__ +#define __LINUX_PLATFORM_DATA_CLK_DA8XX_CFGCHIP_H__ + +#include <linux/regmap.h> + +/** + * da8xx_cfgchip_clk_platform_data + * @cfgchip: CFGCHIP syscon regmap + */ +struct da8xx_cfgchip_clk_platform_data { + struct regmap *cfgchip; +}; + +#endif /* __LINUX_PLATFORM_DATA_CLK_DA8XX_CFGCHIP_H__ */ -- cgit v1.2.3 From 97cc3264508f33783ba21573204d7e0bf5b197e7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 20 Mar 2018 17:05:15 -0400 Subject: svcrdma: Consult max_qp_init_rd_atom when accepting connections The target needs to return the lesser of the client's Inbound RDMA Read Queue Depth (IRD), provided in the connection parameters, and the local device's Outbound RDMA Read Queue Depth (ORD). The latter limit is max_qp_init_rd_atom, not max_qp_rd_atom. The svcrdma_ord value caps the ORD value for iWARP transports, which do not exchange ORD/IRD values at connection time. Since no other Linux kernel RDMA-enabled storage target sees fit to provide this cap, I'm removing it here too. initiator_depth is a u8, so ensure the computed ORD value does not overflow that field. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 4b731b046bcd..7337e1221590 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -132,9 +132,6 @@ struct svcxprt_rdma { #define RDMAXPRT_CONN_PENDING 3 #define RPCRDMA_LISTEN_BACKLOG 10 -/* The default ORD value is based on two outstanding full-size writes with a - * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */ -#define RPCRDMA_ORD (64/4) #define RPCRDMA_MAX_REQUESTS 32 /* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our -- cgit v1.2.3 From 66afdedf269cf485efb5affb30c34e1f37705445 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 12 Feb 2018 09:53:12 +0100 Subject: extcon: gpio: Localize platform data Nothing in the entire kernel #includes <linux/extcon/extcon-gpio.h> so move the platform data declaration inside of the driver. Signed-off-by: Linus Walleij Signed-off-by: Chanwoo Choi --- include/linux/extcon/extcon-gpio.h | 47 -------------------------------------- 1 file changed, 47 deletions(-) delete mode 100644 include/linux/extcon/extcon-gpio.h (limited to 'include/linux') diff --git a/include/linux/extcon/extcon-gpio.h b/include/linux/extcon/extcon-gpio.h deleted file mode 100644 index 7cacafb78b09..000000000000 --- a/include/linux/extcon/extcon-gpio.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Single-state GPIO extcon driver based on extcon class - * - * Copyright (C) 2012 Samsung Electronics - * Author: MyungJoo Ham - * - * based on switch class driver - * Copyright (C) 2008 Google, Inc. - * Author: Mike Lockwood - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ -#ifndef __EXTCON_GPIO_H__ -#define __EXTCON_GPIO_H__ __FILE__ - -#include - -/** - * struct gpio_extcon_pdata - A simple GPIO-controlled extcon device. - * @extcon_id: The unique id of specific external connector. - * @gpio: Corresponding GPIO. - * @gpio_active_low: Boolean describing whether gpio active state is 1 or 0 - * If true, low state of gpio means active. - * If false, high state of gpio means active. - * @debounce: Debounce time for GPIO IRQ in ms. - * @irq_flags: IRQ Flags (e.g., IRQF_TRIGGER_LOW). - * @check_on_resume: Boolean describing whether to check the state of gpio - * while resuming from sleep. - */ -struct gpio_extcon_pdata { - unsigned int extcon_id; - unsigned gpio; - bool gpio_active_low; - unsigned long debounce; - unsigned long irq_flags; - - bool check_on_resume; -}; - -#endif /* __EXTCON_GPIO_H__ */ -- cgit v1.2.3 From e7bfb3fdbde3bfeeeb64e2d73ac6babe59519c9e Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 12 Feb 2018 22:03:11 +0100 Subject: mtd: Stop updating erase_info->state and calling mtd_erase_callback() MTD users are no longer checking erase_info->state to determine if the erase operation failed or succeeded. Moreover, mtd_erase_callback() is now a NOP. We can safely get rid of all mtd_erase_callback() calls and all erase_info->state assignments. While at it, get rid of the erase_info->state field, all MTD_ERASE_XXX definitions and the mtd_erase_callback() function. 
Signed-off-by: Boris Brezillon Reviewed-by: Richard Weinberger Reviewed-by: Miquel Raynal Acked-by: Bert Kenward --- Changes in v2: - Address a few coding style issues (reported by Miquel) - Remove comments that are no longer valid (reported by Miquel) --- include/linux/mtd/mtd.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 4cbb7f555244..a86c4fa93115 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -30,12 +30,6 @@ #include -#define MTD_ERASE_PENDING 0x01 -#define MTD_ERASING 0x02 -#define MTD_ERASE_SUSPEND 0x04 -#define MTD_ERASE_DONE 0x08 -#define MTD_ERASE_FAILED 0x10 - #define MTD_FAIL_ADDR_UNKNOWN -1LL struct mtd_info; @@ -49,7 +43,6 @@ struct erase_info { uint64_t addr; uint64_t len; uint64_t fail_addr; - u_char state; }; struct mtd_erase_region_info { @@ -589,8 +582,6 @@ extern void register_mtd_user (struct mtd_notifier *new); extern int unregister_mtd_user (struct mtd_notifier *old); void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size); -void mtd_erase_callback(struct erase_info *instr); - static inline int mtd_is_bitflip(int err) { return err == -EUCLEAN; } -- cgit v1.2.3 From 6b00c35138b404be98b85f4a703be594cbed501c Mon Sep 17 00:00:00 2001 From: Jagdish Gediya Date: Thu, 22 Mar 2018 01:08:10 +0530 Subject: mtd: nand: fsl_ifc: Read ECCSTAT0 and ECCSTAT1 registers for IFC 2.0 Due to missing information in Hardware manual, current implementation doesn't read ECCSTAT0 and ECCSTAT1 registers for IFC 2.0. Add support to read ECCSTAT0 and ECCSTAT1 registers during ecccheck for IFC 2.0. 
Fixes: 656441478ed5 ("mtd: nand: ifc: Fix location of eccstat registers for IFC V1.0") Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Jagdish Gediya Reviewed-by: Prabhakar Kushwaha Signed-off-by: Boris Brezillon --- include/linux/fsl_ifc.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsl_ifc.h b/include/linux/fsl_ifc.h index c332f0a45607..3fdfede2f0f3 100644 --- a/include/linux/fsl_ifc.h +++ b/include/linux/fsl_ifc.h @@ -734,11 +734,7 @@ struct fsl_ifc_nand { u32 res19[0x10]; __be32 nand_fsr; u32 res20; - /* The V1 nand_eccstat is actually 4 words that overlaps the - * V2 nand_eccstat. - */ - __be32 v1_nand_eccstat[2]; - __be32 v2_nand_eccstat[6]; + __be32 nand_eccstat[8]; u32 res21[0x1c]; __be32 nanndcr; u32 res22[0x2]; -- cgit v1.2.3 From 9a2fe9b801f585baccf8352d82839dcd54b300cf Mon Sep 17 00:00:00 2001 From: Ruslan Bilovol Date: Wed, 21 Mar 2018 02:03:59 +0200 Subject: ALSA: usb: initial USB Audio Device Class 3.0 support Recently released USB Audio Class 3.0 specification introduces many significant changes comparing to previous versions, like - new Power Domains, support for LPM/L1 - new Cluster descriptor - changed layout of all class-specific descriptors - new High Capability descriptors - New class-specific String descriptors - new and removed units - additional sources for interrupts - removed Type II Audio Data Formats - ... and many other things (check spec) It also provides backward compatibility through multiple configurations, as well as requires mandatory support for BADD (Basic Audio Device Definition) on each ADC3.0 compliant device This patch adds initial support of UAC3 specification that is enough for Generic I/O Profile (BAOF, BAIF) device support from BADD document. 
Signed-off-by: Ruslan Bilovol Reviewed-by: Greg Kroah-Hartman Signed-off-by: Takashi Iwai --- include/linux/usb/audio-v2.h | 4 +- include/linux/usb/audio-v3.h | 395 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 397 insertions(+), 2 deletions(-) create mode 100644 include/linux/usb/audio-v3.h (limited to 'include/linux') diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h index 3119d0ace7aa..2db83a191e78 100644 --- a/include/linux/usb/audio-v2.h +++ b/include/linux/usb/audio-v2.h @@ -34,12 +34,12 @@ * */ -static inline bool uac2_control_is_readable(u32 bmControls, u8 control) +static inline bool uac_v2v3_control_is_readable(u32 bmControls, u8 control) { return (bmControls >> (control * 2)) & 0x1; } -static inline bool uac2_control_is_writeable(u32 bmControls, u8 control) +static inline bool uac_v2v3_control_is_writeable(u32 bmControls, u8 control) { return (bmControls >> (control * 2)) & 0x2; } diff --git a/include/linux/usb/audio-v3.h b/include/linux/usb/audio-v3.h new file mode 100644 index 000000000000..a8959aaba0ae --- /dev/null +++ b/include/linux/usb/audio-v3.h @@ -0,0 +1,395 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2017 Ruslan Bilovol + * + * This file holds USB constants and structures defined + * by the USB DEVICE CLASS DEFINITION FOR AUDIO DEVICES Release 3.0. + */ + +#ifndef __LINUX_USB_AUDIO_V3_H +#define __LINUX_USB_AUDIO_V3_H + +#include + +/* + * v1.0, v2.0 and v3.0 of this standard have many things in common. 
For the rest + * of the definitions, please refer to audio.h and audio-v2.h + */ + +/* All High Capability descriptors have these 2 fields at the beginning */ +struct uac3_hc_descriptor_header { + __le16 wLength; + __u8 bDescriptorType; + __u8 bDescriptorSubtype; + __le16 wDescriptorID; +} __attribute__ ((packed)); + +/* 4.3.1 CLUSTER DESCRIPTOR HEADER */ +struct uac3_cluster_header_descriptor { + __le16 wLength; + __u8 bDescriptorType; + __u8 bDescriptorSubtype; + __le16 wDescriptorID; + __u8 bNrChannels; +} __attribute__ ((packed)); + +/* 4.3.2.1 SEGMENTS */ +struct uac3_cluster_segment_descriptor { + __le16 wLength; + __u8 bSegmentType; + /* __u8[0]; segment-specific data */ +} __attribute__ ((packed)); + +/* 4.3.2.1.1 END SEGMENT */ +struct uac3_cluster_end_segment_descriptor { + __le16 wLength; + __u8 bSegmentType; /* Constant END_SEGMENT */ +} __attribute__ ((packed)); + +/* 4.3.2.1.3.1 INFORMATION SEGMENT */ +struct uac3_cluster_information_segment_descriptor { + __le16 wLength; + __u8 bSegmentType; + __u8 bChPurpose; + __u8 bChRelationship; + __u8 bChGroupID; +} __attribute__ ((packed)); + +/* 4.5.2 CLASS-SPECIFIC AC INTERFACE DESCRIPTOR */ +struct uac3_ac_header_descriptor { + __u8 bLength; /* 10 */ + __u8 bDescriptorType; /* CS_INTERFACE descriptor type */ + __u8 bDescriptorSubtype; /* HEADER descriptor subtype */ + __u8 bCategory; + + /* includes Clock Source, Unit, Terminal, and Power Domain desc. 
*/ + __le16 wTotalLength; + + __le32 bmControls; +} __attribute__ ((packed)); + +/* 4.5.2.1 INPUT TERMINAL DESCRIPTOR */ +struct uac3_input_terminal_descriptor { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubtype; + __u8 bTerminalID; + __le16 wTerminalType; + __u8 bAssocTerminal; + __u8 bCSourceID; + __le32 bmControls; + __le16 wClusterDescrID; + __le16 wExTerminalDescrID; + __le16 wConnectorsDescrID; + __le16 wTerminalDescrStr; +} __attribute__((packed)); + +/* 4.5.2.2 OUTPUT TERMINAL DESCRIPTOR */ +struct uac3_output_terminal_descriptor { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubtype; + __u8 bTerminalID; + __le16 wTerminalType; + __u8 bAssocTerminal; + __u8 bSourceID; + __u8 bCSourceID; + __le32 bmControls; + __le16 wExTerminalDescrID; + __le16 wConnectorsDescrID; + __le16 wTerminalDescrStr; +} __attribute__((packed)); + +/* 4.5.2.7 FEATURE UNIT DESCRIPTOR */ +struct uac3_feature_unit_descriptor { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubtype; + __u8 bUnitID; + __u8 bSourceID; + /* bmaControls is actually u32, + * but u8 is needed for the hybrid parser */ + __u8 bmaControls[0]; /* variable length */ + /* wFeatureDescrStr omitted */ +} __attribute__((packed)); + +#define UAC3_DT_FEATURE_UNIT_SIZE(ch) (7 + ((ch) + 1) * 4) + +/* As above, but more useful for defining your own descriptors */ +#define DECLARE_UAC3_FEATURE_UNIT_DESCRIPTOR(ch) \ +struct uac3_feature_unit_descriptor_##ch { \ + __u8 bLength; \ + __u8 bDescriptorType; \ + __u8 bDescriptorSubtype; \ + __u8 bUnitID; \ + __u8 bSourceID; \ + __le32 bmaControls[ch + 1]; \ + __le16 wFeatureDescrStr; \ +} __attribute__ ((packed)) + +/* 4.5.2.12 CLOCK SOURCE DESCRIPTOR */ +struct uac3_clock_source_descriptor { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubtype; + __u8 bClockID; + __u8 bmAttributes; + __le32 bmControls; + __u8 bReferenceTerminal; + __le16 wClockSourceStr; +} __attribute__((packed)); + +/* bmAttribute fields */ +#define 
UAC3_CLOCK_SOURCE_TYPE_EXT 0x0 +#define UAC3_CLOCK_SOURCE_TYPE_INT 0x1 +#define UAC3_CLOCK_SOURCE_ASYNC (0 << 2) +#define UAC3_CLOCK_SOURCE_SYNCED_TO_SOF (1 << 1) + +/* 4.5.2.13 CLOCK SELECTOR DESCRIPTOR */ +struct uac3_clock_selector_descriptor { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubtype; + __u8 bClockID; + __u8 bNrInPins; + __u8 baCSourceID[]; + /* bmControls and wCSelectorDescrStr omitted */ +} __attribute__((packed)); + +/* 4.5.2.14 CLOCK MULTIPLIER DESCRIPTOR */ +struct uac3_clock_multiplier_descriptor { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubtype; + __u8 bClockID; + __u8 bCSourceID; + __le32 bmControls; + __le16 wCMultiplierDescrStr; +} __attribute__((packed)); + +/* 4.5.2.15 POWER DOMAIN DESCRIPTOR */ +struct uac3_power_domain_descriptor { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubtype; + __u8 bPowerDomainID; + __le16 waRecoveryTime1; + __le16 waRecoveryTime2; + __u8 bNrEntities; + __u8 baEntityID[]; + /* wPDomainDescrStr omitted */ +} __attribute__((packed)); + +/* As above, but more useful for defining your own descriptors */ +#define DECLARE_UAC3_POWER_DOMAIN_DESCRIPTOR(n) \ +struct uac3_power_domain_descriptor_##n { \ + __u8 bLength; \ + __u8 bDescriptorType; \ + __u8 bDescriptorSubtype; \ + __u8 bPowerDomainID; \ + __le16 waRecoveryTime1; \ + __le16 waRecoveryTime2; \ + __u8 bNrEntities; \ + __u8 baEntityID[n]; \ + __le16 wPDomainDescrStr; \ +} __attribute__ ((packed)) + +/* 4.7.2 CLASS-SPECIFIC AS INTERFACE DESCRIPTOR */ +struct uac3_as_header_descriptor { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubtype; + __u8 bTerminalLink; + __le32 bmControls; + __le16 wClusterDescrID; + __le64 bmFormats; + __u8 bSubslotSize; + __u8 bBitResolution; + __le16 bmAuxProtocols; + __u8 bControlSize; +} __attribute__((packed)); + +#define UAC3_FORMAT_TYPE_I_RAW_DATA (1 << 6) + +/* 4.8.1.2 CLASS-SPECIFIC AS ISOCHRONOUS AUDIO DATA ENDPOINT DESCRIPTOR */ +struct uac3_iso_endpoint_descriptor { + 
__u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubtype; + __le32 bmControls; + __u8 bLockDelayUnits; + __le16 wLockDelay; +} __attribute__((packed)); + +/* 6.1 INTERRUPT DATA MESSAGE */ +struct uac3_interrupt_data_msg { + __u8 bInfo; + __u8 bSourceType; + __le16 wValue; + __le16 wIndex; +} __attribute__((packed)); + +/* A.2 AUDIO AUDIO FUNCTION SUBCLASS CODES */ +#define UAC3_FUNCTION_SUBCLASS_UNDEFINED 0x00 +#define UAC3_FUNCTION_SUBCLASS_FULL_ADC_3_0 0x01 +/* BADD profiles */ +#define UAC3_FUNCTION_SUBCLASS_GENERIC_IO 0x20 +#define UAC3_FUNCTION_SUBCLASS_HEADPHONE 0x21 +#define UAC3_FUNCTION_SUBCLASS_SPEAKER 0x22 +#define UAC3_FUNCTION_SUBCLASS_MICROPHONE 0x23 +#define UAC3_FUNCTION_SUBCLASS_HEADSET 0x24 +#define UAC3_FUNCTION_SUBCLASS_HEADSET_ADAPTER 0x25 +#define UAC3_FUNCTION_SUBCLASS_SPEAKERPHONE 0x26 + +/* A.7 AUDIO FUNCTION CATEGORY CODES */ +#define UAC3_FUNCTION_SUBCLASS_UNDEFINED 0x00 +#define UAC3_FUNCTION_DESKTOP_SPEAKER 0x01 +#define UAC3_FUNCTION_HOME_THEATER 0x02 +#define UAC3_FUNCTION_MICROPHONE 0x03 +#define UAC3_FUNCTION_HEADSET 0x04 +#define UAC3_FUNCTION_TELEPHONE 0x05 +#define UAC3_FUNCTION_CONVERTER 0x06 +#define UAC3_FUNCTION_SOUND_RECORDER 0x07 +#define UAC3_FUNCTION_IO_BOX 0x08 +#define UAC3_FUNCTION_MUSICAL_INSTRUMENT 0x09 +#define UAC3_FUNCTION_PRO_AUDIO 0x0a +#define UAC3_FUNCTION_AUDIO_VIDEO 0x0b +#define UAC3_FUNCTION_CONTROL_PANEL 0x0c +#define UAC3_FUNCTION_HEADPHONE 0x0d +#define UAC3_FUNCTION_GENERIC_SPEAKER 0x0e +#define UAC3_FUNCTION_HEADSET_ADAPTER 0x0f +#define UAC3_FUNCTION_SPEAKERPHONE 0x10 +#define UAC3_FUNCTION_OTHER 0xff + +/* A.8 AUDIO CLASS-SPECIFIC DESCRIPTOR TYPES */ +#define UAC3_CS_UNDEFINED 0x20 +#define UAC3_CS_DEVICE 0x21 +#define UAC3_CS_CONFIGURATION 0x22 +#define UAC3_CS_STRING 0x23 +#define UAC3_CS_INTERFACE 0x24 +#define UAC3_CS_ENDPOINT 0x25 +#define UAC3_CS_CLUSTER 0x26 + +/* A.10 CLUSTER DESCRIPTOR SEGMENT TYPES */ +#define UAC3_SEGMENT_UNDEFINED 0x00 +#define UAC3_CLUSTER_DESCRIPTION 0x01 
+#define UAC3_CLUSTER_VENDOR_DEFINED 0x1F +#define UAC3_CHANNEL_INFORMATION 0x20 +#define UAC3_CHANNEL_AMBISONIC 0x21 +#define UAC3_CHANNEL_DESCRIPTION 0x22 +#define UAC3_CHANNEL_VENDOR_DEFINED 0xFE +#define UAC3_END_SEGMENT 0xFF + +/* A.11 CHANNEL PURPOSE DEFINITIONS */ +#define UAC3_PURPOSE_UNDEFINED 0x00 +#define UAC3_PURPOSE_GENERIC_AUDIO 0x01 +#define UAC3_PURPOSE_VOICE 0x02 +#define UAC3_PURPOSE_SPEECH 0x03 +#define UAC3_PURPOSE_AMBIENT 0x04 +#define UAC3_PURPOSE_REFERENCE 0x05 +#define UAC3_PURPOSE_ULTRASONIC 0x06 +#define UAC3_PURPOSE_VIBROKINETIC 0x07 +#define UAC3_PURPOSE_NON_AUDIO 0xFF + +/* A.12 CHANNEL RELATIONSHIP DEFINITIONS */ +#define UAC3_CH_RELATIONSHIP_UNDEFINED 0x00 +#define UAC3_CH_MONO 0x01 +#define UAC3_CH_LEFT 0x02 +#define UAC3_CH_RIGHT 0x03 +#define UAC3_CH_ARRAY 0x04 +#define UAC3_CH_PATTERN_X 0x20 +#define UAC3_CH_PATTERN_Y 0x21 +#define UAC3_CH_PATTERN_A 0x22 +#define UAC3_CH_PATTERN_B 0x23 +#define UAC3_CH_PATTERN_M 0x24 +#define UAC3_CH_PATTERN_S 0x25 +#define UAC3_CH_FRONT_LEFT 0x80 +#define UAC3_CH_FRONT_RIGHT 0x81 +#define UAC3_CH_FRONT_CENTER 0x82 +#define UAC3_CH_FRONT_LEFT_OF_CENTER 0x83 +#define UAC3_CH_FRONT_RIGHT_OF_CENTER 0x84 +#define UAC3_CH_FRONT_WIDE_LEFT 0x85 +#define UAC3_CH_FRONT_WIDE_RIGHT 0x86 +#define UAC3_CH_SIDE_LEFT 0x87 +#define UAC3_CH_SIDE_RIGHT 0x88 +#define UAC3_CH_SURROUND_ARRAY_LEFT 0x89 +#define UAC3_CH_SURROUND_ARRAY_RIGHT 0x8A +#define UAC3_CH_BACK_LEFT 0x8B +#define UAC3_CH_BACK_RIGHT 0x8C +#define UAC3_CH_BACK_CENTER 0x8D +#define UAC3_CH_BACK_LEFT_OF_CENTER 0x8E +#define UAC3_CH_BACK_RIGHT_OF_CENTER 0x8F +#define UAC3_CH_BACK_WIDE_LEFT 0x90 +#define UAC3_CH_BACK_WIDE_RIGHT 0x91 +#define UAC3_CH_TOP_CENTER 0x92 +#define UAC3_CH_TOP_FRONT_LEFT 0x93 +#define UAC3_CH_TOP_FRONT_RIGHT 0x94 +#define UAC3_CH_TOP_FRONT_CENTER 0x95 +#define UAC3_CH_TOP_FRONT_LOC 0x96 +#define UAC3_CH_TOP_FRONT_ROC 0x97 +#define UAC3_CH_TOP_FRONT_WIDE_LEFT 0x98 +#define UAC3_CH_TOP_FRONT_WIDE_RIGHT 0x99 +#define 
UAC3_CH_TOP_SIDE_LEFT 0x9A +#define UAC3_CH_TOP_SIDE_RIGHT 0x9B +#define UAC3_CH_TOP_SURR_ARRAY_LEFT 0x9C +#define UAC3_CH_TOP_SURR_ARRAY_RIGHT 0x9D +#define UAC3_CH_TOP_BACK_LEFT 0x9E +#define UAC3_CH_TOP_BACK_RIGHT 0x9F +#define UAC3_CH_TOP_BACK_CENTER 0xA0 +#define UAC3_CH_TOP_BACK_LOC 0xA1 +#define UAC3_CH_TOP_BACK_ROC 0xA2 +#define UAC3_CH_TOP_BACK_WIDE_LEFT 0xA3 +#define UAC3_CH_TOP_BACK_WIDE_RIGHT 0xA4 +#define UAC3_CH_BOTTOM_CENTER 0xA5 +#define UAC3_CH_BOTTOM_FRONT_LEFT 0xA6 +#define UAC3_CH_BOTTOM_FRONT_RIGHT 0xA7 +#define UAC3_CH_BOTTOM_FRONT_CENTER 0xA8 +#define UAC3_CH_BOTTOM_FRONT_LOC 0xA9 +#define UAC3_CH_BOTTOM_FRONT_ROC 0xAA +#define UAC3_CH_BOTTOM_FRONT_WIDE_LEFT 0xAB +#define UAC3_CH_BOTTOM_FRONT_WIDE_RIGHT 0xAC +#define UAC3_CH_BOTTOM_SIDE_LEFT 0xAD +#define UAC3_CH_BOTTOM_SIDE_RIGHT 0xAE +#define UAC3_CH_BOTTOM_SURR_ARRAY_LEFT 0xAF +#define UAC3_CH_BOTTOM_SURR_ARRAY_RIGHT 0xB0 +#define UAC3_CH_BOTTOM_BACK_LEFT 0xB1 +#define UAC3_CH_BOTTOM_BACK_RIGHT 0xB2 +#define UAC3_CH_BOTTOM_BACK_CENTER 0xB3 +#define UAC3_CH_BOTTOM_BACK_LOC 0xB4 +#define UAC3_CH_BOTTOM_BACK_ROC 0xB5 +#define UAC3_CH_BOTTOM_BACK_WIDE_LEFT 0xB6 +#define UAC3_CH_BOTTOM_BACK_WIDE_RIGHT 0xB7 +#define UAC3_CH_LOW_FREQUENCY_EFFECTS 0xB8 +#define UAC3_CH_LFE_LEFT 0xB9 +#define UAC3_CH_LFE_RIGHT 0xBA +#define UAC3_CH_HEADPHONE_LEFT 0xBB +#define UAC3_CH_HEADPHONE_RIGHT 0xBC + +/* A.15 AUDIO CLASS-SPECIFIC AC INTERFACE DESCRIPTOR SUBTYPES */ +/* see audio.h for the rest, which is identical to v1 */ +#define UAC3_EXTENDED_TERMINAL 0x04 +#define UAC3_MIXER_UNIT 0x05 +#define UAC3_SELECTOR_UNIT 0x06 +#define UAC3_FEATURE_UNIT 0x07 +#define UAC3_EFFECT_UNIT 0x08 +#define UAC3_PROCESSING_UNIT 0x09 +#define UAC3_EXTENSION_UNIT 0x0a +#define UAC3_CLOCK_SOURCE 0x0b +#define UAC3_CLOCK_SELECTOR 0x0c +#define UAC3_CLOCK_MULTIPLIER 0x0d +#define UAC3_SAMPLE_RATE_CONVERTER 0x0e +#define UAC3_CONNECTORS 0x0f +#define UAC3_POWER_DOMAIN 0x10 + +/* A.22 AUDIO CLASS-SPECIFIC REQUEST CODES */ +/* see 
audio-v2.h for the rest, which is identical to v2 */ +#define UAC3_CS_REQ_INTEN 0x04 +#define UAC3_CS_REQ_STRING 0x05 +#define UAC3_CS_REQ_HIGH_CAPABILITY_DESCRIPTOR 0x06 + +/* A.23.1 AUDIOCONTROL INTERFACE CONTROL SELECTORS */ +#define UAC3_AC_CONTROL_UNDEFINED 0x00 +#define UAC3_AC_ACTIVE_INTERFACE_CONTROL 0x01 +#define UAC3_AC_POWER_DOMAIN_CONTROL 0x02 + +#endif /* __LINUX_USB_AUDIO_V3_H */ -- cgit v1.2.3 From 94b9d9b7a14cbb1640868d53b27f403ed2e5b4a9 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Wed, 21 Mar 2018 04:42:20 -0400 Subject: audit: remove path param from link denied function In commit 45b578fe4c3cade6f4ca1fc934ce199afd857edc ("audit: link denied should not directly generate PATH record") the need for the struct path *link parameter was removed. Remove the now useless struct path argument. Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/linux/audit.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index af410d9fbf2d..75d5b031e802 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -146,8 +146,7 @@ extern void audit_log_d_path(struct audit_buffer *ab, const struct path *path); extern void audit_log_key(struct audit_buffer *ab, char *key); -extern void audit_log_link_denied(const char *operation, - const struct path *link); +extern void audit_log_link_denied(const char *operation); extern void audit_log_lost(const char *message); extern int audit_log_task_context(struct audit_buffer *ab); @@ -194,8 +193,7 @@ static inline void audit_log_d_path(struct audit_buffer *ab, { } static inline void audit_log_key(struct audit_buffer *ab, char *key) { } -static inline void audit_log_link_denied(const char *string, - const struct path *link) +static inline void audit_log_link_denied(const char *string) { } static inline int audit_log_task_context(struct audit_buffer *ab) { -- cgit v1.2.3 From 
1acfb9b7ee0b1881bb8e875b6757976e48293ec4 Mon Sep 17 00:00:00 2001 From: Jay Fang Date: Mon, 12 Mar 2018 17:13:32 +0800 Subject: PCI: Add decoding for 16 GT/s link speed PCIe 4.0 defines the 16.0 GT/s link speed. Links can run at that speed without any Linux changes, but previously their sysfs "max_link_speed" and "current_link_speed" files contained "Unknown speed", not the expected "16.0 GT/s". Add decoding for the new 16 GT/s link speed. Signed-off-by: Jay Fang [bhelgaas: add PCI_EXP_LNKCAP2_SLS_16_0GB] Signed-off-by: Bjorn Helgaas Reviewed-by: Dongdong Liu --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 024a1beda008..8043a5937ad0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -256,6 +256,7 @@ enum pci_bus_speed { PCIE_SPEED_2_5GT = 0x14, PCIE_SPEED_5_0GT = 0x15, PCIE_SPEED_8_0GT = 0x16, + PCIE_SPEED_16_0GT = 0x17, PCI_SPEED_UNKNOWN = 0xff, }; -- cgit v1.2.3 From 031e3601869c815582ca1d49d1ff73de58e446b0 Mon Sep 17 00:00:00 2001 From: Zhichang Yuan Date: Thu, 15 Mar 2018 02:15:50 +0800 Subject: lib: Add generic PIO mapping method 41f8bba7f555 ("of/pci: Add pci_register_io_range() and pci_pio_to_address()") added support for PCI I/O space mapped into CPU physical memory space. With that support, the I/O ranges configured for PCI/PCIe hosts on some architectures can be mapped to logical PIO and converted easily between CPU address and the corresponding logical PIO. Based on this, PCI I/O port space can be accessed via in/out accessors that use memory read/write. But on some platforms, there are bus hosts that access I/O port space with host-local I/O port addresses rather than memory addresses. Add a more generic I/O mapping method to support those devices. With this patch, both the CPU addresses and the host-local port can be mapped into the logical PIO space with different logical/fake PIOs. 
After this, all the I/O accesses to either PCI MMIO devices or host-local I/O peripherals can be unified into the existing I/O accessors defined in asm-generic/io.h and be redirected to the right device-specific hooks based on the input logical PIO. Tested-by: dann frazier Signed-off-by: Zhichang Yuan Signed-off-by: Gabriele Paoloni Signed-off-by: John Garry [bhelgaas: remove -EFAULT return from logic_pio_register_range() per https://lkml.kernel.org/r/20180403143909.GA21171@ulmo, fix NULL pointer checking per https://lkml.kernel.org/r/20180403211505.GA29612@embeddedor.com] Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko --- include/linux/logic_pio.h | 123 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 include/linux/logic_pio.h (limited to 'include/linux') diff --git a/include/linux/logic_pio.h b/include/linux/logic_pio.h new file mode 100644 index 000000000000..cbd9d8495690 --- /dev/null +++ b/include/linux/logic_pio.h @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2017 HiSilicon Limited, All Rights Reserved. 
+ * Author: Gabriele Paoloni + * Author: Zhichang Yuan + */ + +#ifndef __LINUX_LOGIC_PIO_H +#define __LINUX_LOGIC_PIO_H + +#include + +enum { + LOGIC_PIO_INDIRECT, /* Indirect IO flag */ + LOGIC_PIO_CPU_MMIO, /* Memory-mapped IO flag */ +}; + +struct logic_pio_hwaddr { + struct list_head list; + struct fwnode_handle *fwnode; + resource_size_t hw_start; + resource_size_t io_start; + resource_size_t size; /* range size populated */ + unsigned long flags; + + void *hostdata; + const struct logic_pio_host_ops *ops; +}; + +struct logic_pio_host_ops { + u32 (*in)(void *hostdata, unsigned long addr, size_t dwidth); + void (*out)(void *hostdata, unsigned long addr, u32 val, + size_t dwidth); + u32 (*ins)(void *hostdata, unsigned long addr, void *buffer, + size_t dwidth, unsigned int count); + void (*outs)(void *hostdata, unsigned long addr, const void *buffer, + size_t dwidth, unsigned int count); +}; + +#ifdef CONFIG_INDIRECT_PIO +u8 logic_inb(unsigned long addr); +void logic_outb(u8 value, unsigned long addr); +void logic_outw(u16 value, unsigned long addr); +void logic_outl(u32 value, unsigned long addr); +u16 logic_inw(unsigned long addr); +u32 logic_inl(unsigned long addr); +void logic_outb(u8 value, unsigned long addr); +void logic_outw(u16 value, unsigned long addr); +void logic_outl(u32 value, unsigned long addr); +void logic_insb(unsigned long addr, void *buffer, unsigned int count); +void logic_insl(unsigned long addr, void *buffer, unsigned int count); +void logic_insw(unsigned long addr, void *buffer, unsigned int count); +void logic_outsb(unsigned long addr, const void *buffer, unsigned int count); +void logic_outsw(unsigned long addr, const void *buffer, unsigned int count); +void logic_outsl(unsigned long addr, const void *buffer, unsigned int count); + +#ifndef inb +#define inb logic_inb +#endif + +#ifndef inw +#define inw logic_inw +#endif + +#ifndef inl +#define inl logic_inl +#endif + +#ifndef outb +#define outb logic_outb +#endif + +#ifndef outw 
+#define outw logic_outw +#endif + +#ifndef outl +#define outl logic_outl +#endif + +#ifndef insb +#define insb logic_insb +#endif + +#ifndef insw +#define insw logic_insw +#endif + +#ifndef insl +#define insl logic_insl +#endif + +#ifndef outsb +#define outsb logic_outsb +#endif + +#ifndef outsw +#define outsw logic_outsw +#endif + +#ifndef outsl +#define outsl logic_outsl +#endif + +/* + * We reserve 0x4000 bytes for Indirect IO as so far this library is only + * used by the HiSilicon LPC Host. If needed, we can reserve a wider IO + * area by redefining the macro below. + */ +#define PIO_INDIRECT_SIZE 0x4000 +#define MMIO_UPPER_LIMIT (IO_SPACE_LIMIT - PIO_INDIRECT_SIZE) +#else +#define MMIO_UPPER_LIMIT IO_SPACE_LIMIT +#endif /* CONFIG_INDIRECT_PIO */ + +struct logic_pio_hwaddr *find_io_range_by_fwnode(struct fwnode_handle *fwnode); +unsigned long logic_pio_trans_hwaddr(struct fwnode_handle *fwnode, + resource_size_t hw_addr, resource_size_t size); +int logic_pio_register_range(struct logic_pio_hwaddr *newrange); +resource_size_t logic_pio_to_hwaddr(unsigned long pio); +unsigned long logic_pio_trans_cpuaddr(resource_size_t hw_addr); + +#endif /* __LINUX_LOGIC_PIO_H */ -- cgit v1.2.3 From 6e2fb22103b99c26ae30a46512abe75526d8e4c9 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 21 Mar 2018 12:42:25 -0400 Subject: block: use 32-bit blk_status_t on Alpha Early alpha processors cannot write a single byte or word; they read 8 bytes, modify the value in registers and write back 8 bytes. The type blk_status_t is defined as one byte, it is often written asynchronously by I/O completion routines, this asynchronous modification can corrupt content of nearby bytes if these nearby bytes can be written simultaneously by another CPU. 
- one example of such corruption is the structure dm_io where "blk_status_t status" is written by an asynchronous completion routine and "atomic_t io_count" is modified synchronously - another example is the structure dm_buffer where "unsigned hold_count" is modified synchronously from process context and "blk_status_t write_error" is modified asynchronously from bio completion routine This patch fixes the bug by changing the type blk_status_t to 32 bits if we are on Alpha and if we are compiling for a processor that doesn't have the byte-word-extension. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # 4.13+ Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index bf18b95ed92d..17b18b91ebac 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -20,8 +20,13 @@ typedef void (bio_end_io_t) (struct bio *); /* * Block error status values. See block/blk-core:blk_errors for the details. + * Alpha cannot write a byte atomically, so we need to use 32-bit value. */ +#if defined(CONFIG_ALPHA) && !defined(__alpha_bwx__) +typedef u32 __bitwise blk_status_t; +#else typedef u8 __bitwise blk_status_t; +#endif #define BLK_STS_OK 0 #define BLK_STS_NOTSUPP ((__force blk_status_t)1) #define BLK_STS_TIMEOUT ((__force blk_status_t)2) -- cgit v1.2.3 From f2d9b66d84f3ff5ea3aff111e6a403e04fa8bf37 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 20 Mar 2018 15:57:02 +0300 Subject: drivers: base: Unified device connection lookup Several frameworks - clk, gpio, phy, pwm, etc. - maintain lookup tables for describing connections and provide custom API for handling them. This introduces a single generic lookup table and API for the connections. 
The motivation for this commit is centralizing the connection lookup, but the goal is to ultimately extract the connection descriptions also from firmware by using the fwnode_graph_* functions and other mechanisms that are available. Reviewed-by: Hans de Goede Reviewed-by: Andy Shevchenko Signed-off-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index b093405ed525..204ff64279fd 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -729,6 +729,28 @@ struct device_dma_parameters { unsigned long segment_boundary_mask; }; +/** + * struct device_connection - Device Connection Descriptor + * @endpoint: The names of the two devices connected together + * @id: Unique identifier for the connection + * @list: List head, private, for internal use only + */ +struct device_connection { + const char *endpoint[2]; + const char *id; + struct list_head list; +}; + +void *device_connection_find_match(struct device *dev, const char *con_id, + void *data, + void *(*match)(struct device_connection *con, + int ep, void *data)); + +struct device *device_connection_find(struct device *dev, const char *con_id); + +void device_connection_add(struct device_connection *con); +void device_connection_remove(struct device_connection *con); + /** * enum device_link_state - Device link states. * @DL_STATE_NONE: The presence of the drivers is not being tracked. -- cgit v1.2.3 From bdecb33af34f79cbfbb656661210f77c8b8b5b5f Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 20 Mar 2018 15:57:03 +0300 Subject: usb: typec: API for controlling USB Type-C Multiplexers USB Type-C connectors consist of various muxes and switches that route the pins on the connector to the right locations. 
The USB Type-C drivers need to be able to control the muxes, as they are the ones that know things like the cable plug orientation, and the current mode that was negotiated with the partner. This introduces a small API for registering and controlling cable plug orientation switches, and separate small API for registering and controlling pin multiplexer/demultiplexer switches that are needed with Accessory/Alternate Modes. Reviewed-by: Hans de Goede Reviewed-by: Andy Shevchenko Signed-off-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 14 +++++++++++ include/linux/usb/typec_mux.h | 55 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 include/linux/usb/typec_mux.h (limited to 'include/linux') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index 0d44ce6af08f..2408e5c2ed91 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -60,6 +60,12 @@ enum typec_accessory { #define TYPEC_MAX_ACCESSORY 3 +enum typec_orientation { + TYPEC_ORIENTATION_NONE, + TYPEC_ORIENTATION_NORMAL, + TYPEC_ORIENTATION_REVERSE, +}; + /* * struct usb_pd_identity - USB Power Delivery identity data * @id_header: ID Header VDO @@ -185,6 +191,8 @@ struct typec_partner_desc { * @pd_revision: USB Power Delivery Specification revision if supported * @prefer_role: Initial role preference * @accessory: Supported Accessory Modes + * @sw: Cable plug orientation switch + * @mux: Multiplexer switch for Alternate/Accessory Modes * @fwnode: Optional fwnode of the port * @try_role: Set data role preference for DRP port * @dr_set: Set Data Role @@ -202,6 +210,8 @@ struct typec_capability { int prefer_role; enum typec_accessory accessory[TYPEC_MAX_ACCESSORY]; + struct typec_switch *sw; + struct typec_mux *mux; struct fwnode_handle *fwnode; int (*try_role)(const struct typec_capability *, @@ -245,4 +255,8 @@ void typec_set_pwr_role(struct typec_port *port, enum typec_role role); void 
typec_set_vconn_role(struct typec_port *port, enum typec_role role); void typec_set_pwr_opmode(struct typec_port *port, enum typec_pwr_opmode mode); +int typec_set_orientation(struct typec_port *port, + enum typec_orientation orientation); +int typec_set_mode(struct typec_port *port, int mode); + #endif /* __LINUX_USB_TYPEC_H */ diff --git a/include/linux/usb/typec_mux.h b/include/linux/usb/typec_mux.h new file mode 100644 index 000000000000..12c1b057834b --- /dev/null +++ b/include/linux/usb/typec_mux.h @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef __USB_TYPEC_MUX +#define __USB_TYPEC_MUX + +#include +#include + +struct device; + +/** + * struct typec_switch - USB Type-C cable orientation switch + * @dev: Switch device + * @entry: List entry + * @set: Callback to the driver for setting the orientation + * + * USB Type-C pin flipper switch routing the correct data pairs from the + * connector to the USB controller depending on the orientation of the cable + * plug. + */ +struct typec_switch { + struct device *dev; + struct list_head entry; + + int (*set)(struct typec_switch *sw, enum typec_orientation orientation); +}; + +/** + * struct typec_mux - USB Type-C connector pin mux + * @dev: Mux device + * @entry: List entry + * @set: Callback to the driver for setting the state of the mux + * + * Pin Multiplexer/DeMultiplexer switch routing the USB Type-C connector pins to + * different components depending on the requested mode of operation. Used with + * Accessory/Alternate modes. 
+ */ +struct typec_mux { + struct device *dev; + struct list_head entry; + + int (*set)(struct typec_mux *mux, int state); +}; + +struct typec_switch *typec_switch_get(struct device *dev); +void typec_switch_put(struct typec_switch *sw); +int typec_switch_register(struct typec_switch *sw); +void typec_switch_unregister(struct typec_switch *sw); + +struct typec_mux *typec_mux_get(struct device *dev); +void typec_mux_put(struct typec_mux *mux); +int typec_mux_register(struct typec_mux *mux); +void typec_mux_unregister(struct typec_mux *mux); + +#endif /* __USB_TYPEC_MUX */ -- cgit v1.2.3 From fde0aa6c175a4d8aa19e82b86ae0f9278bc8563b Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 20 Mar 2018 15:57:04 +0300 Subject: usb: common: Small class for USB role switches USB role switch is a device that can be used to choose the data role for USB connector. With dual-role capable USB controllers, the controller itself will be the switch, but on some platforms the USB host and device controllers are separate IPs and there is a mux between them and the connector. On those platforms the mux driver will need to register the switch. With USB Type-C connectors, the host-to-device relationship is negotiated over the Configuration Channel (CC). That means the USB Type-C drivers need to be in control of the role switch. The class provides a simple API for the USB Type-C drivers for the control. For other types of USB connectors (mainly microAB) the class provides user space control via sysfs attribute file that can be used to request role swapping from the switch. 
Reviewed-by: Hans de Goede Reviewed-by: Andy Shevchenko Signed-off-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/role.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 include/linux/usb/role.h (limited to 'include/linux') diff --git a/include/linux/usb/role.h b/include/linux/usb/role.h new file mode 100644 index 000000000000..edc51be4a77c --- /dev/null +++ b/include/linux/usb/role.h @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef __LINUX_USB_ROLE_H +#define __LINUX_USB_ROLE_H + +#include + +struct usb_role_switch; + +enum usb_role { + USB_ROLE_NONE, + USB_ROLE_HOST, + USB_ROLE_DEVICE, +}; + +typedef int (*usb_role_switch_set_t)(struct device *dev, enum usb_role role); +typedef enum usb_role (*usb_role_switch_get_t)(struct device *dev); + +/** + * struct usb_role_switch_desc - USB Role Switch Descriptor + * @usb2_port: Optional reference to the host controller port device (USB2) + * @usb3_port: Optional reference to the host controller port device (USB3) + * @udc: Optional reference to the peripheral controller device + * @set: Callback for setting the role + * @get: Callback for getting the role (optional) + * @allow_userspace_control: If true userspace may change the role through sysfs + * + * @usb2_port and @usb3_port will point to the USB host port and @udc to the USB + * device controller behind the USB connector with the role switch. If + * @usb2_port, @usb3_port and @udc are included in the description, the + * reference count for them should be incremented by the caller of + * usb_role_switch_register() before registering the switch. 
+ */ +struct usb_role_switch_desc { + struct device *usb2_port; + struct device *usb3_port; + struct device *udc; + usb_role_switch_set_t set; + usb_role_switch_get_t get; + bool allow_userspace_control; +}; + +int usb_role_switch_set_role(struct usb_role_switch *sw, enum usb_role role); +enum usb_role usb_role_switch_get_role(struct usb_role_switch *sw); +struct usb_role_switch *usb_role_switch_get(struct device *dev); +void usb_role_switch_put(struct usb_role_switch *sw); + +struct usb_role_switch * +usb_role_switch_register(struct device *parent, + const struct usb_role_switch_desc *desc); +void usb_role_switch_unregister(struct usb_role_switch *sw); + +#endif /* __LINUX_USB_ROLE_H */ -- cgit v1.2.3 From ceeb162500c3480b660a47d509db7955a7913271 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 20 Mar 2018 15:57:05 +0300 Subject: usb: typec: Separate the definitions for data and power roles USB Type-C specification v1.2 separated the power and data roles more clearly. Dual-Role-Data term was introduced, and the meaning of DRP was changed from "Dual-Role-Port" to "Dual-Role-Power". In order to allow the port drivers to describe the capabilities of the ports more clearly according to the newest specifications, introducing separate definitions for the data roles. 
Reviewed-by: Guenter Roeck Signed-off-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpm.h | 1 + include/linux/usb/typec.h | 14 +++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h index ca1c0b57f03f..5a5e1d8c5b65 100644 --- a/include/linux/usb/tcpm.h +++ b/include/linux/usb/tcpm.h @@ -91,6 +91,7 @@ struct tcpc_config { unsigned int operating_snk_mw; enum typec_port_type type; + enum typec_port_data data; enum typec_role default_role; bool try_role_hw; /* try.{src,snk} implemented in hardware */ diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index 2408e5c2ed91..672b39bb0adc 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -22,9 +22,15 @@ struct typec_port; struct fwnode_handle; enum typec_port_type { + TYPEC_PORT_SRC, + TYPEC_PORT_SNK, + TYPEC_PORT_DRP, +}; + +enum typec_port_data { TYPEC_PORT_DFP, TYPEC_PORT_UFP, - TYPEC_PORT_DRP, + TYPEC_PORT_DRD, }; enum typec_plug_type { @@ -186,10 +192,11 @@ struct typec_partner_desc { /* * struct typec_capability - USB Type-C Port Capabilities - * @role: DFP (Host-only), UFP (Device-only) or DRP (Dual Role) + * @type: Supported power role of the port + * @data: Supported data role of the port * @revision: USB Type-C Specification release. Binary coded decimal * @pd_revision: USB Power Delivery Specification revision if supported - * @prefer_role: Initial role preference + * @prefer_role: Initial role preference (DRP ports). 
* @accessory: Supported Accessory Modes * @sw: Cable plug orientation switch * @mux: Multiplexer switch for Alternate/Accessory Modes @@ -205,6 +212,7 @@ struct typec_partner_desc { */ struct typec_capability { enum typec_port_type type; + enum typec_port_data data; u16 revision; /* 0120H = "1.2" */ u16 pd_revision; /* 0300H = "3.0" */ int prefer_role; -- cgit v1.2.3 From c6962c29729cc64177f56a466766daa7de9f87ac Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 20 Mar 2018 15:57:06 +0300 Subject: usb: typec: tcpm: Set USB role switch to device mode when configured as such Setting the mux to MUX_NONE and the switch to USB_SWITCH_DISCONNECT when the data-role is device is not correct. Plenty of devices support operating as USB device through a (separate) USB device controller. We really need 2 different versions of USB_SWITCH_CONNECT, USB_SWITCH_CONNECT_HOST and USB_SWITCH_DEVICE. Rather than modifying the tcpc_usb_switch enum for this, simply remove it and switch to the usb_role enum which provides exactly this, this will save us needing to convert between the 2 enums when calling an usb-role-switch driver later. Besides switching to the usb_role type, this commit also actually sets the mux to TYPEC_MUX_USB and the switch to USB_ROLE_DEVICE instead of setting both to none when the data-role is device. This commit also makes tcpm_reset_port() call tcpm_mux_set(port, TYPEC_MUX_NONE, USB_ROLE_NONE) so that the mux and switch do _not_ stay in their last mode after a detach. 
Signed-off-by: Hans de Goede Reviewed-by: Guenter Roeck Reviewed-by: Andy Shevchenko Signed-off-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpm.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h index 5a5e1d8c5b65..3e8bdaa5085a 100644 --- a/include/linux/usb/tcpm.h +++ b/include/linux/usb/tcpm.h @@ -16,6 +16,7 @@ #define __LINUX_USB_TCPM_H #include +#include #include #include "pd.h" @@ -98,11 +99,6 @@ struct tcpc_config { const struct typec_altmode_desc *alt_modes; }; -enum tcpc_usb_switch { - TCPC_USB_SWITCH_CONNECT, - TCPC_USB_SWITCH_DISCONNECT, -}; - /* Mux state attributes */ #define TCPC_MUX_USB_ENABLED BIT(0) /* USB enabled */ #define TCPC_MUX_DP_ENABLED BIT(1) /* DP enabled */ @@ -119,7 +115,7 @@ enum tcpc_mux_mode { struct tcpc_mux_dev { int (*set)(struct tcpc_mux_dev *dev, enum tcpc_mux_mode mux_mode, - enum tcpc_usb_switch usb_config, + enum usb_role usb_role, enum typec_cc_polarity polarity); bool dfp_only; void *priv_data; -- cgit v1.2.3 From 2000016c94b4f724cb5851486b9f9a94e8da32fc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 20 Mar 2018 15:57:07 +0300 Subject: usb: typec: tcpm: Use new Type-C switch/mux and usb-role-switch functions Remove the unused (not implemented anywhere) tcpc_mux_dev abstraction and replace it with calling the new typec_set_orientation, usb_role_switch_set and typec_set_mode functions. 
Signed-off-by: Hans de Goede Reviewed-by: Guenter Roeck Reviewed-by: Andy Shevchenko Signed-off-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpm.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h index 3e8bdaa5085a..f0d839daeaea 100644 --- a/include/linux/usb/tcpm.h +++ b/include/linux/usb/tcpm.h @@ -16,7 +16,6 @@ #define __LINUX_USB_TCPM_H #include -#include #include #include "pd.h" @@ -113,14 +112,6 @@ enum tcpc_mux_mode { TCPC_MUX_DP_ENABLED, }; -struct tcpc_mux_dev { - int (*set)(struct tcpc_mux_dev *dev, enum tcpc_mux_mode mux_mode, - enum usb_role usb_role, - enum typec_cc_polarity polarity); - bool dfp_only; - void *priv_data; -}; - /** * struct tcpc_dev - Port configuration and callback functions * @config: Pointer to port configuration @@ -172,7 +163,6 @@ struct tcpc_dev { int (*try_role)(struct tcpc_dev *dev, int role); int (*pd_transmit)(struct tcpc_dev *dev, enum tcpm_transmit_type type, const struct pd_message *msg); - struct tcpc_mux_dev *mux; }; struct tcpm_port; -- cgit v1.2.3 From 5df7af85ecd88e8b5f1f31d6456c3cf38a8bbdda Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 20 Mar 2018 09:44:52 +0800 Subject: net: phy: Add general dummy stubs for MMD register access For some phy devices, even though they don't support the MMD extended register access, it does have some side effect if we are trying to read/write the MMD registers via indirect method. So introduce general dummy stubs for MMD register access which these devices can use to avoid such side effect. Fixes: b6b5e8a69118 ("gianfar: Disable EEE autoneg by default") Signed-off-by: Kevin Hao Signed-off-by: David S. 
Miller --- include/linux/phy.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index b260fb336b25..7c4c2379e010 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -984,6 +984,10 @@ static inline int genphy_no_soft_reset(struct phy_device *phydev) { return 0; } +int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad, + u16 regnum); +int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, + u16 regnum, u16 val); /* Clause 45 PHY */ int genphy_c45_restart_aneg(struct phy_device *phydev); -- cgit v1.2.3 From 5d42c96e1cf98bdfea18e7d32e5f6cf75aac93b9 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 21 Mar 2018 15:34:29 -0700 Subject: firmware: add firmware_request_cache() to help with cache on reboot Some devices have an optimization in place to enable the firmware to be retained during a system reboot, so after reboot the device can skip requesting and loading the firmware. This can save up to 1s in load time. The mt7601u 802.11 device happens to be such a device. When these devices retain the firmware on a reboot and then suspend they can miss looking for the firmware on resume. To help with this we need a way to cache the firmware when such an optimization has taken place. Signed-off-by: Luis R. 
Rodriguez Signed-off-by: Greg Kroah-Hartman --- include/linux/firmware.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware.h b/include/linux/firmware.h index d4508080348d..41050417cafb 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -85,4 +85,7 @@ static inline int request_firmware_into_buf(const struct firmware **firmware_p, } #endif + +int firmware_request_cache(struct device *device, const char *name); + #endif -- cgit v1.2.3 From f59f1caf72ba00d519c793c3deb32cd3be32edc2 Mon Sep 17 00:00:00 2001 From: Daniel Vacek Date: Thu, 22 Mar 2018 16:17:38 -0700 Subject: Revert "mm: page_alloc: skip over regions of invalid pfns where possible" This reverts commit b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns where possible"). The commit is meant to be a boot init speed up skipping the loop in memmap_init_zone() for invalid pfns. But given some specific memory mapping on x86_64 (or more generally theoretically anywhere but on arm with CONFIG_HAVE_ARCH_PFN_VALID) the implementation also skips valid pfns which is plain wrong and causes 'kernel BUG at mm/page_alloc.c:1389!' crash> log | grep -e BUG -e RIP -e Call.Trace -e move_freepages_block -e rmqueue -e freelist -A1 kernel BUG at mm/page_alloc.c:1389! 
invalid opcode: 0000 [#1] SMP -- RIP: 0010: move_freepages+0x15e/0x160 -- Call Trace: move_freepages_block+0x73/0x80 __rmqueue+0x263/0x460 get_page_from_freelist+0x7e1/0x9e0 __alloc_pages_nodemask+0x176/0x420 -- crash> page_init_bug -v | grep RAM 1000 - 9bfff System RAM (620.00 KiB) 100000 - 430bffff System RAM ( 1.05 GiB = 1071.75 MiB = 1097472.00 KiB) 4b0c8000 - 4bf9cfff System RAM ( 14.83 MiB = 15188.00 KiB) 4bfac000 - 646b1fff System RAM (391.02 MiB = 400408.00 KiB) 7b788000 - 7b7fffff System RAM (480.00 KiB) 100000000 - 67fffffff System RAM ( 22.00 GiB) crash> page_init_bug | head -6 7b788000 - 7b7fffff System RAM (480.00 KiB) 1fffff00000000 0 1 DMA32 4096 1048575 505736 505344 505855 0 0 0 DMA 1 4095 1fffff00000400 0 1 DMA32 4096 1048575 BUG, zones differ! crash> kmem -p 77fff000 78000000 7b5ff000 7b600000 7b787000 7b788000 PAGE PHYSICAL MAPPING INDEX CNT FLAGS ffffea0001e00000 78000000 0 0 0 0 ffffea0001ed7fc0 7b5ff000 0 0 0 0 ffffea0001ed8000 7b600000 0 0 0 0 <<<< ffffea0001ede1c0 7b787000 0 0 0 0 ffffea0001ede200 7b788000 0 0 1 1fffff00000000 Link: http://lkml.kernel.org/r/20180316143855.29838-1-neelx@redhat.com Fixes: b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns where possible") Signed-off-by: Daniel Vacek Acked-by: Ard Biesheuvel Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Vlastimil Babka Cc: Mel Gorman Cc: Pavel Tatashin Cc: Paul Burton Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 8be5077efb5f..f92ea7783652 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -187,7 +187,6 @@ int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, unsigned long *end_pfn); void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, unsigned long *out_end_pfn, int *out_nid); -unsigned long 
memblock_next_valid_pfn(unsigned long pfn, unsigned long max_pfn); /** * for_each_mem_pfn_range - early memory pfn range iterator -- cgit v1.2.3 From aefad9593ec5ad4aae5346253a8b646364cd7317 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 22 Mar 2018 20:52:43 -0500 Subject: sem/security: Pass kern_ipc_perm not sem_array into the sem security hooks All of the implementations of security hooks that take sem_array only access sem_perm the struct kern_ipc_perm member. This means the dependencies of the sem security hooks can be simplified by passing the kern_ipc_perm member of sem_array. Making this change will allow struct sem and struct sem_array to become private to ipc/sem.c. Signed-off-by: "Eric W. Biederman" --- include/linux/lsm_hooks.h | 10 +++++----- include/linux/security.h | 21 ++++++++++----------- 2 files changed, 15 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 7161d8e7ee79..e4a94863a88c 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1592,11 +1592,11 @@ union security_list_options { int (*shm_shmat)(struct shmid_kernel *shp, char __user *shmaddr, int shmflg); - int (*sem_alloc_security)(struct sem_array *sma); - void (*sem_free_security)(struct sem_array *sma); - int (*sem_associate)(struct sem_array *sma, int semflg); - int (*sem_semctl)(struct sem_array *sma, int cmd); - int (*sem_semop)(struct sem_array *sma, struct sembuf *sops, + int (*sem_alloc_security)(struct kern_ipc_perm *sma); + void (*sem_free_security)(struct kern_ipc_perm *sma); + int (*sem_associate)(struct kern_ipc_perm *sma, int semflg); + int (*sem_semctl)(struct kern_ipc_perm *sma, int cmd); + int (*sem_semop)(struct kern_ipc_perm *sma, struct sembuf *sops, unsigned nsops, int alter); int (*netlink_send)(struct sock *sk, struct sk_buff *skb); diff --git a/include/linux/security.h b/include/linux/security.h index 73f1ef625d40..fa7adac4b99a 100644 --- 
a/include/linux/security.h +++ b/include/linux/security.h @@ -36,7 +36,6 @@ struct linux_binprm; struct cred; struct rlimit; struct siginfo; -struct sem_array; struct sembuf; struct kern_ipc_perm; struct audit_context; @@ -368,11 +367,11 @@ void security_shm_free(struct shmid_kernel *shp); int security_shm_associate(struct shmid_kernel *shp, int shmflg); int security_shm_shmctl(struct shmid_kernel *shp, int cmd); int security_shm_shmat(struct shmid_kernel *shp, char __user *shmaddr, int shmflg); -int security_sem_alloc(struct sem_array *sma); -void security_sem_free(struct sem_array *sma); -int security_sem_associate(struct sem_array *sma, int semflg); -int security_sem_semctl(struct sem_array *sma, int cmd); -int security_sem_semop(struct sem_array *sma, struct sembuf *sops, +int security_sem_alloc(struct kern_ipc_perm *sma); +void security_sem_free(struct kern_ipc_perm *sma); +int security_sem_associate(struct kern_ipc_perm *sma, int semflg); +int security_sem_semctl(struct kern_ipc_perm *sma, int cmd); +int security_sem_semop(struct kern_ipc_perm *sma, struct sembuf *sops, unsigned nsops, int alter); void security_d_instantiate(struct dentry *dentry, struct inode *inode); int security_getprocattr(struct task_struct *p, char *name, char **value); @@ -1103,25 +1102,25 @@ static inline int security_shm_shmat(struct shmid_kernel *shp, return 0; } -static inline int security_sem_alloc(struct sem_array *sma) +static inline int security_sem_alloc(struct kern_ipc_perm *sma) { return 0; } -static inline void security_sem_free(struct sem_array *sma) +static inline void security_sem_free(struct kern_ipc_perm *sma) { } -static inline int security_sem_associate(struct sem_array *sma, int semflg) +static inline int security_sem_associate(struct kern_ipc_perm *sma, int semflg) { return 0; } -static inline int security_sem_semctl(struct sem_array *sma, int cmd) +static inline int security_sem_semctl(struct kern_ipc_perm *sma, int cmd) { return 0; } -static inline int 
security_sem_semop(struct sem_array *sma, +static inline int security_sem_semop(struct kern_ipc_perm *sma, struct sembuf *sops, unsigned nsops, int alter) { -- cgit v1.2.3 From 7191adff2a5566efb139c79ea03eda3d0520d44a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 22 Mar 2018 21:08:27 -0500 Subject: shm/security: Pass kern_ipc_perm not shmid_kernel into the shm security hooks All of the implementations of security hooks that take shmid_kernel only access shm_perm the struct kern_ipc_perm member. This means the dependencies of the shm security hooks can be simplified by passing the kern_ipc_perm member of shmid_kernel.. Making this change will allow struct shmid_kernel to become private to ipc/shm.c. Signed-off-by: "Eric W. Biederman" --- include/linux/lsm_hooks.h | 10 +++++----- include/linux/security.h | 21 ++++++++++----------- 2 files changed, 15 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index e4a94863a88c..cac7a8082c43 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1585,11 +1585,11 @@ union security_list_options { struct task_struct *target, long type, int mode); - int (*shm_alloc_security)(struct shmid_kernel *shp); - void (*shm_free_security)(struct shmid_kernel *shp); - int (*shm_associate)(struct shmid_kernel *shp, int shmflg); - int (*shm_shmctl)(struct shmid_kernel *shp, int cmd); - int (*shm_shmat)(struct shmid_kernel *shp, char __user *shmaddr, + int (*shm_alloc_security)(struct kern_ipc_perm *shp); + void (*shm_free_security)(struct kern_ipc_perm *shp); + int (*shm_associate)(struct kern_ipc_perm *shp, int shmflg); + int (*shm_shmctl)(struct kern_ipc_perm *shp, int cmd); + int (*shm_shmat)(struct kern_ipc_perm *shp, char __user *shmaddr, int shmflg); int (*sem_alloc_security)(struct kern_ipc_perm *sma); diff --git a/include/linux/security.h b/include/linux/security.h index fa7adac4b99a..f390755808ea 100644 --- 
a/include/linux/security.h +++ b/include/linux/security.h @@ -49,7 +49,6 @@ struct qstr; struct iattr; struct fown_struct; struct file_operations; -struct shmid_kernel; struct msg_msg; struct msg_queue; struct xattr; @@ -362,11 +361,11 @@ int security_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, int msqflg); int security_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, struct task_struct *target, long type, int mode); -int security_shm_alloc(struct shmid_kernel *shp); -void security_shm_free(struct shmid_kernel *shp); -int security_shm_associate(struct shmid_kernel *shp, int shmflg); -int security_shm_shmctl(struct shmid_kernel *shp, int cmd); -int security_shm_shmat(struct shmid_kernel *shp, char __user *shmaddr, int shmflg); +int security_shm_alloc(struct kern_ipc_perm *shp); +void security_shm_free(struct kern_ipc_perm *shp); +int security_shm_associate(struct kern_ipc_perm *shp, int shmflg); +int security_shm_shmctl(struct kern_ipc_perm *shp, int cmd); +int security_shm_shmat(struct kern_ipc_perm *shp, char __user *shmaddr, int shmflg); int security_sem_alloc(struct kern_ipc_perm *sma); void security_sem_free(struct kern_ipc_perm *sma); int security_sem_associate(struct kern_ipc_perm *sma, int semflg); @@ -1077,26 +1076,26 @@ static inline int security_msg_queue_msgrcv(struct msg_queue *msq, return 0; } -static inline int security_shm_alloc(struct shmid_kernel *shp) +static inline int security_shm_alloc(struct kern_ipc_perm *shp) { return 0; } -static inline void security_shm_free(struct shmid_kernel *shp) +static inline void security_shm_free(struct kern_ipc_perm *shp) { } -static inline int security_shm_associate(struct shmid_kernel *shp, +static inline int security_shm_associate(struct kern_ipc_perm *shp, int shmflg) { return 0; } -static inline int security_shm_shmctl(struct shmid_kernel *shp, int cmd) +static inline int security_shm_shmctl(struct kern_ipc_perm *shp, int cmd) { return 0; } -static inline int 
security_shm_shmat(struct shmid_kernel *shp, +static inline int security_shm_shmat(struct kern_ipc_perm *shp, char __user *shmaddr, int shmflg) { return 0; -- cgit v1.2.3 From d8c6e8543294428426578d74dc7aaf121e762d58 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 22 Mar 2018 21:22:26 -0500 Subject: msg/security: Pass kern_ipc_perm not msg_queue into the msg_queue security hooks All of the implementations of security hooks that take msg_queue only access q_perm the struct kern_ipc_perm member. This means the dependencies of the msg_queue security hooks can be simplified by passing the kern_ipc_perm member of msg_queue. Making this change will allow struct msg_queue to become private to ipc/msg.c. Signed-off-by: "Eric W. Biederman" --- include/linux/lsm_hooks.h | 12 ++++++------ include/linux/security.h | 25 ++++++++++++------------- 2 files changed, 18 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index cac7a8082c43..bde167fa2c51 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1575,13 +1575,13 @@ union security_list_options { int (*msg_msg_alloc_security)(struct msg_msg *msg); void (*msg_msg_free_security)(struct msg_msg *msg); - int (*msg_queue_alloc_security)(struct msg_queue *msq); - void (*msg_queue_free_security)(struct msg_queue *msq); - int (*msg_queue_associate)(struct msg_queue *msq, int msqflg); - int (*msg_queue_msgctl)(struct msg_queue *msq, int cmd); - int (*msg_queue_msgsnd)(struct msg_queue *msq, struct msg_msg *msg, + int (*msg_queue_alloc_security)(struct kern_ipc_perm *msq); + void (*msg_queue_free_security)(struct kern_ipc_perm *msq); + int (*msg_queue_associate)(struct kern_ipc_perm *msq, int msqflg); + int (*msg_queue_msgctl)(struct kern_ipc_perm *msq, int cmd); + int (*msg_queue_msgsnd)(struct kern_ipc_perm *msq, struct msg_msg *msg, int msqflg); - int (*msg_queue_msgrcv)(struct msg_queue *msq, struct msg_msg *msg, + int 
(*msg_queue_msgrcv)(struct kern_ipc_perm *msq, struct msg_msg *msg, struct task_struct *target, long type, int mode); diff --git a/include/linux/security.h b/include/linux/security.h index f390755808ea..128e1e4a5346 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -50,7 +50,6 @@ struct iattr; struct fown_struct; struct file_operations; struct msg_msg; -struct msg_queue; struct xattr; struct xfrm_sec_ctx; struct mm_struct; @@ -353,13 +352,13 @@ int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag); void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid); int security_msg_msg_alloc(struct msg_msg *msg); void security_msg_msg_free(struct msg_msg *msg); -int security_msg_queue_alloc(struct msg_queue *msq); -void security_msg_queue_free(struct msg_queue *msq); -int security_msg_queue_associate(struct msg_queue *msq, int msqflg); -int security_msg_queue_msgctl(struct msg_queue *msq, int cmd); -int security_msg_queue_msgsnd(struct msg_queue *msq, +int security_msg_queue_alloc(struct kern_ipc_perm *msq); +void security_msg_queue_free(struct kern_ipc_perm *msq); +int security_msg_queue_associate(struct kern_ipc_perm *msq, int msqflg); +int security_msg_queue_msgctl(struct kern_ipc_perm *msq, int cmd); +int security_msg_queue_msgsnd(struct kern_ipc_perm *msq, struct msg_msg *msg, int msqflg); -int security_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, +int security_msg_queue_msgrcv(struct kern_ipc_perm *msq, struct msg_msg *msg, struct task_struct *target, long type, int mode); int security_shm_alloc(struct kern_ipc_perm *shp); void security_shm_free(struct kern_ipc_perm *shp); @@ -1043,32 +1042,32 @@ static inline int security_msg_msg_alloc(struct msg_msg *msg) static inline void security_msg_msg_free(struct msg_msg *msg) { } -static inline int security_msg_queue_alloc(struct msg_queue *msq) +static inline int security_msg_queue_alloc(struct kern_ipc_perm *msq) { return 0; } -static inline void 
security_msg_queue_free(struct msg_queue *msq) +static inline void security_msg_queue_free(struct kern_ipc_perm *msq) { } -static inline int security_msg_queue_associate(struct msg_queue *msq, +static inline int security_msg_queue_associate(struct kern_ipc_perm *msq, int msqflg) { return 0; } -static inline int security_msg_queue_msgctl(struct msg_queue *msq, int cmd) +static inline int security_msg_queue_msgctl(struct kern_ipc_perm *msq, int cmd) { return 0; } -static inline int security_msg_queue_msgsnd(struct msg_queue *msq, +static inline int security_msg_queue_msgsnd(struct kern_ipc_perm *msq, struct msg_msg *msg, int msqflg) { return 0; } -static inline int security_msg_queue_msgrcv(struct msg_queue *msq, +static inline int security_msg_queue_msgrcv(struct kern_ipc_perm *msq, struct msg_msg *msg, struct task_struct *target, long type, int mode) -- cgit v1.2.3 From 1a5c1349d105df5196ad9025e271b02a4dc05aee Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 22 Mar 2018 21:30:56 -0500 Subject: sem: Move struct sem and struct sem_array into ipc/sem.c All of the users are now in ipc/sem.c so make the definitions local to that file to make code maintenance easier. AKA to prevent rebuilding the entire kernel when one of these files is changed. Signed-off-by: "Eric W. Biederman" --- include/linux/sem.h | 40 +--------------------------------------- 1 file changed, 1 insertion(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sem.h b/include/linux/sem.h index 9badd322dcee..5608a500c43e 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -2,48 +2,10 @@ #ifndef _LINUX_SEM_H #define _LINUX_SEM_H -#include -#include -#include -#include #include struct task_struct; - -/* One semaphore structure for each semaphore in the system. */ -struct sem { - int semval; /* current value */ - /* - * PID of the process that last modified the semaphore. For - * Linux, specifically these are: - * - semop - * - semctl, via SETVAL and SETALL. 
- * - at task exit when performing undo adjustments (see exit_sem). - */ - int sempid; - spinlock_t lock; /* spinlock for fine-grained semtimedop */ - struct list_head pending_alter; /* pending single-sop operations */ - /* that alter the semaphore */ - struct list_head pending_const; /* pending single-sop operations */ - /* that do not alter the semaphore*/ - time_t sem_otime; /* candidate for sem_otime */ -} ____cacheline_aligned_in_smp; - -/* One sem_array data structure for each set of semaphores in the system. */ -struct sem_array { - struct kern_ipc_perm sem_perm; /* permissions .. see ipc.h */ - time64_t sem_ctime; /* create/last semctl() time */ - struct list_head pending_alter; /* pending operations */ - /* that alter the array */ - struct list_head pending_const; /* pending complex operations */ - /* that do not alter semvals */ - struct list_head list_id; /* undo requests on this array */ - int sem_nsems; /* no. of semaphores in array */ - int complex_count; /* pending complex operations */ - unsigned int use_global_lock;/* >0: global lock required */ - - struct sem sems[]; -} __randomize_layout; +struct sem_undo_list; #ifdef CONFIG_SYSVIPC -- cgit v1.2.3 From aaeab02ddcc830e31c33cdb72a3c117b2d499ae2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 23 Mar 2018 13:44:06 +1100 Subject: usb/gadget: Add an EP dispose() callback for EP lifetime tracking Some UDC may want to allocate endpoints dynamically, either because the HW supports an arbitrary large number or because (like the Aspeed BMC SoCs), the pool of HW endpoints is shared between multiple gadgets. The allocation side can be done rather easily using the existing match_ep() UDC hook. However we have no good place to "free" them. This implements a "simple" variant of this, which calls an EP dispose callback on all EPs associated with a gadget when the composite device gets unbound. This is required by my upcoming Aspeed vHub driver. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 66a5cff7ee14..e3424234b23a 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -129,6 +129,7 @@ struct usb_ep_ops { int (*enable) (struct usb_ep *ep, const struct usb_endpoint_descriptor *desc); int (*disable) (struct usb_ep *ep); + void (*dispose) (struct usb_ep *ep); struct usb_request *(*alloc_request) (struct usb_ep *ep, gfp_t gfp_flags); -- cgit v1.2.3 From 888d867df4417deffc33927e6fc2c6925736fe92 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Mon, 5 Mar 2018 13:34:49 +0200 Subject: tpm: cmd_ready command can be issued only after granting locality The correct sequence is to first request locality and only after that perform cmd_ready handshake, otherwise the hardware will drop the subsequent message as from the device point of view the cmd_ready handshake wasn't performed. Symmetrically locality has to be relinquished only after going idle handshake has completed, this requires that go_idle has to poll for the completion and as well locality relinquish has to poll for completion so it is not overridden in back to back commands flow. Two wrapper functions are added (request_locality relinquish_locality) to simplify the error handling. The issue is only visible on devices that support multiple localities. 
Fixes: 877c57d0d0ca ("tpm_crb: request and relinquish locality 0") Signed-off-by: Tomas Winkler Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index bcdd3790e94d..06639fb6ab85 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -44,7 +44,7 @@ struct tpm_class_ops { bool (*update_timeouts)(struct tpm_chip *chip, unsigned long *timeout_cap); int (*request_locality)(struct tpm_chip *chip, int loc); - void (*relinquish_locality)(struct tpm_chip *chip, int loc); + int (*relinquish_locality)(struct tpm_chip *chip, int loc); void (*clk_enable)(struct tpm_chip *chip, bool value); }; -- cgit v1.2.3 From 6eb486b66a3094cdcd68dc39c9df3a29d6a51dd5 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Wed, 21 Mar 2018 20:58:49 -0500 Subject: irqchip/gic-v3: Ensure GICR_CTLR.EnableLPI=0 is observed before enabling Booting with GICR_CTLR.EnableLPI=1 is usually a bad idea, and may result in subtle memory corruption. Detecting this is thus pretty important. On detecting that LPIs are still enabled, we taint the kernel (because we're not sure of anything anymore), and try to disable LPIs. This can fail, as implementations are allowed to implement GICR_CTLR.EnableLPI as a one-way enable, meaning the redistributors cannot be reprogrammed with new tables. Should this happen, we fail probing the redistributor and warn the user that things are pretty dire. 
Signed-off-by: Shanker Donthineni [maz: reworded changelog, minor comment and message changes] Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 9aacea2aa938..5988473e4abf 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -106,6 +106,7 @@ #define GICR_PIDR2 GICD_PIDR2 #define GICR_CTLR_ENABLE_LPIS (1UL << 0) +#define GICR_CTLR_RWP (1UL << 3) #define GICR_TYPER_CPU_NUMBER(r) (((r) >> 8) & 0xffff) -- cgit v1.2.3 From 21e9b3e931f78497b19b1f8f3d59d19412c1a28f Mon Sep 17 00:00:00 2001 From: Andrew Chant Date: Thu, 22 Mar 2018 14:39:55 -0700 Subject: ALSA: usb-audio: fix uac control query argument This patch fixes code readability and should have no functional change. Correct uac control query functions to account for the 1-based indexing of USB Audio Class control identifiers. The function parameter, u8 control, should be the constant defined in audio-v2.h to identify the control to be checked for readability or writeability. This patch fixes all callers that had adjusted, and makes explicit the mapping between audio_feature_info[] array index and the associated control identifier. 
Signed-off-by: Andrew Chant Signed-off-by: Takashi Iwai --- include/linux/usb/audio-v2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h index 2db83a191e78..aaafecf073ff 100644 --- a/include/linux/usb/audio-v2.h +++ b/include/linux/usb/audio-v2.h @@ -36,12 +36,12 @@ static inline bool uac_v2v3_control_is_readable(u32 bmControls, u8 control) { - return (bmControls >> (control * 2)) & 0x1; + return (bmControls >> ((control - 1) * 2)) & 0x1; } static inline bool uac_v2v3_control_is_writeable(u32 bmControls, u8 control) { - return (bmControls >> (control * 2)) & 0x2; + return (bmControls >> ((control - 1) * 2)) & 0x2; } /* 4.7.2 Class-Specific AC Interface Descriptor */ -- cgit v1.2.3 From 3ec30113264a7bcd389f51d1738e42da0f41bb5a Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Mon, 8 Jan 2018 13:36:19 -0800 Subject: security: Add a cred_getsecid hook For IMA purposes, we want to be able to obtain the prepared secid in the bprm structure before the credentials are committed. Add a cred_getsecid hook that makes this possible. Signed-off-by: Matthew Garrett Acked-by: Paul Moore Cc: Paul Moore Cc: Stephen Smalley Cc: Casey Schaufler Signed-off-by: Mimi Zohar --- include/linux/lsm_hooks.h | 6 ++++++ include/linux/security.h | 1 + 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index e0ac011d07a5..bbc6a1240b2e 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -554,6 +554,10 @@ * @new points to the new credentials. * @old points to the original credentials. * Transfer data from original creds to new creds + * @cred_getsecid: + * Retrieve the security identifier of the cred structure @c + * @c contains the credentials, secid will be placed into @secid. + * In case of failure, @secid will be set to zero. 
* @kernel_act_as: * Set the credentials for a kernel service to act as (subjective context). * @new points to the credentials to be modified. @@ -1542,6 +1546,7 @@ union security_list_options { int (*cred_prepare)(struct cred *new, const struct cred *old, gfp_t gfp); void (*cred_transfer)(struct cred *new, const struct cred *old); + void (*cred_getsecid)(const struct cred *c, u32 *secid); int (*kernel_act_as)(struct cred *new, u32 secid); int (*kernel_create_files_as)(struct cred *new, struct inode *inode); int (*kernel_module_request)(char *kmod_name); @@ -1825,6 +1830,7 @@ struct security_hook_heads { struct list_head cred_free; struct list_head cred_prepare; struct list_head cred_transfer; + struct list_head cred_getsecid; struct list_head kernel_act_as; struct list_head kernel_create_files_as; struct list_head kernel_read_file; diff --git a/include/linux/security.h b/include/linux/security.h index 3f5fd988ee87..116b8717a98c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -324,6 +324,7 @@ int security_cred_alloc_blank(struct cred *cred, gfp_t gfp); void security_cred_free(struct cred *cred); int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp); void security_transfer_creds(struct cred *new, const struct cred *old); +void security_cred_getsecid(const struct cred *c, u32 *secid); int security_kernel_act_as(struct cred *new, u32 secid); int security_kernel_create_files_as(struct cred *new, struct inode *inode); int security_kernel_module_request(char *kmod_name); -- cgit v1.2.3 From 57b56ac6fecb05c3192586e4892572dd13d972de Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Wed, 21 Feb 2018 11:33:37 -0500 Subject: ima: fail file signature verification on non-init mounted filesystems FUSE can be mounted by unprivileged users either today with fusermount installed with setuid, or soon with the upcoming patches to allow FUSE mounts in a non-init user namespace. 
This patch addresses the new unprivileged non-init mounted filesystems, which are untrusted, by failing the signature verification. This patch defines two new flags SB_I_IMA_UNVERIFIABLE_SIGNATURE and SB_I_UNTRUSTED_MOUNTER. Signed-off-by: Mimi Zohar Cc: Miklos Szeredi Cc: Seth Forshee Cc: Dongsu Park Cc: Alban Crequy Acked-by: Serge Hallyn Acked-by: "Eric W. Biederman" --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c6baf767619e..d9e60824c374 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1321,6 +1321,8 @@ extern int send_sigurg(struct fown_struct *fown); /* sb->s_iflags to limit user namespace mounts */ #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ +#define SB_I_IMA_UNVERIFIABLE_SIGNATURE 0x00000020 +#define SB_I_UNTRUSTED_MOUNTER 0x00000040 /* Possible states of 'frozen' field */ enum { -- cgit v1.2.3 From 1c6ef16d38091a1820e98df25900b5977e404bbd Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 20 Mar 2018 12:04:47 +0100 Subject: HID: use BIT macro instead of plain integers for flags This can lead to some hairy situation with the developer losing a day or two realizing that 4 should be after 2, not 3. 
Signed-off-by: Benjamin Tissoires Reviewed-by: Dmitry Torokhov Acked-by: Peter Hutterer -- include/linux/hid.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) Signed-off-by: Jiri Kosina --- include/linux/hid.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 091a81cf330f..d104f2ebc809 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -26,6 +26,7 @@ #define __HID_H +#include #include #include #include @@ -494,13 +495,13 @@ struct hid_output_fifo { char *raw_report; }; -#define HID_CLAIMED_INPUT 1 -#define HID_CLAIMED_HIDDEV 2 -#define HID_CLAIMED_HIDRAW 4 -#define HID_CLAIMED_DRIVER 8 +#define HID_CLAIMED_INPUT BIT(0) +#define HID_CLAIMED_HIDDEV BIT(1) +#define HID_CLAIMED_HIDRAW BIT(2) +#define HID_CLAIMED_DRIVER BIT(3) -#define HID_STAT_ADDED 1 -#define HID_STAT_PARSED 2 +#define HID_STAT_ADDED BIT(0) +#define HID_STAT_PARSED BIT(1) struct hid_input { struct list_head list; -- cgit v1.2.3 From c30e5989d6926c5c1c77c87ed1e54f506e095d74 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 20 Mar 2018 12:04:48 +0100 Subject: HID: use BIT() macro for quirks too This should prevent future mess ups fortunately. 
Signed-off-by: Benjamin Tissoires Acked-by: Peter Hutterer -- include/linux/hid.h | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) Signed-off-by: Jiri Kosina --- include/linux/hid.h | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index d104f2ebc809..bc92005e5f08 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -311,13 +311,13 @@ struct hid_item { * HID connect requests */ -#define HID_CONNECT_HIDINPUT 0x01 -#define HID_CONNECT_HIDINPUT_FORCE 0x02 -#define HID_CONNECT_HIDRAW 0x04 -#define HID_CONNECT_HIDDEV 0x08 -#define HID_CONNECT_HIDDEV_FORCE 0x10 -#define HID_CONNECT_FF 0x20 -#define HID_CONNECT_DRIVER 0x40 +#define HID_CONNECT_HIDINPUT BIT(0) +#define HID_CONNECT_HIDINPUT_FORCE BIT(1) +#define HID_CONNECT_HIDRAW BIT(2) +#define HID_CONNECT_HIDDEV BIT(3) +#define HID_CONNECT_HIDDEV_FORCE BIT(4) +#define HID_CONNECT_FF BIT(5) +#define HID_CONNECT_DRIVER BIT(6) #define HID_CONNECT_DEFAULT (HID_CONNECT_HIDINPUT|HID_CONNECT_HIDRAW| \ HID_CONNECT_HIDDEV|HID_CONNECT_FF) @@ -330,25 +330,25 @@ struct hid_item { */ #define MAX_USBHID_BOOT_QUIRKS 4 -#define HID_QUIRK_INVERT 0x00000001 -#define HID_QUIRK_NOTOUCH 0x00000002 -#define HID_QUIRK_IGNORE 0x00000004 -#define HID_QUIRK_NOGET 0x00000008 -#define HID_QUIRK_HIDDEV_FORCE 0x00000010 -#define HID_QUIRK_BADPAD 0x00000020 -#define HID_QUIRK_MULTI_INPUT 0x00000040 -#define HID_QUIRK_HIDINPUT_FORCE 0x00000080 -#define HID_QUIRK_NO_EMPTY_INPUT 0x00000100 -/* 0x00000200 reserved for backward compatibility, was NO_INIT_INPUT_REPORTS */ -#define HID_QUIRK_ALWAYS_POLL 0x00000400 -#define HID_QUIRK_SKIP_OUTPUT_REPORTS 0x00010000 -#define HID_QUIRK_SKIP_OUTPUT_REPORT_ID 0x00020000 -#define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP 0x00040000 -#define HID_QUIRK_HAVE_SPECIAL_DRIVER 0x00080000 -#define 
HID_QUIRK_FULLSPEED_INTERVAL 0x10000000 -#define HID_QUIRK_NO_INIT_REPORTS 0x20000000 -#define HID_QUIRK_NO_IGNORE 0x40000000 -#define HID_QUIRK_NO_INPUT_SYNC 0x80000000 +#define HID_QUIRK_INVERT BIT(0) +#define HID_QUIRK_NOTOUCH BIT(1) +#define HID_QUIRK_IGNORE BIT(2) +#define HID_QUIRK_NOGET BIT(3) +#define HID_QUIRK_HIDDEV_FORCE BIT(4) +#define HID_QUIRK_BADPAD BIT(5) +#define HID_QUIRK_MULTI_INPUT BIT(6) +#define HID_QUIRK_HIDINPUT_FORCE BIT(7) +#define HID_QUIRK_NO_EMPTY_INPUT BIT(8) +/* BIT(9) reserved for backward compatibility, was NO_INIT_INPUT_REPORTS */ +#define HID_QUIRK_ALWAYS_POLL BIT(10) +#define HID_QUIRK_SKIP_OUTPUT_REPORTS BIT(16) +#define HID_QUIRK_SKIP_OUTPUT_REPORT_ID BIT(17) +#define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP BIT(18) +#define HID_QUIRK_HAVE_SPECIAL_DRIVER BIT(19) +#define HID_QUIRK_FULLSPEED_INTERVAL BIT(28) +#define HID_QUIRK_NO_INIT_REPORTS BIT(29) +#define HID_QUIRK_NO_IGNORE BIT(30) +#define HID_QUIRK_NO_INPUT_SYNC BIT(31) /* * HID device groups -- cgit v1.2.3 From 39335d1cbb8fb3260ac5f18fbcc45beb690e5ebd Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 20 Mar 2018 12:04:49 +0100 Subject: HID: core: remove the need for HID_QUIRK_NO_EMPTY_INPUT There is no real point of registering an empty input node. This should be default, but given some drivers need the blank input node to set it up during input_configured, we need to postpone the check for hidinput_has_been_populated(). 
Signed-off-by: Benjamin Tissoires Acked-by: Peter Hutterer Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index bc92005e5f08..b0db16fa7093 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -338,7 +338,7 @@ struct hid_item { #define HID_QUIRK_BADPAD BIT(5) #define HID_QUIRK_MULTI_INPUT BIT(6) #define HID_QUIRK_HIDINPUT_FORCE BIT(7) -#define HID_QUIRK_NO_EMPTY_INPUT BIT(8) +/* BIT(8) reserved for backward compatibility, was HID_QUIRK_NO_EMPTY_INPUT */ /* BIT(9) reserved for backward compatibility, was NO_INIT_INPUT_REPORTS */ #define HID_QUIRK_ALWAYS_POLL BIT(10) #define HID_QUIRK_SKIP_OUTPUT_REPORTS BIT(16) -- cgit v1.2.3 From 5b04cedeca188874d3267bc210ec10c337635ddd Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Thu, 15 Mar 2018 12:05:31 -0500 Subject: bus: fsl-mc: change mc_command in fsl_mc_command The "struct mc_command" is a very generic name for a global kernel structure. Change its name in "struct fsl_mc_command". 
Signed-off-by: Ioana Ciornei Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl/mc.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index cfb1fbf3a882..f27cb14088a4 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -209,7 +209,7 @@ struct mc_cmd_header { __le16 cmd_id; }; -struct mc_command { +struct fsl_mc_command { u64 header; u64 params[MC_CMD_NUM_OF_PARAMS]; }; @@ -256,7 +256,7 @@ static inline u64 mc_encode_cmd_header(u16 cmd_id, return header; } -static inline u16 mc_cmd_hdr_read_token(struct mc_command *cmd) +static inline u16 mc_cmd_hdr_read_token(struct fsl_mc_command *cmd) { struct mc_cmd_header *hdr = (struct mc_cmd_header *)&cmd->header; u16 token = le16_to_cpu(hdr->token); @@ -273,7 +273,7 @@ struct mc_rsp_api_ver { __le16 minor_ver; }; -static inline u32 mc_cmd_read_object_id(struct mc_command *cmd) +static inline u32 mc_cmd_read_object_id(struct fsl_mc_command *cmd) { struct mc_rsp_create *rsp_params; @@ -281,7 +281,7 @@ static inline u32 mc_cmd_read_object_id(struct mc_command *cmd) return le32_to_cpu(rsp_params->object_id); } -static inline void mc_cmd_read_api_version(struct mc_command *cmd, +static inline void mc_cmd_read_api_version(struct fsl_mc_command *cmd, u16 *major_ver, u16 *minor_ver) { @@ -342,7 +342,7 @@ struct fsl_mc_io { }; }; -int mc_send_command(struct fsl_mc_io *mc_io, struct mc_command *cmd); +int mc_send_command(struct fsl_mc_io *mc_io, struct fsl_mc_command *cmd); #ifdef CONFIG_FSL_MC_BUS #define dev_is_fsl_mc(_dev) ((_dev)->bus == &fsl_mc_bus_type) -- cgit v1.2.3 From 0063ec4459dcf1583c7aa84ada0f7125450d9245 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Wed, 14 Mar 2018 17:15:52 -0500 Subject: crypto: doc - Document remaining members in struct crypto_alg Add missing comments for union members ablkcipher, blkcipher, cipher, and compress. This silences complaints when building the htmldocs. 
Fixes: 0d7f488f0305a (crypto: doc - cipher data structures) Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- include/linux/crypto.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 7e6e84cf6383..6eb06101089f 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -435,6 +435,14 @@ struct compress_alg { * @cra_exit: Deinitialize the cryptographic transformation object. This is a * counterpart to @cra_init, used to remove various changes set in * @cra_init. + * @cra_u.ablkcipher: Union member which contains an asynchronous block cipher + * definition. See @struct @ablkcipher_alg. + * @cra_u.blkcipher: Union member which contains a synchronous block cipher + * definition See @struct @blkcipher_alg. + * @cra_u.cipher: Union member which contains a single-block symmetric cipher + * definition. See @struct @cipher_alg. + * @cra_u.compress: Union member which contains a (de)compression algorithm. + * See @struct @compress_alg. * @cra_module: Owner of this transformation implementation. Set to THIS_MODULE * @cra_list: internally used * @cra_users: internally used -- cgit v1.2.3 From d726f6b1997528354e1053accbb6223981e81802 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Tue, 13 Feb 2018 22:09:34 +0000 Subject: platform/x86: mlx-platform: Add deferred bus functionality mlx-platform activates i2c-mux-reg, which creates buses needed by mlxreg-hotplug. If the mlxreg-hotplug probe runs before the i2c-mux-reg probe completes, it may attempt to connect a device to an adapter number that has not been created yet, and fail. Make mlx-platform driver record the highest bus number in mlxreg-hotplug platform data and defer mlxreg-hotplug probe until all the buses are created.
Signed-off-by: Vadim Pasternak [dvhart: rewrite commit message more concisely] Signed-off-by: Darren Hart (VMware) --- include/linux/platform_data/mlxreg.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h index fcdc707eab99..262910967476 100644 --- a/include/linux/platform_data/mlxreg.h +++ b/include/linux/platform_data/mlxreg.h @@ -129,6 +129,7 @@ struct mlxreg_core_platform_data { * @mask: top aggregation interrupt common mask; * @cell_low: location of low aggregation interrupt register; * @mask_low: low aggregation interrupt common mask; + * @deferred_nr: I2C adapter number must be exist prior probing execution; */ struct mlxreg_core_hotplug_platform_data { struct mlxreg_core_item *items; @@ -139,6 +140,7 @@ struct mlxreg_core_hotplug_platform_data { u32 mask; u32 cell_low; u32 mask_low; + int deferred_nr; }; #endif /* __LINUX_PLATFORM_DATA_MLXREG_H */ -- cgit v1.2.3 From ef0f62264b2a9e6fc73476ed22ade1ff1f3ad7f3 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Tue, 13 Feb 2018 22:09:36 +0000 Subject: platform/x86: mlx-platform: Add physical bus number auto detection mlx-platform does not provide a bus number to i2c-mlxcpld, assuming it is always one. On some x86 systems, other i2c drivers may probe before i2c-mlxcpld, causing bus one to be busy. Make mlx-platform determine which adapter number is free prior to activating i2c-mlxcpld, adjusting the mux base numbers accordingly. Update the mlxreg-hotplug pdata similarly. This adds an explicit mlx-platform build dependency on I2C, update the Kconfig accordingly. Add the missing REGMAP dependency while we're at it.
Signed-off-by: Vadim Pasternak [dvhart: Rewrite commit message more concisely] [dvhart: Add build dependencies] Signed-off-by: Darren Hart (VMware) --- include/linux/platform_data/mlxreg.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h index 262910967476..2744cff1b297 100644 --- a/include/linux/platform_data/mlxreg.h +++ b/include/linux/platform_data/mlxreg.h @@ -130,6 +130,7 @@ struct mlxreg_core_platform_data { * @cell_low: location of low aggregation interrupt register; * @mask_low: low aggregation interrupt common mask; * @deferred_nr: I2C adapter number must be exist prior probing execution; + * @shift_nr: I2C adapter numbers must be incremented by this value; */ struct mlxreg_core_hotplug_platform_data { struct mlxreg_core_item *items; @@ -141,6 +142,7 @@ struct mlxreg_core_hotplug_platform_data { u32 cell_low; u32 mask_low; int deferred_nr; + int shift_nr; }; #endif /* __LINUX_PLATFORM_DATA_MLXREG_H */ -- cgit v1.2.3 From eb49778c8c6cbe075cf90d741ccf16f674a8db4e Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Wed, 5 Jul 2017 22:13:58 +1200 Subject: i2c: pca-platform: drop gpio from platform data Now that the i2c-pca-platform driver is using the device managed API for gpios there is no need for the reset gpio to be specified via i2c_pca9564_pf_platform_data. Signed-off-by: Chris Packham Signed-off-by: Wolfram Sang --- include/linux/i2c-pca-platform.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c-pca-platform.h b/include/linux/i2c-pca-platform.h index 0e5f7c77d1d8..c37329432a8e 100644 --- a/include/linux/i2c-pca-platform.h +++ b/include/linux/i2c-pca-platform.h @@ -3,9 +3,6 @@ #define I2C_PCA9564_PLATFORM_H struct i2c_pca9564_pf_platform_data { - int gpio; /* pin to reset chip.
driver will work when - * not supplied (negative value), but it - * cannot exit some error conditions then */ int i2c_clock_speed; /* values are defined in linux/i2c-algo-pca.h */ int timeout; /* timeout in jiffies */ }; -- cgit v1.2.3 From a2e102cd3cdd8b7a14e08716510707b15802073f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 22 Mar 2018 21:34:44 -0500 Subject: shm: Move struct shmid_kernel into ipc/shm.c All of the users are now in ipc/shm.c so make the definition local to that file to make code maintenance easier. AKA to prevent rebuilding the entire kernel when struct shmid_kernel changes. Signed-off-by: "Eric W. Biederman" --- include/linux/shm.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/shm.h b/include/linux/shm.h index 2bbafacfbfc9..3a8eae3ca33c 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -7,28 +7,6 @@ #include #include -struct shmid_kernel /* private to the kernel */ -{ - struct kern_ipc_perm shm_perm; - struct file *shm_file; - unsigned long shm_nattch; - unsigned long shm_segsz; - time64_t shm_atim; - time64_t shm_dtim; - time64_t shm_ctim; - pid_t shm_cprid; - pid_t shm_lprid; - struct user_struct *mlock_user; - - /* The task created the shm object. NULL if the task is dead. */ - struct task_struct *shm_creator; - struct list_head shm_clist; /* list by creator */ -} __randomize_layout; - -/* shm_mode upper byte flags */ -#define SHM_DEST 01000 /* segment will be destroyed on last detach */ -#define SHM_LOCKED 02000 /* segment will not be swapped */ - #ifdef CONFIG_SYSVIPC struct sysv_shm { struct list_head shm_clist; -- cgit v1.2.3 From 34b56df922b10ac2876f268c522951785bf333fd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 22 Mar 2018 21:37:34 -0500 Subject: msg: Move struct msg_queue into ipc/msg.c All of the users are now in ipc/msg.c so make the definition local to that file to make code maintenance easier. 
AKA to prevent rebuilding the entire kernel when struct msg_queue changes. Signed-off-by: "Eric W. Biederman" --- include/linux/msg.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msg.h b/include/linux/msg.h index 0a7eefeee0d1..9a972a296b95 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -3,7 +3,6 @@ #define _LINUX_MSG_H #include -#include #include /* one msg_msg structure for each message */ @@ -16,21 +15,4 @@ struct msg_msg { /* the actual message follows immediately */ }; -/* one msq_queue structure for each present queue on the system */ -struct msg_queue { - struct kern_ipc_perm q_perm; - time64_t q_stime; /* last msgsnd time */ - time64_t q_rtime; /* last msgrcv time */ - time64_t q_ctime; /* last change time */ - unsigned long q_cbytes; /* current number of bytes on queue */ - unsigned long q_qnum; /* number of messages in queue */ - unsigned long q_qbytes; /* max number of bytes on queue */ - pid_t q_lspid; /* pid of last msgsnd */ - pid_t q_lrpid; /* last receive pid */ - - struct list_head q_messages; - struct list_head q_receivers; - struct list_head q_senders; -} __randomize_layout; - #endif /* _LINUX_MSG_H */ -- cgit v1.2.3 From f83a396d06d499029fe6d32e326605a2b5ca4eff Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 22 Mar 2018 21:45:50 -0500 Subject: ipc: Move IPCMNI from include/ipc.h into ipc/util.h The definition IPCMNI is only used in ipc/util.h and ipc/util.c. So there is no reason to keep it in a header file that the whole kernel can see. Move it into util.h to simplify future maintenance. Signed-off-by: "Eric W. 
Biederman" --- include/linux/ipc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipc.h b/include/linux/ipc.h index 821b2f260992..6cc2df7f7ac9 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -8,8 +8,6 @@ #include #include -#define IPCMNI 32768 /* <= MAX_INT limit for ipc arrays (including sysctl changes) */ - /* used by in-kernel data structures */ struct kern_ipc_perm { spinlock_t lock; -- cgit v1.2.3 From 819671ff849b07b9831b91de879ddc5da4b333d4 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:25 +0100 Subject: syscalls: define and explain goal to not call syscalls in the kernel The syscall entry points to the kernel defined by SYSCALL_DEFINEx() and COMPAT_SYSCALL_DEFINEx() should only be called from userspace through kernel entry points, but not from the kernel itself. This will allow cleanups and optimizations to the entry paths *and* to the parts of the kernel code which currently need to pretend to be userspace in order to make use of syscalls. Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a78186d826d7..0526286a0314 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -941,4 +941,11 @@ asmlinkage long sys_pkey_free(int pkey); asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, unsigned mask, struct statx __user *buffer); + +/* + * Kernel code should not call syscalls (i.e., sys_xyzyyz()) directly. + * Instead, use one of the functions which work equivalently, such as + * the ksys_xyzyyz() functions prototyped below. 
+ */ + #endif -- cgit v1.2.3 From b9193c1b61ddb97da4713155b0d580e41fb544ac Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Sat, 24 Mar 2018 11:44:22 -0700 Subject: bpf: Rename bpf_verifer_log bpf_verifer_log => bpf_verifier_log Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 6b66cd1aa0b9..c30668414b22 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -153,7 +153,7 @@ struct bpf_insn_aux_data { #define BPF_VERIFIER_TMP_LOG_SIZE 1024 -struct bpf_verifer_log { +struct bpf_verifier_log { u32 level; char kbuf[BPF_VERIFIER_TMP_LOG_SIZE]; char __user *ubuf; @@ -161,7 +161,7 @@ struct bpf_verifer_log { u32 len_total; }; -static inline bool bpf_verifier_log_full(const struct bpf_verifer_log *log) +static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log) { return log->len_used >= log->len_total - 1; } @@ -185,7 +185,7 @@ struct bpf_verifier_env { bool allow_ptr_leaks; bool seen_direct_write; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ - struct bpf_verifer_log log; + struct bpf_verifier_log log; u32 subprog_starts[BPF_MAX_SUBPROGS]; /* computes the stack depth of each bpf function */ u16 subprog_stack_depth[BPF_MAX_SUBPROGS + 1]; -- cgit v1.2.3 From 77d2e05abd45886dcad2b632c738cf46b9f7c19e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Sat, 24 Mar 2018 11:44:23 -0700 Subject: bpf: Add bpf_verifier_vlog() and bpf_verifier_log_needed() The BTF (BPF Type Format) verifier needs to reuse the current BPF verifier log. Hence, it requires the following changes: (1) Expose log_write() in verifier.c for other users. Its name is renamed to bpf_verifier_vlog(). 
(2) The BTF verifier also needs to check 'log->level && log->ubuf && !bpf_verifier_log_full(log);' independently outside of the current log_write(). It is because the BTF verifier will do one-check before making multiple calls to btf_verifier_vlog to log the details of a type. Hence, this check is also re-factored to a new function bpf_verifier_log_needed(). Since it is re-factored, we can check it before va_start() in the current bpf_verifier_log_write() and verbose(). Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c30668414b22..7e61c395fddf 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -166,6 +166,11 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log) return log->len_used >= log->len_total - 1; } +static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) +{ + return log->level && log->ubuf && !bpf_verifier_log_full(log); +} + #define BPF_MAX_SUBPROGS 256 /* single container for all structs @@ -192,6 +197,8 @@ struct bpf_verifier_env { u32 subprog_cnt; }; +void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, + va_list args); __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, const char *fmt, ...); -- cgit v1.2.3 From b91ed9d8082c394dda63f94f935219cd0a565938 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Fri, 16 Mar 2018 19:13:02 +0530 Subject: quota: Kill an unused extern entry form quota.h Kill an unused extern entry from quota.h which is leftover of below patch. 
[f32764bd2: quota: Convert quota statistics to generic percpu_counter] Signed-off-by: Ritesh Harjani Signed-off-by: Jan Kara --- include/linux/quota.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index 5ac9de4fcd6f..ca9772c8e48b 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -267,7 +267,6 @@ struct dqstats { struct percpu_counter counter[_DQST_DQSTAT_LAST]; }; -extern struct dqstats *dqstats_pcpu; extern struct dqstats dqstats; static inline void dqstats_inc(unsigned int type) -- cgit v1.2.3 From df91f56adce1fc131e05368a0ad0ea72afd9a79a Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 8 Jan 2018 11:45:04 +0200 Subject: libcrc32c: Add crc32c_impl function This function returns a string with the currently in-use implementation of the crc32c algorithm, i.e crc32c-generic (for unoptimised, generic implementation) or crc32c-intel for the sse optimised version. This will be used by btrfs. Signed-off-by: Nikolay Borisov Acked-by: Herbert Xu [ use crypto_shash_driver_name as suggested by Herbert ] Signed-off-by: David Sterba --- include/linux/crc32c.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/crc32c.h b/include/linux/crc32c.h index 357ae4611a45..bd21af828ff6 100644 --- a/include/linux/crc32c.h +++ b/include/linux/crc32c.h @@ -5,6 +5,7 @@ #include extern u32 crc32c(u32 crc, const void *address, unsigned int length); +extern const char *crc32c_impl(void); /* This macro exists for backwards-compatibility. */ #define crc32c_le crc32c -- cgit v1.2.3 From a687a5337063af99ebd0eebaa6f4b4cf2e07c21b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 7 Mar 2018 23:30:54 +0100 Subject: treewide: simplify Kconfig dependencies for removed archs A lot of Kconfig symbols have architecture specific dependencies. In those cases that depend on architectures we have already removed, they can be omitted. 
Acked-by: Kalle Valo Acked-by: Alexandre Belloni Signed-off-by: Arnd Bergmann --- include/linux/ide.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 20d42c0d9fb6..1d6f16110eae 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -25,15 +25,10 @@ #include #include -#if defined(CONFIG_CRIS) || defined(CONFIG_FRV) -# define SUPPORT_VLB_SYNC 0 -#else -# define SUPPORT_VLB_SYNC 1 -#endif - /* * Probably not wise to fiddle with these */ +#define SUPPORT_VLB_SYNC 1 #define IDE_DEFAULT_MAX_FAILURES 1 #define ERROR_MAX 8 /* Max read/write errors per sector */ #define ERROR_RESET 3 /* Reset controller every 4th retry */ -- cgit v1.2.3 From 768a032d0e73f962ec13cd05b722d9744d2cf903 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Mar 2018 18:21:04 +0100 Subject: net: adi: remove blackfin ethernet drivers The blackfin architecture is getting removed, so the bfin_mac driver is now obsolete. Acked-by: Dominik Brodowski Acked-by: Aaron Wu Signed-off-by: Arnd Bergmann --- include/linux/bfin_mac.h | 30 ------------------------------ 1 file changed, 30 deletions(-) delete mode 100644 include/linux/bfin_mac.h (limited to 'include/linux') diff --git a/include/linux/bfin_mac.h b/include/linux/bfin_mac.h deleted file mode 100644 index a69554ef8476..000000000000 --- a/include/linux/bfin_mac.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Blackfin On-Chip MAC Driver - * - * Copyright 2004-2010 Analog Devices Inc. - * - * Enter bugs at http://blackfin.uclinux.org/ - * - * Licensed under the GPL-2 or later. 
- */ - -#ifndef _LINUX_BFIN_MAC_H_ -#define _LINUX_BFIN_MAC_H_ - -#include - -struct bfin_phydev_platform_data { - unsigned short addr; - int irq; -}; - -struct bfin_mii_bus_platform_data { - int phydev_number; - struct bfin_phydev_platform_data *phydev_data; - const unsigned short *mac_peripherals; - int phy_mode; - unsigned int phy_mask; - unsigned short vlan1_mask, vlan2_mask; -}; - -#endif -- cgit v1.2.3 From 889ce12b1650b3c388634451872638a08faf6d6b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Mar 2018 16:05:23 +0100 Subject: raid: remove tile specific raid6 implementation The Tile architecture is getting removed, so we no longer need this either. Acked-by: Ard Biesheuvel Signed-off-by: Arnd Bergmann --- include/linux/raid/pq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 583cdd3d49ca..a366cc314479 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -105,7 +105,6 @@ extern const struct raid6_calls raid6_avx2x4; extern const struct raid6_calls raid6_avx512x1; extern const struct raid6_calls raid6_avx512x2; extern const struct raid6_calls raid6_avx512x4; -extern const struct raid6_calls raid6_tilegx8; extern const struct raid6_calls raid6_s390vx8; struct raid6_recov_calls { -- cgit v1.2.3 From 8cbfbae85085bdd0bdafc085b1ed14abe0349573 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Mar 2018 16:32:20 +0100 Subject: video/logo: remove obsolete logo files The blackfin and m32r architectures are getting removed, so it's time to clean up the logos as well. 
Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Arnd Bergmann --- include/linux/linux_logo.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/linux_logo.h b/include/linux/linux_logo.h index 5e3581d76c7f..d4d5b93efe84 100644 --- a/include/linux/linux_logo.h +++ b/include/linux/linux_logo.h @@ -36,8 +36,6 @@ struct linux_logo { extern const struct linux_logo logo_linux_mono; extern const struct linux_logo logo_linux_vga16; extern const struct linux_logo logo_linux_clut224; -extern const struct linux_logo logo_blackfin_vga16; -extern const struct linux_logo logo_blackfin_clut224; extern const struct linux_logo logo_dec_clut224; extern const struct linux_logo logo_mac_clut224; extern const struct linux_logo logo_parisc_clut224; @@ -46,7 +44,6 @@ extern const struct linux_logo logo_sun_clut224; extern const struct linux_logo logo_superh_mono; extern const struct linux_logo logo_superh_vga16; extern const struct linux_logo logo_superh_clut224; -extern const struct linux_logo logo_m32r_clut224; extern const struct linux_logo logo_spe_clut224; extern const struct linux_logo *fb_find_logo(int depth); -- cgit v1.2.3 From e3ed8b436bc32102ac2995940bc3a63c09755b63 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Mar 2018 17:02:41 +0100 Subject: fbdev: remove blackfin drivers The blackfin architecture is getting removed, this removes the associated fbdev drivers as well. 
Acked-by: Bartlomiej Zolnierkiewicz Acked-by: Aaron Wu Signed-off-by: Arnd Bergmann --- include/linux/fb.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index f577d3c89618..aa74a228bb92 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -571,8 +571,7 @@ static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { #elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || \ defined(__hppa__) || defined(__sh__) || defined(__powerpc__) || \ - defined(__avr32__) || defined(__bfin__) || defined(__arm__) || \ - defined(__aarch64__) + defined(__arm__) || defined(__aarch64__) #define fb_readb __raw_readb #define fb_readw __raw_readw -- cgit v1.2.3 From f59b2dc2de63652cad750efc2ad1012eb1bb342f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Mar 2018 18:16:31 +0100 Subject: pinctrl: remove adi2/blackfin drivers The blackfin architecture is getting removed, so these are now obsolete. Acked-by: Aaron Wu Signed-off-by: Arnd Bergmann --- include/linux/platform_data/pinctrl-adi2.h | 40 ------------------------------ 1 file changed, 40 deletions(-) delete mode 100644 include/linux/platform_data/pinctrl-adi2.h (limited to 'include/linux') diff --git a/include/linux/platform_data/pinctrl-adi2.h b/include/linux/platform_data/pinctrl-adi2.h deleted file mode 100644 index 8f91300617ec..000000000000 --- a/include/linux/platform_data/pinctrl-adi2.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Pinctrl Driver for ADI GPIO2 controller - * - * Copyright 2007-2013 Analog Devices Inc. - * - * Licensed under the GPLv2 or later - */ - - -#ifndef PINCTRL_ADI2_H -#define PINCTRL_ADI2_H - -#include -#include - -/** - * struct adi_pinctrl_gpio_platform_data - Pinctrl gpio platform data - * for ADI GPIO2 device. - * - * @port_gpio_base: Optional global GPIO index of the GPIO bank. - * 0 means driver decides. - * @port_pin_base: Pin index of the pin controller device. 
- * @port_width: PIN number of the GPIO bank device - * @pint_id: GPIO PINT device id that this GPIO bank should map to. - * @pint_assign: The 32-bit GPIO PINT registers can be divided into 2 parts. A - * GPIO bank can be mapped into either low 16 bits[0] or high 16 - * bits[1] of each PINT register. - * @pint_map: GIOP bank mapping code in PINT device - */ -struct adi_pinctrl_gpio_platform_data { - unsigned int port_gpio_base; - unsigned int port_pin_base; - unsigned int port_width; - u8 pinctrl_id; - u8 pint_id; - bool pint_assign; - u8 pint_map; -}; - -#endif -- cgit v1.2.3 From c957ea5c797cfccffeee92e0af8e0e99212dd755 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Mar 2018 17:24:55 +0100 Subject: input: misc: remove blackfin rotary driver The blackfin architecture is getting removed, so this one is obsolete as well. Acked-by: Dmitry Torokhov Acked-by: Aaron Wu Signed-off-by: Arnd Bergmann --- include/linux/platform_data/bfin_rotary.h | 117 ------------------------------ 1 file changed, 117 deletions(-) delete mode 100644 include/linux/platform_data/bfin_rotary.h (limited to 'include/linux') diff --git a/include/linux/platform_data/bfin_rotary.h b/include/linux/platform_data/bfin_rotary.h deleted file mode 100644 index 98829370fee2..000000000000 --- a/include/linux/platform_data/bfin_rotary.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * board initialization should put one of these structures into platform_data - * and place the bfin-rotary onto platform_bus named "bfin-rotary". - * - * Copyright 2008-2010 Analog Devices Inc. - * - * Licensed under the GPL-2 or later. 
- */ - -#ifndef _BFIN_ROTARY_H -#define _BFIN_ROTARY_H - -/* mode bitmasks */ -#define ROT_QUAD_ENC CNTMODE_QUADENC /* quadrature/grey code encoder mode */ -#define ROT_BIN_ENC CNTMODE_BINENC /* binary encoder mode */ -#define ROT_UD_CNT CNTMODE_UDCNT /* rotary counter mode */ -#define ROT_DIR_CNT CNTMODE_DIRCNT /* direction counter mode */ - -#define ROT_DEBE DEBE /* Debounce Enable */ - -#define ROT_CDGINV CDGINV /* CDG Pin Polarity Invert */ -#define ROT_CUDINV CUDINV /* CUD Pin Polarity Invert */ -#define ROT_CZMINV CZMINV /* CZM Pin Polarity Invert */ - -struct bfin_rotary_platform_data { - /* set rotary UP KEY_### or BTN_### in case you prefer - * bfin-rotary to send EV_KEY otherwise set 0 - */ - unsigned int rotary_up_key; - /* set rotary DOWN KEY_### or BTN_### in case you prefer - * bfin-rotary to send EV_KEY otherwise set 0 - */ - unsigned int rotary_down_key; - /* set rotary BUTTON KEY_### or BTN_### */ - unsigned int rotary_button_key; - /* set rotary Relative Axis REL_### in case you prefer - * bfin-rotary to send EV_REL otherwise set 0 - */ - unsigned int rotary_rel_code; - unsigned short debounce; /* 0..17 */ - unsigned short mode; - unsigned short pm_wakeup; - unsigned short *pin_list; -}; - -/* CNT_CONFIG bitmasks */ -#define CNTE (1 << 0) /* Counter Enable */ -#define DEBE (1 << 1) /* Debounce Enable */ -#define CDGINV (1 << 4) /* CDG Pin Polarity Invert */ -#define CUDINV (1 << 5) /* CUD Pin Polarity Invert */ -#define CZMINV (1 << 6) /* CZM Pin Polarity Invert */ -#define CNTMODE_SHIFT 8 -#define CNTMODE (0x7 << CNTMODE_SHIFT) /* Counter Operating Mode */ -#define ZMZC (1 << 1) /* CZM Zeroes Counter Enable */ -#define BNDMODE_SHIFT 12 -#define BNDMODE (0x3 << BNDMODE_SHIFT) /* Boundary register Mode */ -#define INPDIS (1 << 15) /* CUG and CDG Input Disable */ - -#define CNTMODE_QUADENC (0 << CNTMODE_SHIFT) /* quadrature encoder mode */ -#define CNTMODE_BINENC (1 << CNTMODE_SHIFT) /* binary encoder mode */ -#define CNTMODE_UDCNT (2 << 
CNTMODE_SHIFT) /* up/down counter mode */ -#define CNTMODE_DIRCNT (4 << CNTMODE_SHIFT) /* direction counter mode */ -#define CNTMODE_DIRTMR (5 << CNTMODE_SHIFT) /* direction timer mode */ - -#define BNDMODE_COMP (0 << BNDMODE_SHIFT) /* boundary compare mode */ -#define BNDMODE_ZERO (1 << BNDMODE_SHIFT) /* boundary compare and zero mode */ -#define BNDMODE_CAPT (2 << BNDMODE_SHIFT) /* boundary capture mode */ -#define BNDMODE_AEXT (3 << BNDMODE_SHIFT) /* boundary auto-extend mode */ - -/* CNT_IMASK bitmasks */ -#define ICIE (1 << 0) /* Illegal Gray/Binary Code Interrupt Enable */ -#define UCIE (1 << 1) /* Up count Interrupt Enable */ -#define DCIE (1 << 2) /* Down count Interrupt Enable */ -#define MINCIE (1 << 3) /* Min Count Interrupt Enable */ -#define MAXCIE (1 << 4) /* Max Count Interrupt Enable */ -#define COV31IE (1 << 5) /* Bit 31 Overflow Interrupt Enable */ -#define COV15IE (1 << 6) /* Bit 15 Overflow Interrupt Enable */ -#define CZEROIE (1 << 7) /* Count to Zero Interrupt Enable */ -#define CZMIE (1 << 8) /* CZM Pin Interrupt Enable */ -#define CZMEIE (1 << 9) /* CZM Error Interrupt Enable */ -#define CZMZIE (1 << 10) /* CZM Zeroes Counter Interrupt Enable */ - -/* CNT_STATUS bitmasks */ -#define ICII (1 << 0) /* Illegal Gray/Binary Code Interrupt Identifier */ -#define UCII (1 << 1) /* Up count Interrupt Identifier */ -#define DCII (1 << 2) /* Down count Interrupt Identifier */ -#define MINCII (1 << 3) /* Min Count Interrupt Identifier */ -#define MAXCII (1 << 4) /* Max Count Interrupt Identifier */ -#define COV31II (1 << 5) /* Bit 31 Overflow Interrupt Identifier */ -#define COV15II (1 << 6) /* Bit 15 Overflow Interrupt Identifier */ -#define CZEROII (1 << 7) /* Count to Zero Interrupt Identifier */ -#define CZMII (1 << 8) /* CZM Pin Interrupt Identifier */ -#define CZMEII (1 << 9) /* CZM Error Interrupt Identifier */ -#define CZMZII (1 << 10) /* CZM Zeroes Counter Interrupt Identifier */ - -/* CNT_COMMAND bitmasks */ -#define W1LCNT 0xf /* Load Counter 
Register */ -#define W1LMIN 0xf0 /* Load Min Register */ -#define W1LMAX 0xf00 /* Load Max Register */ -#define W1ZMONCE (1 << 12) /* Enable CZM Clear Counter Once */ - -#define W1LCNT_ZERO (1 << 0) /* write 1 to load CNT_COUNTER with zero */ -#define W1LCNT_MIN (1 << 2) /* write 1 to load CNT_COUNTER from CNT_MIN */ -#define W1LCNT_MAX (1 << 3) /* write 1 to load CNT_COUNTER from CNT_MAX */ - -#define W1LMIN_ZERO (1 << 4) /* write 1 to load CNT_MIN with zero */ -#define W1LMIN_CNT (1 << 5) /* write 1 to load CNT_MIN from CNT_COUNTER */ -#define W1LMIN_MAX (1 << 7) /* write 1 to load CNT_MIN from CNT_MAX */ - -#define W1LMAX_ZERO (1 << 8) /* write 1 to load CNT_MAX with zero */ -#define W1LMAX_CNT (1 << 9) /* write 1 to load CNT_MAX from CNT_COUNTER */ -#define W1LMAX_MIN (1 << 10) /* write 1 to load CNT_MAX from CNT_MIN */ - -/* CNT_DEBOUNCE bitmasks */ -#define DPRESCALE 0xf /* Load Counter Register */ - -#endif -- cgit v1.2.3 From 03f4c9abd73284193f70e64da1a266d393650530 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Mar 2018 16:10:20 +0100 Subject: usb: host: remove tilegx platform glue The tile architecture is getting removed, so the ehci and ohci platform glue drivers are no longer needed. In case of ohci, this is the last one to define a PLATFORM_DRIVER macro, so we can remove even more. Acked-by: Greg Kroah-Hartman Acked-by: Alan Stern Signed-off-by: Arnd Bergmann --- include/linux/usb/tilegx.h | 35 ----------------------------------- 1 file changed, 35 deletions(-) delete mode 100644 include/linux/usb/tilegx.h (limited to 'include/linux') diff --git a/include/linux/usb/tilegx.h b/include/linux/usb/tilegx.h deleted file mode 100644 index 817908573fe8..000000000000 --- a/include/linux/usb/tilegx.h +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright 2012 Tilera Corporation. All Rights Reserved. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - * - * Structure to contain platform-specific data related to Tile-Gx USB - * controllers. - */ - -#ifndef _LINUX_USB_TILEGX_H -#define _LINUX_USB_TILEGX_H - -#include - -struct tilegx_usb_platform_data { - /* GXIO device index. */ - int dev_index; - - /* GXIO device context. */ - gxio_usb_host_context_t usb_ctx; - - /* Device IRQ. */ - unsigned int irq; -}; - -#endif /* _LINUX_USB_TILEGX_H */ -- cgit v1.2.3 From a9762b704f5d5e167bbc261573621782b90efbc4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Mar 2018 17:37:54 +0100 Subject: usb: musb: remove blackfin port The blackfin architecture is getting removed, so we can clean up all the special cases in the musb driver. 
Acked-by: Greg Kroah-Hartman Acked-by: Aaron Wu Acked-by: Bin Liu Cc: Stephen Rothwell [arnd: adding in fixups from Aaron and Stephen] Signed-off-by: Arnd Bergmann --- include/linux/usb/musb.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h index 5d19e6730475..9eb908a98033 100644 --- a/include/linux/usb/musb.h +++ b/include/linux/usb/musb.h @@ -89,13 +89,6 @@ struct musb_hdrc_config { u8 ram_bits; /* ram address size */ struct musb_hdrc_eps_bits *eps_bits __deprecated; -#ifdef CONFIG_BLACKFIN - /* A GPIO controlling VRSEL in Blackfin */ - unsigned int gpio_vrsel; - unsigned int gpio_vrsel_active; - /* musb CLKIN in Blackfin in MHZ */ - unsigned char clkin; -#endif u32 maximum_speed; }; -- cgit v1.2.3 From a470143fc83924251647143ff042bd2843e296cf Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 24 Jan 2018 20:24:24 +0200 Subject: net/utils: Introduce inet_addr_is_any Can be useful to check INET_ANY address for both ipv4/ipv6 addresses. Reviewed-by: Bart Van Assche Signed-off-by: Sagi Grimberg Cc: "David S. Miller" Cc: netdev@vger.kernel.org Signed-off-by: Jens Axboe --- include/linux/inet.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/inet.h b/include/linux/inet.h index 636ebe87e6f8..97defc1139e9 100644 --- a/include/linux/inet.h +++ b/include/linux/inet.h @@ -59,5 +59,6 @@ extern int in6_pton(const char *src, int srclen, u8 *dst, int delim, const char extern int inet_pton_with_scope(struct net *net, unsigned short af, const char *src, const char *port, struct sockaddr_storage *addr); +extern bool inet_addr_is_any(struct sockaddr *addr); #endif /* _LINUX_INET_H */ -- cgit v1.2.3 From ede2762d93ff16e0974f7446516b46b1022db213 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Fri, 23 Mar 2018 19:47:19 +0300 Subject: net: Make NETDEV_XXX commands enum { } This patch is preparation to drop NETDEV_UNREGISTER_FINAL. 
Since the cmd is used in usnic_ib_netdev_event_to_string() to get cmd name, after plain removing NETDEV_UNREGISTER_FINAL from everywhere, we'd have holes in event2str[] in this function. Instead of that, let's make NETDEV_XXX commands names available for everyone, and to define netdev_cmd_to_name() in the way we won't have to shaffle names after their numbers are changed. Signed-off-by: Kirill Tkhai Signed-off-by: David S. Miller --- include/linux/netdevice.h | 69 ++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 913b1cc882cf..dd5a04c971d5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2312,43 +2312,46 @@ struct netdev_lag_lower_state_info { #include -/* netdevice notifier chain. Please remember to update the rtnetlink - * notification exclusion list in rtnetlink_event() when adding new - * types. +/* netdevice notifier chain. Please remember to update netdev_cmd_to_name() + * and the rtnetlink notification exclusion list in rtnetlink_event() when + * adding new types. 
*/ -#define NETDEV_UP 0x0001 /* For now you can't veto a device up/down */ -#define NETDEV_DOWN 0x0002 -#define NETDEV_REBOOT 0x0003 /* Tell a protocol stack a network interface +enum netdev_cmd { + NETDEV_UP = 1, /* For now you can't veto a device up/down */ + NETDEV_DOWN, + NETDEV_REBOOT, /* Tell a protocol stack a network interface detected a hardware crash and restarted - we can use this eg to kick tcp sessions once done */ -#define NETDEV_CHANGE 0x0004 /* Notify device state change */ -#define NETDEV_REGISTER 0x0005 -#define NETDEV_UNREGISTER 0x0006 -#define NETDEV_CHANGEMTU 0x0007 /* notify after mtu change happened */ -#define NETDEV_CHANGEADDR 0x0008 -#define NETDEV_GOING_DOWN 0x0009 -#define NETDEV_CHANGENAME 0x000A -#define NETDEV_FEAT_CHANGE 0x000B -#define NETDEV_BONDING_FAILOVER 0x000C -#define NETDEV_PRE_UP 0x000D -#define NETDEV_PRE_TYPE_CHANGE 0x000E -#define NETDEV_POST_TYPE_CHANGE 0x000F -#define NETDEV_POST_INIT 0x0010 -#define NETDEV_UNREGISTER_FINAL 0x0011 -#define NETDEV_RELEASE 0x0012 -#define NETDEV_NOTIFY_PEERS 0x0013 -#define NETDEV_JOIN 0x0014 -#define NETDEV_CHANGEUPPER 0x0015 -#define NETDEV_RESEND_IGMP 0x0016 -#define NETDEV_PRECHANGEMTU 0x0017 /* notify before mtu change happened */ -#define NETDEV_CHANGEINFODATA 0x0018 -#define NETDEV_BONDING_INFO 0x0019 -#define NETDEV_PRECHANGEUPPER 0x001A -#define NETDEV_CHANGELOWERSTATE 0x001B -#define NETDEV_UDP_TUNNEL_PUSH_INFO 0x001C -#define NETDEV_UDP_TUNNEL_DROP_INFO 0x001D -#define NETDEV_CHANGE_TX_QUEUE_LEN 0x001E + NETDEV_CHANGE, /* Notify device state change */ + NETDEV_REGISTER, + NETDEV_UNREGISTER, + NETDEV_CHANGEMTU, /* notify after mtu change happened */ + NETDEV_CHANGEADDR, + NETDEV_GOING_DOWN, + NETDEV_CHANGENAME, + NETDEV_FEAT_CHANGE, + NETDEV_BONDING_FAILOVER, + NETDEV_PRE_UP, + NETDEV_PRE_TYPE_CHANGE, + NETDEV_POST_TYPE_CHANGE, + NETDEV_POST_INIT, + NETDEV_UNREGISTER_FINAL, + NETDEV_RELEASE, + NETDEV_NOTIFY_PEERS, + NETDEV_JOIN, + NETDEV_CHANGEUPPER, + NETDEV_RESEND_IGMP, + 
NETDEV_PRECHANGEMTU, /* notify before mtu change happened */ + NETDEV_CHANGEINFODATA, + NETDEV_BONDING_INFO, + NETDEV_PRECHANGEUPPER, + NETDEV_CHANGELOWERSTATE, + NETDEV_UDP_TUNNEL_PUSH_INFO, + NETDEV_UDP_TUNNEL_DROP_INFO, + NETDEV_CHANGE_TX_QUEUE_LEN, +}; +const char *netdev_cmd_to_name(enum netdev_cmd cmd); int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); -- cgit v1.2.3 From 070f2d7e264acd6316fc24092b7f51a18c75ac9c Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Fri, 23 Mar 2018 19:47:39 +0300 Subject: net: Drop NETDEV_UNREGISTER_FINAL Last user is gone after bdf5bd7f2132 "rds: tcp: remove register_netdevice_notifier infrastructure.", so we can remove this netdevice command. This allows to delete rtnl_lock() in netdev_run_todo(), which is hot path for net namespace unregistration. dev_change_net_namespace() and netdev_wait_allrefs() have rcu_barrier() before NETDEV_UNREGISTER_FINAL call, and the source commits say they were introduced to delemit the call with NETDEV_UNREGISTER, but this patch leaves them on the places, since they require additional analysis, whether we need in them for something else. Signed-off-by: Kirill Tkhai Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dd5a04c971d5..2a2d9cf50aa2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2336,7 +2336,6 @@ enum netdev_cmd { NETDEV_PRE_TYPE_CHANGE, NETDEV_POST_TYPE_CHANGE, NETDEV_POST_INIT, - NETDEV_UNREGISTER_FINAL, NETDEV_RELEASE, NETDEV_NOTIFY_PEERS, NETDEV_JOIN, -- cgit v1.2.3 From bc67a0daf8f3bc6fa8fcb68090f3c444de7f951c Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 26 Mar 2018 15:01:31 +0300 Subject: ipmr: Make vif fib notifiers common The fib-notifiers are tightly coupled with the vif_device which is already common. 
Move the notifier struct definition and helpers to the common file; Currently they're only used by ipmr. Signed-off-by: Yuval Mintz Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/mroute.h | 8 ------- include/linux/mroute_base.h | 53 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 7ed82e4f11b3..3f70a04a5879 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -55,14 +55,6 @@ static inline bool ipmr_rule_default(const struct fib_rule *rule) } #endif -struct vif_entry_notifier_info { - struct fib_notifier_info info; - struct net_device *dev; - vifi_t vif_index; - unsigned short vif_flags; - u32 tb_id; -}; - #define VIFF_STATIC 0x8000 struct mfc_cache_cmp_arg { diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index c2560cb50f1d..23326f5402f3 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -6,6 +6,7 @@ #include #include #include +#include /** * struct vif_device - interface representor for multicast routing @@ -36,6 +37,58 @@ struct vif_device { __be32 local, remote; }; +struct vif_entry_notifier_info { + struct fib_notifier_info info; + struct net_device *dev; + unsigned short vif_index; + unsigned short vif_flags; + u32 tb_id; +}; + +static inline int mr_call_vif_notifier(struct notifier_block *nb, + struct net *net, + unsigned short family, + enum fib_event_type event_type, + struct vif_device *vif, + unsigned short vif_index, u32 tb_id) +{ + struct vif_entry_notifier_info info = { + .info = { + .family = family, + .net = net, + }, + .dev = vif->dev, + .vif_index = vif_index, + .vif_flags = vif->flags, + .tb_id = tb_id, + }; + + return call_fib_notifier(nb, net, event_type, &info.info); +} + +static inline int mr_call_vif_notifiers(struct net *net, + unsigned short family, + enum fib_event_type event_type, + struct vif_device 
*vif, + unsigned short vif_index, u32 tb_id, + unsigned int *ipmr_seq) +{ + struct vif_entry_notifier_info info = { + .info = { + .family = family, + .net = net, + }, + .dev = vif->dev, + .vif_index = vif_index, + .vif_flags = vif->flags, + .tb_id = tb_id, + }; + + ASSERT_RTNL(); + (*ipmr_seq)++; + return call_fib_notifiers(net, event_type, &info.info); +} + #ifndef MAXVIFS /* This one is nasty; value is defined in uapi using different symbols for * mroute and morute6 but both map into same 32. -- cgit v1.2.3 From 54c4cad97b8fd414909b78d4274a6797baa52b3b Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 26 Mar 2018 15:01:32 +0300 Subject: ipmr: Make MFC fib notifiers common Like vif notifications, move the notifier struct for MFC as well as its helpers into a common file; Currently they're only used by ipmr. Signed-off-by: Yuval Mintz Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/mroute.h | 6 ------ include/linux/mroute_base.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 3f70a04a5879..c855d80b51f7 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -80,12 +80,6 @@ struct mfc_cache { }; }; -struct mfc_entry_notifier_info { - struct fib_notifier_info info; - struct mfc_cache *mfc; - u32 tb_id; -}; - struct rtmsg; int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 23326f5402f3..2c594686c05e 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -152,6 +152,50 @@ struct mr_mfc { struct rcu_head rcu; }; +struct mfc_entry_notifier_info { + struct fib_notifier_info info; + struct mr_mfc *mfc; + u32 tb_id; +}; + +static inline int mr_call_mfc_notifier(struct notifier_block *nb, + struct net *net, + unsigned short family, + enum fib_event_type 
event_type, + struct mr_mfc *mfc, u32 tb_id) +{ + struct mfc_entry_notifier_info info = { + .info = { + .family = family, + .net = net, + }, + .mfc = mfc, + .tb_id = tb_id + }; + + return call_fib_notifier(nb, net, event_type, &info.info); +} + +static inline int mr_call_mfc_notifiers(struct net *net, + unsigned short family, + enum fib_event_type event_type, + struct mr_mfc *mfc, u32 tb_id, + unsigned int *ipmr_seq) +{ + struct mfc_entry_notifier_info info = { + .info = { + .family = family, + .net = net, + }, + .mfc = mfc, + .tb_id = tb_id + }; + + ASSERT_RTNL(); + (*ipmr_seq)++; + return call_fib_notifiers(net, event_type, &info.info); +} + struct mr_table; /** -- cgit v1.2.3 From cdc9f9443b5c3a61c7cec807965054ee1fd29acf Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 26 Mar 2018 15:01:33 +0300 Subject: ipmr: Make ipmr_dump() common Since all the primitive elements used for the notification done by ipmr are now common [mr_table, mr_mfc, vif_device] we can refactor the logic for dumping them to a common file. Signed-off-by: Yuval Mintz Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- include/linux/mroute_base.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 2c594686c05e..289eb5aa7b5d 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -277,6 +277,13 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, u32 portid, u32 seq, struct mr_mfc *c, int cmd, int flags), spinlock_t *lock); + +int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, + int (*rules_dump)(struct net *net, + struct notifier_block *nb), + struct mr_table *(*mr_iter)(struct net *net, + struct mr_table *mrt), + rwlock_t *mrt_lock); #else static inline void vif_device_init(struct vif_device *v, struct net_device *dev, @@ -333,6 +340,17 @@ mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, { return -EINVAL; } + +static inline int mr_dump(struct net *net, struct notifier_block *nb, + unsigned short family, + int (*rules_dump)(struct net *net, + struct notifier_block *nb), + struct mr_table *(*mr_iter)(struct net *net, + struct mr_table *mrt), + rwlock_t *mrt_lock) +{ + return -EINVAL; +} #endif static inline void *mr_mfc_find(struct mr_table *mrt, void *hasharg) -- cgit v1.2.3 From d3c07e5b9939a055fa017f200e535ae947eb22ab Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 26 Mar 2018 15:01:35 +0300 Subject: ip6mr: Add API for default_rule fib Add the ability to discern whether a given FIB rule notification relates to the default rule inserted when registering ip6mr or a different one. Would later be used by drivers wishing to offload ipv6 multicast routes but unable to offload rules other than the default one. Signed-off-by: Yuval Mintz Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- include/linux/mroute6.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 1ac38e6819f5..c4a45859f586 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -8,6 +8,7 @@ #include #include #include +#include #ifdef CONFIG_IPV6_MROUTE static inline int ip6_mroute_opt(int opt) @@ -63,6 +64,15 @@ static inline void ip6_mr_cleanup(void) } #endif +#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES +bool ip6mr_rule_default(const struct fib_rule *rule); +#else +static inline bool ip6mr_rule_default(const struct fib_rule *rule) +{ + return true; +} +#endif + #define VIFF_STATIC 0x8000 struct mfc6_cache_cmp_arg { -- cgit v1.2.3 From 8c13af2a219c6498071b30ea558438c74267ae4d Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 26 Mar 2018 15:01:36 +0300 Subject: ip6mr: Add refcounting to mfc Since ipmr and ip6mr are using the same mr_mfc struct at their core, we can now refactor the ipmr_cache_{hold,put} logic and apply refcounting to both ipmr and ip6mr. Signed-off-by: Yuval Mintz Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- include/linux/mroute.h | 19 ------------------- include/linux/mroute_base.h | 13 +++++++++++++ 2 files changed, 13 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index c855d80b51f7..9a36fad9e068 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -84,23 +84,4 @@ struct rtmsg; int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, struct rtmsg *rtm, u32 portid); - -#ifdef CONFIG_IP_MROUTE -void ipmr_cache_free(struct mfc_cache *mfc_cache); -#else -static inline void ipmr_cache_free(struct mfc_cache *mfc_cache) -{ -} -#endif - -static inline void ipmr_cache_put(struct mfc_cache *c) -{ - if (refcount_dec_and_test(&c->_c.mfc_un.res.refcount)) - ipmr_cache_free(c); -} -static inline void ipmr_cache_hold(struct mfc_cache *c) -{ - refcount_inc(&c->_c.mfc_un.res.refcount); -} - #endif diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 289eb5aa7b5d..d617fe45543e 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -125,6 +125,7 @@ enum { * @refcount: reference count for this entry * @list: global entry list * @rcu: used for entry destruction + * @free: Operation used for freeing an entry under RCU */ struct mr_mfc { struct rhlist_head mnode; @@ -150,8 +151,20 @@ struct mr_mfc { } mfc_un; struct list_head list; struct rcu_head rcu; + void (*free)(struct rcu_head *head); }; +static inline void mr_cache_put(struct mr_mfc *c) +{ + if (refcount_dec_and_test(&c->mfc_un.res.refcount)) + call_rcu(&c->rcu, c->free); +} + +static inline void mr_cache_hold(struct mr_mfc *c) +{ + refcount_inc(&c->mfc_un.res.refcount); +} + struct mfc_entry_notifier_info { struct fib_notifier_info info; struct mr_mfc *mfc; -- cgit v1.2.3 From 2fcb12df7d2fa5a004fc3e7f589e58a08f7ed8c9 Mon Sep 17 00:00:00 2001 From: Inbar Karmy Date: Thu, 17 Aug 2017 16:39:47 +0300 Subject: net/mlx5e: Expose PFC stall prevention counters Add 
the needed capability bit and counters to device spec description. Expose the following two counters in ethtool: tx_pause_storm_warning_events: when the device is stalled for a period longer than a pre-configured watermark, the counter increases, allowing the debug utility an insight into current device status. tx_pause_storm_error_events: when the device is stalled for a period longer than a pre-configured timeout, the pause transmission is disabled, and the counter increases. Signed-off-by: Inbar Karmy Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 4 ++++ include/linux/mlx5/mlx5_ifc.h | 28 +++++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index e5258ee4e38b..4b5939c78cdd 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1013,6 +1013,7 @@ enum mlx5_cap_type { MLX5_CAP_RESERVED, MLX5_CAP_VECTOR_CALC, MLX5_CAP_QOS, + MLX5_CAP_DEBUG, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1140,6 +1141,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_QOS(mdev, cap)\ MLX5_GET(qos_cap, mdev->caps.hca_cur[MLX5_CAP_QOS], cap) +#define MLX5_CAP_DEBUG(mdev, cap)\ + MLX5_GET(debug_cap, mdev->caps.hca_cur[MLX5_CAP_DEBUG], cap) + #define MLX5_CAP_PCAM_FEATURE(mdev, fld) \ MLX5_GET(pcam_reg, (mdev)->caps.pcam, feature_cap_mask.enhanced_features.fld) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 14ad84afe8ba..c7d50eccff9e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -593,6 +593,16 @@ struct mlx5_ifc_qos_cap_bits { u8 reserved_at_100[0x700]; }; +struct mlx5_ifc_debug_cap_bits { + u8 reserved_at_0[0x20]; + + u8 reserved_at_20[0x2]; + u8 stall_detect[0x1]; + u8 reserved_at_23[0x1d]; + + u8 reserved_at_40[0x7c0]; +}; + struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 csum_cap[0x1]; u8 vlan_cap[0x1]; @@ -855,7 +865,7 @@ struct
mlx5_ifc_cmd_hca_cap_bits { u8 out_of_seq_cnt[0x1]; u8 vport_counters[0x1]; u8 retransmission_q_counters[0x1]; - u8 reserved_at_183[0x1]; + u8 debug[0x1]; u8 modify_rq_counter_set_id[0x1]; u8 rq_delay_drop[0x1]; u8 max_qp_cnt[0xa]; @@ -1572,7 +1582,17 @@ struct mlx5_ifc_eth_per_prio_grp_data_layout_bits { u8 rx_pause_transition_low[0x20]; - u8 reserved_at_3c0[0x400]; + u8 reserved_at_3c0[0x40]; + + u8 device_stall_minor_watermark_cnt_high[0x20]; + + u8 device_stall_minor_watermark_cnt_low[0x20]; + + u8 device_stall_critical_watermark_cnt_high[0x20]; + + u8 device_stall_critical_watermark_cnt_low[0x20]; + + u8 reserved_at_480[0x340]; }; struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits { @@ -7874,8 +7894,10 @@ struct mlx5_ifc_peir_reg_bits { }; struct mlx5_ifc_pcam_enhanced_features_bits { - u8 reserved_at_0[0x7b]; + u8 reserved_at_0[0x76]; + u8 pfcc_mask[0x1]; + u8 reserved_at_77[0x4]; u8 rx_buffer_fullness_counters[0x1]; u8 ptys_connector_type[0x1]; u8 reserved_at_7d[0x1]; -- cgit v1.2.3 From 2afa609f5c970185a8cae73f6a4caadf97fbea54 Mon Sep 17 00:00:00 2001 From: Inbar Karmy Date: Mon, 20 Nov 2017 18:06:20 +0200 Subject: net/mlx5e: PFC stall prevention support Implement set/get functions to configure PFC stall prevention timeout by tunables api through ethtool. By default the stall prevention timeout is configured to 8 sec. Timeout range is: 80-8000 msec. Enabling stall prevention with the auto timeout will set the timeout to 100 msec. 
Signed-off-by: Inbar Karmy Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 17 +++++++++++++---- include/linux/mlx5/port.h | 6 ++++++ 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c7d50eccff9e..f3200a9696d6 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -7833,7 +7833,11 @@ struct mlx5_ifc_pifr_reg_bits { struct mlx5_ifc_pfcc_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; - u8 reserved_at_10[0x10]; + u8 reserved_at_10[0xb]; + u8 ppan_mask_n[0x1]; + u8 minor_stall_mask[0x1]; + u8 critical_stall_mask[0x1]; + u8 reserved_at_1e[0x2]; u8 ppan[0x4]; u8 reserved_at_24[0x4]; @@ -7843,17 +7847,22 @@ struct mlx5_ifc_pfcc_reg_bits { u8 pptx[0x1]; u8 aptx[0x1]; - u8 reserved_at_42[0x6]; + u8 pptx_mask_n[0x1]; + u8 reserved_at_43[0x5]; u8 pfctx[0x8]; u8 reserved_at_50[0x10]; u8 pprx[0x1]; u8 aprx[0x1]; - u8 reserved_at_62[0x6]; + u8 pprx_mask_n[0x1]; + u8 reserved_at_63[0x5]; u8 pfcrx[0x8]; u8 reserved_at_70[0x10]; - u8 reserved_at_80[0x80]; + u8 device_stall_minor_watermark[0x10]; + u8 device_stall_critical_watermark[0x10]; + + u8 reserved_at_a0[0x60]; }; struct mlx5_ifc_pelc_reg_bits { diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 035f0d4dc9fe..34aed6032f86 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -151,6 +151,12 @@ int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx); int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx); +int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev, + u16 stall_critical_watermark, + u16 stall_minor_watermark); +int mlx5_query_port_stall_watermark(struct mlx5_core_dev *dev, + u16 *stall_critical_watermark, u16 *stall_minor_watermark); + int mlx5_max_tc(struct mlx5_core_dev *mdev); int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc); -- cgit v1.2.3 
From 61c5b5c9178288a4caa3e39095aafb391c5100f6 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Sun, 7 Jan 2018 16:45:27 +0200 Subject: net/mlx5: Add support for QUERY_VNIC_ENV command Add support for new FW command QUERY_VNIC_ENV. The command is used by the driver to query vnic diagnostic statistics from FW. Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 50 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f3200a9696d6..52e373dd2679 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -143,6 +143,7 @@ enum { MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT = 0x763, MLX5_CMD_OP_QUERY_HCA_VPORT_GID = 0x764, MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY = 0x765, + MLX5_CMD_OP_QUERY_VNIC_ENV = 0x76f, MLX5_CMD_OP_QUERY_VPORT_COUNTER = 0x770, MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771, MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772, @@ -875,7 +876,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 vhca_group_manager[0x1]; u8 ib_virt[0x1]; u8 eth_virt[0x1]; - u8 reserved_at_1a4[0x1]; + u8 vnic_env_queue_counters[0x1]; u8 ets[0x1]; u8 nic_flow_table[0x1]; u8 eswitch_flow_table[0x1]; @@ -2386,6 +2387,24 @@ struct mlx5_ifc_xrc_srqc_bits { u8 reserved_at_180[0x80]; }; +struct mlx5_ifc_vnic_diagnostic_statistics_bits { + u8 counter_error_queues[0x20]; + + u8 total_error_queues[0x20]; + + u8 send_queue_priority_update_flow[0x20]; + + u8 reserved_at_60[0x20]; + + u8 nic_receive_steering_discard[0x40]; + + u8 receive_discard_vport_down[0x40]; + + u8 transmit_discard_vport_down[0x40]; + + u8 reserved_at_140[0xec0]; +}; + struct mlx5_ifc_traffic_counter_bits { u8 packets[0x40]; @@ -3661,6 +3680,35 @@ struct mlx5_ifc_query_vport_state_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_query_vnic_env_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 
reserved_at_40[0x40]; + + struct mlx5_ifc_vnic_diagnostic_statistics_bits vport_env; +}; + +enum { + MLX5_QUERY_VNIC_ENV_IN_OP_MOD_VPORT_DIAG_STATISTICS = 0x0, +}; + +struct mlx5_ifc_query_vnic_env_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; +}; + struct mlx5_ifc_query_vport_counter_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; -- cgit v1.2.3 From 5c298143be17f5100656b9c140af672c644116d9 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Tue, 26 Dec 2017 16:46:29 +0200 Subject: net/mlx5e: Add vnic steering drop statistics Added the following packets drop counter: Rx steering missed dropped packets - counts packets which were dropped due to miss on NIC rx steering rules. This counter will be shown on ethtool as a new counter called rx_steer_missed_packets. Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 52e373dd2679..9202113f552c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1008,7 +1008,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_330[0xb]; u8 log_max_xrcd[0x5]; - u8 reserved_at_340[0x8]; + u8 nic_receive_steering_discard[0x1]; + u8 reserved_at_341[0x7]; u8 log_max_flow_counter_bulk[0x8]; u8 max_flow_counter_15_0[0x10]; -- cgit v1.2.3 From aaabd0783b1cf46569f5fc1c60d79709815497fc Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Sun, 14 Jan 2018 00:56:25 +0200 Subject: net/mlx5: Add packet dropped while vport down statistics Added the following packets dropped while vport down statistics: Rx dropped while vport down - counts packets which were steered by e-switch to a vport, but dropped since the vport was down. 
This counter will be shown on ip link tool as part of the vport rx_dropped counter. Tx dropped while vport down - counts packets which were transmitted by a vport, but dropped due to vport logical link down. This counter will be shown on ip link tool as part of the vport tx_dropped counter. The counters are read from FW by command QUERY_VNIC_ENV. Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 4 +++- include/linux/mlx5/vport.h | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 9202113f552c..1f3483d40055 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1009,7 +1009,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_xrcd[0x5]; u8 nic_receive_steering_discard[0x1]; - u8 reserved_at_341[0x7]; + u8 receive_discard_vport_down[0x1]; + u8 transmit_discard_vport_down[0x1]; + u8 reserved_at_343[0x5]; u8 log_max_flow_counter_bulk[0x8]; u8 max_flow_counter_15_0[0x10]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 64e193e87394..9208cb8809ac 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -107,6 +107,9 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev); int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev); +int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, + u64 *rx_discard_vport_down, + u64 *tx_discard_vport_down); int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, int vf, u8 port_num, void *out, size_t out_sz); -- cgit v1.2.3 From 0c06897a9ac7e2db9ad2df15bc6511e8ab88378f Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 28 Jan 2018 20:14:20 +0200 Subject: net/mlx5: Add core support for vlan push/pop steering action Newer NICs (ConnectX-5 and onward) can apply vlan pop or push as an action taking place 
during flow steering. Add the core bits for that. Signed-off-by: Or Gerlitz Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 7 +++++++ include/linux/mlx5/mlx5_ifc.h | 16 ++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index b957e52434f8..47aecc4fa8c2 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -142,6 +142,12 @@ struct mlx5_flow_group * mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *in); void mlx5_destroy_flow_group(struct mlx5_flow_group *fg); +struct mlx5_fs_vlan { + u16 ethtype; + u16 vid; + u8 prio; +}; + struct mlx5_flow_act { u32 action; bool has_flow_tag; @@ -149,6 +155,7 @@ struct mlx5_flow_act { u32 encap_id; u32 modify_id; uintptr_t esp_id; + struct mlx5_fs_vlan vlan; }; #define MLX5_DECLARE_FLOW_ACT(name) \ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1f3483d40055..c19e611d2782 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -314,7 +314,10 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 flow_table_modify[0x1]; u8 encap[0x1]; u8 decap[0x1]; - u8 reserved_at_9[0x17]; + u8 reserved_at_9[0x1]; + u8 pop_vlan[0x1]; + u8 push_vlan[0x1]; + u8 reserved_at_c[0x14]; u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; @@ -2311,10 +2314,19 @@ enum { MLX5_FLOW_CONTEXT_ACTION_ENCAP = 0x10, MLX5_FLOW_CONTEXT_ACTION_DECAP = 0x20, MLX5_FLOW_CONTEXT_ACTION_MOD_HDR = 0x40, + MLX5_FLOW_CONTEXT_ACTION_VLAN_POP = 0x80, + MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH = 0x100, +}; + +struct mlx5_ifc_vlan_bits { + u8 ethtype[0x10]; + u8 prio[0x3]; + u8 cfi[0x1]; + u8 vid[0xc]; }; struct mlx5_ifc_flow_context_bits { - u8 reserved_at_0[0x20]; + struct mlx5_ifc_vlan_bits push_vlan; u8 group_id[0x20]; -- cgit v1.2.3 From fc5d1073cae299de4517755a910df4f12a6a438f Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 26 Mar 2018 23:27:21 -0700 
Subject: x86/mm/32: Remove unused node_memmap_size_bytes() & CONFIG_NEED_NODE_MEMMAP_SIZE logic node_memmap_size_bytes() has been unused since the v3.9 kernel, so remove it. Signed-off-by: David Rientjes Cc: Dave Hansen Cc: Laura Abbott Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Fixes: f03574f2d5b2 ("x86-32, mm: Rip out x86_32 NUMA remapping code") Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1803262325540.256524@chino.kir.corp.google.com Signed-off-by: Ingo Molnar --- include/linux/mmzone.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7522a6987595..a2db4576e499 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -816,10 +816,6 @@ int local_memory_node(int node_id); static inline int local_memory_node(int node_id) { return node_id; }; #endif -#ifdef CONFIG_NEED_NODE_MEMMAP_SIZE -unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); -#endif - /* * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc. */ @@ -1289,7 +1285,6 @@ struct mminit_pfnnid_cache { #endif void memory_present(int nid, unsigned long start, unsigned long end); -unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); /* * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we -- cgit v1.2.3 From 5b644aa012f67fd211138a067b9f351f30bdcc60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 14 Mar 2018 13:10:42 +0100 Subject: mtd: partitions: add of_match_table parser matching for the "ofpart" type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to properly support compatibility strings as described in the bindings/mtd/partition.txt "ofpart" type should be treated as an indication for looking into OF. 
MTD should check the "compatible" property and search for a matching parser rather than blindly trying the one supporting "fixed-partitions". It also means that the existing "fixed-partitions" parser should get renamed to use a more meaningful name. This commit achieves that aim by introducing a new mtd_part_of_parse(). It works by looking for a matching parser for every string in the "compatible" property (starting with the most specific one). Please note that driver-specified parsers still take precedence. It's assumed that a driver providing a parser type has a good reason for that (e.g. having platform data with device-specific info). Also doing otherwise could break existing setups. The same applies to using default parsers (including "cmdlinepart") as some overwrite DT data with cmdline argument. Partition parsers can now provide an of_match_table to enable flash<-->parser matching via device tree as documented in the mtd/partition.txt. This support is currently limited to built-in parsers as it uses request_module() and friends. This should be sufficient for most cases though as compiling parsers as modules isn't a common choice.
Signed-off-by: Brian Norris Signed-off-by: Rafał Miłecki Tested-by: Peter Rosin Reviewed-by: Richard Weinberger Signed-off-by: Boris Brezillon --- include/linux/mtd/partitions.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index c4beb70dacbd..11cb0c50cd84 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -77,6 +77,7 @@ struct mtd_part_parser { struct list_head list; struct module *owner; const char *name; + const struct of_device_id *of_match_table; int (*parse_fn)(struct mtd_info *, const struct mtd_partition **, struct mtd_part_parser_data *); void (*cleanup)(const struct mtd_partition *pparts, int nr_parts); -- cgit v1.2.3 From 6691dffab0ab6301bb7b489b1dcf9f5efdef202f Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 28 Feb 2018 14:08:57 +0100 Subject: reset: add support for non-DT systems The reset framework only supports device-tree. There are some platforms however, which need to use it even in legacy, board-file based mode. An example of such architecture is the DaVinci family of SoCs which supports both device tree and legacy boot modes and we don't want to introduce any regressions. We're currently working on converting the platform from its hand-crafted clock API to using the common clock framework. Part of the overhaul will be representing the chip's power sleep controller's reset lines using the reset framework. This changeset extends the core reset code with a new reset lookup entry structure. It contains data allowing the reset core to associate reset lines with devices by comparing the dev_id and con_id strings. It also provides a function allowing drivers to register lookup entries with the framework. The new lookup function is only called as a fallback in case the of_node field is NULL and doesn't change anything for current users. Tested with a dummy reset driver with several lookup entries. 
An example lookup table registration from a driver can be found below: static struct reset_control_lookup foobar_reset_lookup[] = { RESET_LOOKUP("foo.0", "foo", 15), RESET_LOOKUP("bar.0", NULL, 5), }; foobar_probe() { ... reset_controller_add_lookup(&rcdev, foobar_reset_lookup, ARRAY_SIZE(foobar_reset_lookup)); ... } Cc: Sekhar Nori Cc: Kevin Hilman Cc: David Lechner Signed-off-by: Bartosz Golaszewski Signed-off-by: Philipp Zabel --- include/linux/reset-controller.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h index adb88f8cefbc..25698f6c1fae 100644 --- a/include/linux/reset-controller.h +++ b/include/linux/reset-controller.h @@ -26,6 +26,30 @@ struct module; struct device_node; struct of_phandle_args; +/** + * struct reset_control_lookup - represents a single lookup entry + * + * @list: internal list of all reset lookup entries + * @rcdev: reset controller device controlling this reset line + * @index: ID of the reset controller in the reset controller device + * @dev_id: name of the device associated with this reset line + * @con_id name of the reset line (can be NULL) + */ +struct reset_control_lookup { + struct list_head list; + struct reset_controller_dev *rcdev; + unsigned int index; + const char *dev_id; + const char *con_id; +}; + +#define RESET_LOOKUP(_dev_id, _con_id, _index) \ + { \ + .dev_id = _dev_id, \ + .con_id = _con_id, \ + .index = _index, \ + } + /** * struct reset_controller_dev - reset controller entity that might * provide multiple reset controls @@ -58,4 +82,8 @@ struct device; int devm_reset_controller_register(struct device *dev, struct reset_controller_dev *rcdev); +void reset_controller_add_lookup(struct reset_controller_dev *rcdev, + struct reset_control_lookup *lookup, + unsigned int num_entries); + #endif -- cgit v1.2.3 From e2749bb998701e21cdb8b34486b82fc1c051ab41 Mon Sep 17 00:00:00 2001 From: Bartosz 
Golaszewski Date: Fri, 23 Mar 2018 14:04:48 +0100 Subject: reset: modify the way reset lookup works for board files Commit 7af1bb19f1d7 ("reset: add support for non-DT systems") introduced a reset control lookup mechanism for boards that still use board files. The routine used to register lookup entries takes the corresponding reset_controller_dev structure as an argument. It's been determined however that for the first user of this new interface - davinci psc driver - it will be easier to register the lookup entries using the reset controller device name. This patch changes the way lookup entries are added. Signed-off-by: Bartosz Golaszewski [p.zabel@pengutronix.de: added missing ERR_PTR] Signed-off-by: Philipp Zabel --- include/linux/reset-controller.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h index 25698f6c1fae..9326d671b6e6 100644 --- a/include/linux/reset-controller.h +++ b/include/linux/reset-controller.h @@ -30,24 +30,25 @@ struct of_phandle_args; * struct reset_control_lookup - represents a single lookup entry * * @list: internal list of all reset lookup entries - * @rcdev: reset controller device controlling this reset line + * @provider: name of the reset controller device controlling this reset line * @index: ID of the reset controller in the reset controller device * @dev_id: name of the device associated with this reset line * @con_id name of the reset line (can be NULL) */ struct reset_control_lookup { struct list_head list; - struct reset_controller_dev *rcdev; + const char *provider; unsigned int index; const char *dev_id; const char *con_id; }; -#define RESET_LOOKUP(_dev_id, _con_id, _index) \ +#define RESET_LOOKUP(_provider, _index, _dev_id, _con_id) \ { \ + .provider = _provider, \ + .index = _index, \ .dev_id = _dev_id, \ .con_id = _con_id, \ - .index = _index, \ } /** @@ -57,6 +58,7 @@ struct reset_control_lookup {
* @owner: kernel module of the reset controller driver * @list: internal list of reset controller devices * @reset_control_head: head of internal list of requested reset controls + * @dev: corresponding driver model device struct * @of_node: corresponding device tree node as phandle target * @of_reset_n_cells: number of cells in reset line specifiers * @of_xlate: translation function to translate from specifier as found in the @@ -68,6 +70,7 @@ struct reset_controller_dev { struct module *owner; struct list_head list; struct list_head reset_control_head; + struct device *dev; struct device_node *of_node; int of_reset_n_cells; int (*of_xlate)(struct reset_controller_dev *rcdev, @@ -82,8 +85,7 @@ struct device; int devm_reset_controller_register(struct device *dev, struct reset_controller_dev *rcdev); -void reset_controller_add_lookup(struct reset_controller_dev *rcdev, - struct reset_control_lookup *lookup, +void reset_controller_add_lookup(struct reset_control_lookup *lookup, unsigned int num_entries); #endif -- cgit v1.2.3 From 3af345258617e0412059c1ab6462495947f73e89 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 20 Mar 2018 20:07:59 +0200 Subject: firmware/dmi_scan: Uninline dmi_get_bios_year() helper Uninline dmi_get_bios_year() which, in particular, allows us to optimize it in the future. While doing this, convert the function to return an error code when BIOS date is not present or not parsable, or CONFIG_DMI=n. Additionally, during the move, add a bit of documentation. Suggested-by: Bjorn Helgaas Suggested-by: Rafael J. Wysocki Signed-off-by: Andy Shevchenko Reviewed-by: Jean Delvare Reviewed-by: Rafael J. Wysocki Acked-by: Thomas Gleixner Cc: Bjorn Helgaas Cc: Linus Torvalds Cc: Lukas Wunner Cc: Peter Zijlstra Cc: Rafael J . 
Wysocki Cc: linux-acpi@vger.kernel.org Cc: linux-pci@vger.kernel.org Fixes: 492a1abd61e4 ("dmi: Introduce the dmi_get_bios_year() helper function") Signed-off-by: Ingo Molnar --- include/linux/dmi.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmi.h b/include/linux/dmi.h index 0bade156e908..6a86d8db16d9 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -106,6 +106,7 @@ extern void dmi_scan_machine(void); extern void dmi_memdev_walk(void); extern void dmi_set_dump_stack_arch_desc(void); extern bool dmi_get_date(int field, int *yearp, int *monthp, int *dayp); +extern int dmi_get_bios_year(void); extern int dmi_name_in_vendors(const char *str); extern int dmi_name_in_serial(const char *str); extern int dmi_available; @@ -133,6 +134,7 @@ static inline bool dmi_get_date(int field, int *yearp, int *monthp, int *dayp) *dayp = 0; return false; } +static inline int dmi_get_bios_year(void) { return -ENXIO; } static inline int dmi_name_in_vendors(const char *s) { return 0; } static inline int dmi_name_in_serial(const char *s) { return 0; } #define dmi_available 0 @@ -147,13 +149,4 @@ static inline const struct dmi_system_id * #endif -static inline int dmi_get_bios_year(void) -{ - int year; - - dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL); - - return year; -} - #endif /* __DMI_H__ */ -- cgit v1.2.3 From 726cb3ba49692bdae6caff457755e7cdb432efa4 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 23 Mar 2018 09:34:52 -0700 Subject: gpiolib: Support 'gpio-reserved-ranges' property Some qcom platforms make some GPIOs or pins unavailable for use by non-secure operating systems, and thus reading or writing the registers for those pins will cause access control issues. Add support for a DT property to describe the set of GPIOs that are available for use so that higher level OSes are able to know what pins to avoid reading/writing. 
Non-DT platforms can add support by directly updating the chip->valid_mask. Signed-off-by: Stephen Boyd Signed-off-by: Stephen Boyd Tested-by: Timur Tabi Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 1ba9a331ec51..5382b5183b7e 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -288,6 +288,21 @@ struct gpio_chip { struct gpio_irq_chip irq; #endif + /** + * @need_valid_mask: + * + * If set core allocates @valid_mask with all bits set to one. + */ + bool need_valid_mask; + + /** + * @valid_mask: + * + * If not %NULL holds bitmask of GPIOs which are valid to be used + * from the chip. + */ + unsigned long *valid_mask; + #if defined(CONFIG_OF_GPIO) /* * If CONFIG_OF is enabled, then all GPIO controllers described in the @@ -384,6 +399,7 @@ bool gpiochip_line_is_open_source(struct gpio_chip *chip, unsigned int offset); /* Sleep persistence inquiry for drivers */ bool gpiochip_line_is_persistent(struct gpio_chip *chip, unsigned int offset); +bool gpiochip_line_is_valid(const struct gpio_chip *chip, unsigned int offset); /* get driver data */ void *gpiochip_get_data(struct gpio_chip *chip); -- cgit v1.2.3 From 4420bf21fb6c0306e36ad58ade1e741fba57ce65 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 27 Mar 2018 18:02:23 +0300 Subject: net: Rename net_sem to pernet_ops_rwsem net_sem is some undefined area name, so it will be better to make the area more defined. Rename it to pernet_ops_rwsem for better readability and better intelligibility. Signed-off-by: Kirill Tkhai Signed-off-by: David S. 
Miller --- include/linux/rtnetlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 562a175c35a9..c7d1e4689325 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -36,7 +36,7 @@ extern int rtnl_is_locked(void); extern int rtnl_lock_killable(void); extern wait_queue_head_t netdev_unregistering_wq; -extern struct rw_semaphore net_sem; +extern struct rw_semaphore pernet_ops_rwsem; #ifdef CONFIG_PROVE_LOCKING extern bool lockdep_rtnl_is_held(void); -- cgit v1.2.3 From c8d75a980fab886a9c716567e6b47cc414ad84ee Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 22 Mar 2018 15:34:04 +0200 Subject: IB/mlx5: Respect new UMR capabilities In some firmware configuration, UMR usage from Virtual Functions is restricted. This information is published to the driver using new capability bits. Avoid using UMRs in these cases and use the Firmware slow-path flow to create mkeys and populate them with Virtual to Physical address translation. Older drivers that do not have this patch, will end up using memory keys that aren't populated with Virtual to Physical address translation that is done part of the UMR work. 
Reviewed-by: Mark Bloch Signed-off-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Tested-by: Laurence Oberman Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/mlx5_ifc.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c63bbdc35503..64963fd2cd9b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -916,7 +916,11 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_202[0x1]; u8 ipoib_enhanced_offloads[0x1]; u8 ipoib_basic_offloads[0x1]; - u8 reserved_at_205[0x5]; + u8 reserved_at_205[0x1]; + u8 repeated_block_disabled[0x1]; + u8 umr_modify_entity_size_disabled[0x1]; + u8 umr_modify_atomic_disabled[0x1]; + u8 umr_indirect_mkey_disabled[0x1]; u8 umr_fence[0x2]; u8 reserved_at_20c[0x3]; u8 drain_sigerr[0x1]; -- cgit v1.2.3 From 2816077127230ef52cc7497903e71def45747611 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 26 Dec 2017 15:17:05 +0200 Subject: mlx5_{ib,core}: Add query SQ state helper function Move query SQ state function from mlx5_ib to mlx5_core in order to have it in shared code. It will be used in a downstream patch from mlx5e. 
Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/transobj.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h index 7e8f281f8c00..80d7aa8b2831 100644 --- a/include/linux/mlx5/transobj.h +++ b/include/linux/mlx5/transobj.h @@ -47,6 +47,7 @@ int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen); void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn); int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out); +int mlx5_core_query_sq_state(struct mlx5_core_dev *dev, u32 sqn, u8 *state); int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tirn); int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, -- cgit v1.2.3 From 1acae6b030164217b9c6a52245eade730057152b Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Sun, 31 Dec 2017 12:55:26 +0200 Subject: mlx5: Move dump error CQE function out of mlx5_ib for code sharing Move mlx5_ib dump error CQE implementation to mlx5 CQ header file in order to use it in a downstream patch from mlx5e. In addition, use print_hex_dump instead of manual dumping of the buffer. 
Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/cq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 445ad194e0fe..0ef6138eca49 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -193,6 +193,12 @@ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u16 cq_period, u16 cq_max_count); +static inline void mlx5_dump_err_cqe(struct mlx5_core_dev *dev, + struct mlx5_err_cqe *err_cqe) +{ + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, err_cqe, + sizeof(*err_cqe), false); +} int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); -- cgit v1.2.3 From 3cdb741efa02c5053a738d5816b70de11c4d6364 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 27 Mar 2018 08:53:01 -0700 Subject: regulator: qcom: smd: Add pm8998 and pmi8998 regulators Add the pm8998 and pmi8998 regulators as used in the MSM8998 platform. Signed-off-by: Bjorn Andersson Signed-off-by: Mark Brown --- include/linux/soc/qcom/smd-rpm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/smd-rpm.h b/include/linux/soc/qcom/smd-rpm.h index 9f5c6e53f3a5..9e4fdd861a51 100644 --- a/include/linux/soc/qcom/smd-rpm.h +++ b/include/linux/soc/qcom/smd-rpm.h @@ -10,6 +10,7 @@ struct qcom_smd_rpm; /* * Constants used for addressing resources in the RPM. 
*/ +#define QCOM_SMD_RPM_BOBB 0x62626f62 #define QCOM_SMD_RPM_BOOST 0x61747362 #define QCOM_SMD_RPM_BUS_CLK 0x316b6c63 #define QCOM_SMD_RPM_BUS_MASTER 0x73616d62 -- cgit v1.2.3 From f23f5bece686a76598335141a091934f7eb0998c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 27 Mar 2018 09:39:06 -0600 Subject: blk-mq: Allow PCI vector offset for mapping queues The PCI interrupt vectors intended to be associated with a queue may not start at 0; a driver may allocate pre_vectors for special use. This patch adds an offset parameter so blk-mq may find the intended affinity mask and updates all drivers using this API accordingly. Cc: Don Brace Cc: Cc: Signed-off-by: Keith Busch Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blk-mq-pci.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq-pci.h b/include/linux/blk-mq-pci.h index 6338551e0fb9..9f4c17f0d2d8 100644 --- a/include/linux/blk-mq-pci.h +++ b/include/linux/blk-mq-pci.h @@ -5,6 +5,7 @@ struct blk_mq_tag_set; struct pci_dev; -int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev); +int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev, + int offset); #endif /* _LINUX_BLK_MQ_PCI_H */ -- cgit v1.2.3 From 0e11f6443f522f89509495b13ef1f3745640144d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Feb 2018 07:54:49 -0800 Subject: fs: move I_DIRTY_INODE to fs.h And use it in a few more places rather than opencoding the values. 
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7b2caadb292..00da24bc0350 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2014,7 +2014,8 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) #define I_WB_SWITCH (1 << 13) #define I_OVL_INUSE (1 << 14) -#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) +#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) +#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) #define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME) extern void __mark_inode_dirty(struct inode *, int); -- cgit v1.2.3 From e89f5b37015309a8bdf0b21d08007580b92f92a4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Mar 2018 15:35:35 +0200 Subject: dma-mapping: Don't clear GFP_ZERO in dma_alloc_attrs Revert the clearing of __GFP_ZERO in dma_alloc_attrs and move it to dma_direct_alloc for now. While most common architectures always zero dma coherent allocations (and x86 did so since day one) this is not documented and at least arc and s390 do not zero without the explicit __GFP_ZERO argument.
Fixes: 57bf5a8963f8 ("dma-mapping: clear harmful GFP_* flags in common code") Reported-by: Evgeniy Didin Reported-by: Sebastian Ott Signed-off-by: Christoph Hellwig Signed-off-by: Thomas Gleixner Tested-by: Evgeniy Didin Cc: iommu@lists.linux-foundation.org Link: https://lkml.kernel.org/r/20180328133535.17302-2-hch@lst.de --- include/linux/dma-mapping.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index eb9eab4ecd6d..12fedcba9a9a 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -518,12 +518,8 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size, if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) return cpu_addr; - /* - * Let the implementation decide on the zone to allocate from, and - * decide on the way of zeroing the memory given that the memory - * returned should always be zeroed. - */ - flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM | __GFP_ZERO); + /* let the implementation decide on the zone to allocate from: */ + flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); if (!arch_dma_alloc_attrs(&dev, &flag)) return NULL; -- cgit v1.2.3 From 9ea393d8d8377b6da8ee25c6a114ec24c0687c7c Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 28 Mar 2018 18:43:57 +0300 Subject: stm class: Add SPDX GPL-2.0 header to replace GPLv2 boilerplate This adds SPDX GPL-2.0 header to stm core files and removes the GPLv2 boilerplate text. Signed-off-by: Alexander Shishkin --- include/linux/stm.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stm.h b/include/linux/stm.h index 210ff2292361..c6f577ab6f21 100644 --- a/include/linux/stm.h +++ b/include/linux/stm.h @@ -1,15 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * System Trace Module (STM) infrastructure apis * Copyright (C) 2014 Intel Corporation.
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. */ #ifndef _STM_H_ -- cgit v1.2.3 From 8ecd2953d0a1b78748b36f5bed6f233f5bd6d6ea Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 28 Mar 2018 18:41:25 +1100 Subject: ipc/shm: fix up for struct file no longer being available in shm.h Stephen Rothwell wrote: > After merging the userns tree, today's linux-next build (powerpc > ppc64_defconfig) produced this warning: > > In file included from include/linux/sched.h:16:0, > from arch/powerpc/lib/xor_vmx_glue.c:14: > include/linux/shm.h:17:35: error: 'struct file' declared inside parameter list will not be visible outside of this definition or declaration [-Werror] > bool is_file_shm_hugepages(struct file *file); > ^~~~ > > and many, many more (most warnings, but some errors - arch/powerpc is > mostly built with -Werror) I dug through this and I discovered that the error was caused by the removal of struct shmid_kernel from shm.h when building on powerpc. Except for observing the existence of "struct file *shm_file" in struct shmid_kernel I have no clue why the structure move would cause such a failure. I suspect shm.h always needed the forward declaration and something had been confusing gcc into not issuing the warning. --EWB Fixes: a2e102cd3cdd ("shm: Move struct shmid_kernel into ipc/shm.c") Signed-off-by: Stephen Rothwell Signed-off-by: Eric W.
Biederman --- include/linux/shm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/shm.h b/include/linux/shm.h index 3a8eae3ca33c..d8e69aed3d32 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -7,6 +7,8 @@ #include #include +struct file; + #ifdef CONFIG_SYSVIPC struct sysv_shm { struct list_head shm_clist; -- cgit v1.2.3 From 5a485803221777013944cbd1a7cd5c62efba3ffa Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 20 Mar 2018 15:02:05 +0100 Subject: x86/hyper-v: move hyperv.h out of uapi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hyperv.h is not part of uapi, there are no (known) users outside of kernel. We are making changes to this file to match current Hyper-V Hypervisor Top-Level Functional Specification (TLFS, see: https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs) and we don't want to maintain backwards compatibility. Move the file renaming to hyperv-tlfs.h to avoid confusing it with mshyperv.h. In future, all definitions from TLFS should go to it and all kernel objects should go to mshyperv.h or include/linux/hyperv.h. Signed-off-by: Vitaly Kuznetsov Acked-by: Thomas Gleixner Signed-off-by: Radim Krčmář --- include/linux/hyperv.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 93bd6fcd6e62..eed8b33b0173 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -26,7 +26,6 @@ #define _HYPERV_H #include -#include #include #include -- cgit v1.2.3 From cf14f27f82af78e713f8a57c477cf9233faf8b30 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 28 Mar 2018 12:05:36 -0700 Subject: macro: introduce COUNT_ARGS() macro move COUNT_ARGS() macro from apparmor to generic header and extend it to count till twelve. COUNT() was an alternative name for this logic, but it's used for different purpose in many other places. 
Similarly for CONCATENATE() macro. Suggested-by: Linus Torvalds Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/kernel.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3fd291503576..293fa0677fba 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -919,6 +919,13 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #define swap(a, b) \ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) +/* This counts to 12. Any more, it will return 13th argument. */ +#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n +#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +#define __CONCAT(a, b) a ## b +#define CONCATENATE(a, b) __CONCAT(a, b) + /** * container_of - cast a member of a structure out to the containing structure * @ptr: the pointer to the member. -- cgit v1.2.3 From c4f6699dfcb8558d138fe838f741b2c10f416cf9 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 28 Mar 2018 12:05:37 -0700 Subject: bpf: introduce BPF_RAW_TRACEPOINT Introduce BPF_PROG_TYPE_RAW_TRACEPOINT bpf program type to access kernel internal arguments of the tracepoints in their raw form. From bpf program point of view the access to the arguments look like: struct bpf_raw_tracepoint_args { __u64 args[0]; }; int bpf_prog(struct bpf_raw_tracepoint_args *ctx) { // program can read args[N] where N depends on tracepoint // and statically verified at program load+attach time } kprobe+bpf infrastructure allows programs access function arguments. This feature allows programs access raw tracepoint arguments. Similar to proposed 'dynamic ftrace events' there are no abi guarantees to what the tracepoints arguments are and what their meaning is.
The program needs to type cast args properly and use bpf_probe_read() helper to access struct fields when argument is a pointer. For every tracepoint __bpf_trace_##call function is prepared. In assembler it looks like: (gdb) disassemble __bpf_trace_xdp_exception Dump of assembler code for function __bpf_trace_xdp_exception: 0xffffffff81132080 <+0>: mov %ecx,%ecx 0xffffffff81132082 <+2>: jmpq 0xffffffff811231f0 where TRACE_EVENT(xdp_exception, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, u32 act), The above assembler snippet is casting 32-bit 'act' field into 'u64' to pass into bpf_trace_run3(), while 'dev' and 'xdp' args are passed as-is. All of ~500 of __bpf_trace_*() functions are only 5-10 byte long and in total this approach adds 7k bytes to .text. This approach gives the lowest possible overhead while calling trace_xdp_exception() from kernel C code and transitioning into bpf land. Since tracepoint+bpf are used at speeds of 1M+ events per second this is valuable optimization. The new BPF_RAW_TRACEPOINT_OPEN sys_bpf command is introduced that returns anon_inode FD of 'bpf-raw-tracepoint' object. The user space looks like: // load bpf prog with BPF_PROG_TYPE_RAW_TRACEPOINT type prog_fd = bpf_prog_load(...); // receive anon_inode fd for given bpf_raw_tracepoint with prog attached raw_tp_fd = bpf_raw_tracepoint_open("xdp_exception", prog_fd); Ctrl-C of tracing daemon or cmdline tool that uses this feature will automatically detach bpf program, unload it and unregister tracepoint probe. On the kernel side the __bpf_raw_tp_map section of pointers to tracepoint definition and to __bpf_trace_*() probe function is used to find a tracepoint with "xdp_exception" name and corresponding __bpf_trace_xdp_exception() probe function which are passed to tracepoint_probe_register() to connect probe with tracepoint. Addition of bpf_raw_tracepoint doesn't interfere with ftrace and perf tracepoint mechanisms. 
perf_event_open() can be used in parallel on the same tracepoint. Multiple bpf_raw_tracepoint_open("xdp_exception", prog_fd) are permitted. Each with its own bpf program. The kernel will execute all tracepoint probes and all attached bpf programs. In the future bpf_raw_tracepoints can be extended with query/introspection logic. __bpf_raw_tp_map section logic was contributed by Steven Rostedt Signed-off-by: Alexei Starovoitov Signed-off-by: Steven Rostedt (VMware) Acked-by: Steven Rostedt (VMware) Signed-off-by: Daniel Borkmann --- include/linux/bpf_types.h | 1 + include/linux/trace_events.h | 42 +++++++++++++++++++++++++++++++++++++++++ include/linux/tracepoint-defs.h | 6 ++++++ 3 files changed, 49 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 5e2e8a49fb21..6d7243bfb0ff 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -19,6 +19,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg) BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) +BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) #endif #ifdef CONFIG_CGROUP_BPF BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 8a1442c4e513..b0357cd198b0 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -468,6 +468,9 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx); int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog); void perf_event_detach_bpf_prog(struct perf_event *event); int perf_event_query_prog_array(struct perf_event *event, void __user *info); +int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog); +int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog); +struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name); 
#else static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { @@ -487,6 +490,18 @@ perf_event_query_prog_array(struct perf_event *event, void __user *info) { return -EOPNOTSUPP; } +static inline int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *p) +{ + return -EOPNOTSUPP; +} +static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *p) +{ + return -EOPNOTSUPP; +} +static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name) +{ + return NULL; +} #endif enum { @@ -546,6 +561,33 @@ extern void ftrace_profile_free_filter(struct perf_event *event); void perf_trace_buf_update(void *record, u16 type); void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp); +void bpf_trace_run1(struct bpf_prog *prog, u64 arg1); +void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2); +void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3); +void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4); +void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5); +void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6); +void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7); +void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, + u64 arg8); +void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, + u64 arg8, u64 arg9); +void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, + u64 arg8, u64 arg9, u64 arg10); +void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, + u64 arg8, u64 arg9, u64 arg10, u64 arg11); +void bpf_trace_run12(struct bpf_prog 
*prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, + u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12); void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, struct trace_event_call *call, u64 count, struct pt_regs *regs, struct hlist_head *head, diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 64ed7064f1fa..22c5a46e9693 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -35,4 +35,10 @@ struct tracepoint { struct tracepoint_func __rcu *funcs; }; +struct bpf_raw_event_map { + struct tracepoint *tp; + void *bpf_func; + u32 num_args; +} __aligned(32); + #endif -- cgit v1.2.3 From e59ac634908f4ea90066e6db7dd7ae8ca02815ff Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 28 Mar 2018 17:48:33 -0700 Subject: bpf: add parenthesis around argument of BPF_LDST_BYTES() BPF_LDST_BYTES() does not put its argument in parentheses when referencing it. This makes it impossible to pass pointers obtained by address-of operator (e.g. BPF_LDST_BYTES(&insn)). Add the parentheses. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 109d05ccea9a..c2f167db8bd5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -372,7 +372,7 @@ struct xdp_rxq_info; #define BPF_LDST_BYTES(insn) \ ({ \ - const int __size = bpf_size_to_bytes(BPF_SIZE(insn->code)); \ + const int __size = bpf_size_to_bytes(BPF_SIZE((insn)->code)); \ WARN_ON(__size < 0); \ __size; \ }) -- cgit v1.2.3 From 64bdff698092aa6be28c3b248f887022eec77902 Mon Sep 17 00:00:00 2001 From: "Rafael J.
Wysocki" Date: Wed, 14 Mar 2018 12:27:21 +0100 Subject: PM: cpuidle/suspend: Add s2idle usage and time state attributes Add a new attribute group called "s2idle" under the sysfs directory of each cpuidle state that supports the ->enter_s2idle callback and put two new attributes, "usage" and "time", into that group to represent the number of times the given state was requested for suspend-to-idle and the total time spent in suspend-to-idle after requesting that state, respectively. That will allow diagnostic information related to suspend-to-idle to be collected without enabling advanced debug features and analyzing dmesg output. Signed-off-by: Rafael J. Wysocki --- include/linux/cpuidle.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 0b3fc229086c..a806e94c482f 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -33,6 +33,10 @@ struct cpuidle_state_usage { unsigned long long disable; unsigned long long usage; unsigned long long time; /* in US */ +#ifdef CONFIG_SUSPEND + unsigned long long s2idle_usage; + unsigned long long s2idle_time; /* in US */ +#endif }; struct cpuidle_state { -- cgit v1.2.3 From 6ed70cf342de03c7b11cd4eb032705faeb29d284 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 29 Mar 2018 15:06:48 +0300 Subject: perf/x86/pt, coresight: Clean up address filter structure This is a cosmetic patch that deals with the address filter structure's ambiguous fields 'filter' and 'range'. The former stands to mean that the filter's *action* should be to filter the traces to its address range if it's set or stop tracing if it's unset. This is confusing and hard on the eyes, so this patch replaces it with 'action' enum. The 'range' field is completely redundant (meaning that the filter is an address range as opposed to a single address trigger), as we can use zero size to mean the same thing. 
Signed-off-by: Alexander Shishkin Acked-by: Mathieu Poirier Acked-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Will Deacon Link: http://lkml.kernel.org/r/20180329120648.11902-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ff39ab011376..e71e99eb9a4e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -449,14 +449,19 @@ struct pmu { int (*filter_match) (struct perf_event *event); /* optional */ }; +enum perf_addr_filter_action_t { + PERF_ADDR_FILTER_ACTION_STOP = 0, + PERF_ADDR_FILTER_ACTION_START, + PERF_ADDR_FILTER_ACTION_FILTER, +}; + /** * struct perf_addr_filter - address range filter definition * @entry: event's filter list linkage * @inode: object file's inode for file-based filters * @offset: filter range offset - * @size: filter range size - * @range: 1: range, 0: address - * @filter: 1: filter/start, 0: stop + * @size: filter range size (size==0 means single address trigger) + * @action: filter/start/stop * * This is a hardware-agnostic filter configuration as specified by the user. */ @@ -465,8 +470,7 @@ struct perf_addr_filter { struct inode *inode; unsigned long offset; unsigned long size; - unsigned int range : 1, - filter : 1; + enum perf_addr_filter_action_t action; }; /** -- cgit v1.2.3 From f0b07bb151b098d291fd1fd71ef7a2df56fb124a Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 29 Mar 2018 19:20:32 +0300 Subject: net: Introduce net_rwsem to protect net_namespace_list rtnl_lock() is used everywhere, and contention is very high. When someone wants to iterate over alive net namespaces, he/she has no a possibility to do that without exclusive lock. 
But the exclusive rtnl_lock() in such places is overkill, and it just increases the contention. Yes, there is already for_each_net_rcu() in kernel, but it requires rcu_read_lock(), and this can't be sleepable. Also, sometimes it may be necessary to really prevent net_namespace_list growth, so for_each_net_rcu() does not fit there. This patch introduces new rw_semaphore, which will be used instead of rtnl_mutex to protect net_namespace_list. It is sleepable and allows non-exclusive iterations over net namespaces list. It allows to stop using rtnl_lock() in several places (which is done in next patches) and reduces the time we keep rtnl_mutex. Here we just add new lock, while the explanation of why we can remove rtnl_lock() there is in next patches. Fine grained locks generally are better than one big lock, so let's do that with net_namespace_list, while the situation allows that. Signed-off-by: Kirill Tkhai Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index c7d1e4689325..5225832bd6ff 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -37,6 +37,7 @@ extern int rtnl_lock_killable(void); extern wait_queue_head_t netdev_unregistering_wq; extern struct rw_semaphore pernet_ops_rwsem; +extern struct rw_semaphore net_rwsem; #ifdef CONFIG_PROVE_LOCKING extern bool lockdep_rtnl_is_held(void); -- cgit v1.2.3 From 50bc60cb155c813157fdca5b3b05194cd325d3e9 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 28 Mar 2018 11:42:16 +0300 Subject: qed*: Utilize FW 8.33.11.0 This FW contains several fixes and features RDMA Features - SRQ support - XRC support - Memory window support - RDMA low latency queue support - RDMA bonding support RDMA bug fixes - RDMA remote invalidate during retransmit fix - iWARP MPA connect interop issue with RTR fix - iWARP Legacy DPM support - Fix MPA reject flow - iWARP error handling - RQ
WQE validation checks MISC - Fix some HSI types endianity - New Restriction: vlan insertion in core_tx_bd_data can't be set for LB packets ETH - HW QoS offload support - Fix vlan, dcb and sriov flow of VF sending a packet with inband VLAN tag instead of default VLAN - Allow GRE version 1 offloads in RX flow - Allow VXLAN steering iSCSI / FcoE - Fix bd availability checking flow - Support 256th sge proerly in iscsi/fcoe retransmit - Performance improvement - Fix handle iSCSI command arrival with AHS and with immediate - Fix ipv6 traffic class configuration DEBUG - Update debug utilities Signed-off-by: Michal Kalderon Signed-off-by: Tomer Tayar Signed-off-by: Manish Rangankar Signed-off-by: Ariel Elior Acked-by: Jason Gunthorpe Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 2 +- include/linux/qed/eth_common.h | 2 +- include/linux/qed/iscsi_common.h | 4 ++-- include/linux/qed/rdma_common.h | 2 ++ include/linux/qed/roce_common.h | 3 +++ 5 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 2b3b350e07b7..13c8ab171437 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -110,7 +110,7 @@ #define FW_MAJOR_VERSION 8 #define FW_MINOR_VERSION 33 -#define FW_REVISION_VERSION 1 +#define FW_REVISION_VERSION 11 #define FW_ENGINEERING_VERSION 0 /***********************/ diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h index 9db02856623b..d9416ad5ef59 100644 --- a/include/linux/qed/eth_common.h +++ b/include/linux/qed/eth_common.h @@ -105,7 +105,7 @@ #define ETH_CTL_FRAME_ETH_TYPE_NUM 4 /* GFS constants */ -#define ETH_GFT_TRASH_CAN_VPORT 0x1FF +#define ETH_GFT_TRASHCAN_VPORT 0x1FF /* GFT drop flow vport number */ /* Destination port mode */ enum dest_port_mode { diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h index 4cc9b37b8d95..938df614cb6a 100644 --- 
a/include/linux/qed/iscsi_common.h +++ b/include/linux/qed/iscsi_common.h @@ -753,8 +753,8 @@ struct e4_ystorm_iscsi_task_ag_ctx { #define E4_YSTORM_ISCSI_TASK_AG_CTX_BIT1_SHIFT 5 #define E4_YSTORM_ISCSI_TASK_AG_CTX_VALID_MASK 0x1 #define E4_YSTORM_ISCSI_TASK_AG_CTX_VALID_SHIFT 6 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_BIT3_MASK 0x1 -#define E4_YSTORM_ISCSI_TASK_AG_CTX_BIT3_SHIFT 7 +#define E4_YSTORM_ISCSI_TASK_AG_CTX_TTT_VALID_MASK 0x1 /* bit3 */ +#define E4_YSTORM_ISCSI_TASK_AG_CTX_TTT_VALID_SHIFT 7 u8 flags1; #define E4_YSTORM_ISCSI_TASK_AG_CTX_CF0_MASK 0x3 #define E4_YSTORM_ISCSI_TASK_AG_CTX_CF0_SHIFT 0 diff --git a/include/linux/qed/rdma_common.h b/include/linux/qed/rdma_common.h index c1a446ebe362..480a57eb36cc 100644 --- a/include/linux/qed/rdma_common.h +++ b/include/linux/qed/rdma_common.h @@ -51,6 +51,8 @@ #define RDMA_MAX_CQS (64 * 1024) #define RDMA_MAX_TIDS (128 * 1024 - 1) #define RDMA_MAX_PDS (64 * 1024) +#define RDMA_MAX_XRC_SRQS (1024) +#define RDMA_MAX_SRQS (32 * 1024) #define RDMA_NUM_STATISTIC_COUNTERS MAX_NUM_VPORTS #define RDMA_NUM_STATISTIC_COUNTERS_K2 MAX_NUM_VPORTS_K2 diff --git a/include/linux/qed/roce_common.h b/include/linux/qed/roce_common.h index e15e0da71240..193bcef302e1 100644 --- a/include/linux/qed/roce_common.h +++ b/include/linux/qed/roce_common.h @@ -59,6 +59,9 @@ enum roce_async_events_type { ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR, ROCE_ASYNC_EVENT_SRQ_EMPTY, ROCE_ASYNC_EVENT_DESTROY_QP_DONE, + ROCE_ASYNC_EVENT_XRC_DOMAIN_ERR, + ROCE_ASYNC_EVENT_INVALID_XRCETH_ERR, + ROCE_ASYNC_EVENT_XRC_SRQ_CATASTROPHIC_ERR, MAX_ROCE_ASYNC_EVENTS_TYPE }; -- cgit v1.2.3 From 3a69cae80cdd1b5c8b23137cba2a80ecfec4cef5 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 28 Mar 2018 05:14:22 -0700 Subject: qed: Adapter flash update support. This patch adds the required driver support for updating the flash or non volatile memory of the adapter. 
At a high level, a flash upgrade consists of reading the flash images from the input file, validating the images and writing them to the respective partitions. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 15e398c7230e..b5b2bc9eacca 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -483,6 +483,15 @@ struct qed_int_info { u8 used_cnt; }; +#define QED_NVM_SIGNATURE 0x12435687 + +enum qed_nvm_flash_cmd { + QED_NVM_FLASH_CMD_FILE_DATA = 0x2, + QED_NVM_FLASH_CMD_FILE_START = 0x3, + QED_NVM_FLASH_CMD_NVM_CHANGE = 0x4, + QED_NVM_FLASH_CMD_NVM_MAX, +}; + struct qed_common_cb_ops { void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc); void (*link_update)(void *dev, @@ -657,6 +666,16 @@ struct qed_common_ops { void (*chain_free)(struct qed_dev *cdev, struct qed_chain *p_chain); +/** + * @brief nvm_flash - Flash nvm data. + * + * @param cdev + * @param name - file containing the data + * + * @return 0 on success, error otherwise. 
+ */ + int (*nvm_flash)(struct qed_dev *cdev, const char *name); + /** * @brief nvm_get_image - reads an entire image from nvram * -- cgit v1.2.3 From 903ddaf49329076862d65f7284d825759ff67bd6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 7 Mar 2018 12:47:04 -0500 Subject: take out orphan externs (empty_string/slash_string) Signed-off-by: Al Viro --- include/linux/dcache.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 82a99d366aec..c84ffbfc5098 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -56,9 +56,7 @@ struct qstr { #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } -extern const char empty_string[]; extern const struct qstr empty_name; -extern const char slash_string[]; extern const struct qstr slash_name; struct dentry_stat_t { -- cgit v1.2.3 From 8934ce2fd08171e8605f7fada91ee7619fe17ab8 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 28 Mar 2018 12:49:15 -0700 Subject: bpf: sockmap redirect ingress support Add support for the BPF_F_INGRESS flag in sk_msg redirect helper. To do this add a scatterlist ring for receiving socks to check before calling into regular recvmsg call path. Additionally, because the poll wakeup logic only checked the skb recv queue we need to add a hook in TCP stack (similar to write side) so that we have a way to wake up polling socks when a scatterlist is redirected to that sock. After this all that is needed is for the redirect helper to push the scatterlist into the psock receive queue. 
Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/linux/filter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index c2f167db8bd5..961cc5d53956 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -521,6 +521,7 @@ struct sk_msg_buff { __u32 key; __u32 flags; struct bpf_map *map; + struct list_head list; }; /* Compute the linear packet data range [data, data_end) which -- cgit v1.2.3 From fa246693a111fab32bd51d20f07a347e42773ee9 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 28 Mar 2018 12:49:25 -0700 Subject: bpf: sockmap, BPF_F_INGRESS flag for BPF_SK_SKB_STREAM_VERDICT: Add support for the BPF_F_INGRESS flag in skb redirect helper. To do this convert skb into a scatterlist and push into ingress queue. This is the same logic that is used in the sk_msg redirect helper so it should feel familiar. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/linux/filter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 961cc5d53956..897ff3d95968 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -521,6 +521,7 @@ struct sk_msg_buff { __u32 key; __u32 flags; struct bpf_map *map; + struct sk_buff *skb; struct list_head list; }; -- cgit v1.2.3 From c6ac3f35d46b3c9999838dd13e7e113674f22ffa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 30 Mar 2018 00:05:01 +0200 Subject: lightnvm: flatten nvm_id_group into nvm_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no groups in the 2.0 specification, make sure that the nvm_id structure is flattened before 2.0 data structures are added. 
Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 53 ++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 7f4b60abdf27..94b704a8d83d 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -154,9 +154,29 @@ struct nvm_id_lp_tbl { struct nvm_id_lp_mlc mlc; }; -struct nvm_id_group { - u8 mtype; - u8 fmtype; +struct nvm_addr_format { + u8 ch_offset; + u8 ch_len; + u8 lun_offset; + u8 lun_len; + u8 pln_offset; + u8 pln_len; + u8 blk_offset; + u8 blk_len; + u8 pg_offset; + u8 pg_len; + u8 sect_offset; + u8 sect_len; +}; + +struct nvm_id { + u8 ver_id; + u8 vmnt; + u32 cap; + u32 dom; + + struct nvm_addr_format ppaf; + u8 num_ch; u8 num_lun; u16 num_chk; @@ -180,33 +200,12 @@ struct nvm_id_group { u16 cpar; /* 1.2 compatibility */ + u8 mtype; + u8 fmtype; + u8 num_pln; u16 num_pg; u16 fpg_sz; -}; - -struct nvm_addr_format { - u8 ch_offset; - u8 ch_len; - u8 lun_offset; - u8 lun_len; - u8 pln_offset; - u8 pln_len; - u8 blk_offset; - u8 blk_len; - u8 pg_offset; - u8 pg_len; - u8 sect_offset; - u8 sect_len; -}; - -struct nvm_id { - u8 ver_id; - u8 vmnt; - u32 cap; - u32 dom; - struct nvm_addr_format ppaf; - struct nvm_id_group grp; } __packed; struct nvm_target { -- cgit v1.2.3 From 62771fe0aa28b5d329f3e53a2e0f805f73433752 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 30 Mar 2018 00:05:02 +0200 Subject: lightnvm: add 2.0 geometry identification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the geometry data structures for 2.0 and enable a drive to be identified as one, including exposing the appropriate 2.0 sysfs entries. 
Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 94b704a8d83d..b717c000b712 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -184,10 +184,9 @@ struct nvm_id { u16 csecs; u16 sos; - u16 ws_min; - u16 ws_opt; - u16 ws_seq; - u16 ws_per_chk; + u32 ws_min; + u32 ws_opt; + u32 mw_cunits; u32 trdt; u32 trdm; @@ -199,6 +198,10 @@ struct nvm_id { u32 mccap; u16 cpar; + /* calculated values */ + u16 ws_seq; + u16 ws_per_chk; + /* 1.2 compatibility */ u8 mtype; u8 fmtype; -- cgit v1.2.3 From af569398c390810fca773c903a85b71dfd870bb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 30 Mar 2018 00:05:03 +0200 Subject: lightnvm: remove max_rq_size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The field is no longer used. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index b717c000b712..67b4fa8e4906 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -295,8 +295,6 @@ struct nvm_geo { int ws_seq; int ws_per_chk; - int max_rq_size; - int op; struct nvm_addr_format ppaf; -- cgit v1.2.3 From 89a09c5643e01f5e5d3c5f2e720053473a60a90b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 30 Mar 2018 00:05:04 +0200 Subject: lightnvm: remove nvm_dev_ops->max_phys_sect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The value of max_phys_sect is always static. Instead of defining it in the nvm_dev_ops structure, declare it as a global value. 
Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 67b4fa8e4906..e55b10573c99 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -73,8 +73,6 @@ struct nvm_dev_ops { nvm_destroy_dma_pool_fn *destroy_dma_pool; nvm_dev_dma_alloc_fn *dev_dma_alloc; nvm_dev_dma_free_fn *dev_dma_free; - - unsigned int max_phys_sect; }; #ifdef CONFIG_NVM @@ -228,6 +226,8 @@ struct nvm_target { #define NVM_VERSION_MINOR 0 #define NVM_VERSION_PATCH 0 +#define NVM_MAX_VLBA (64) /* max logical blocks in a vector command */ + struct nvm_rq; typedef void (nvm_end_io_fn)(struct nvm_rq *); @@ -436,7 +436,6 @@ extern void nvm_unregister(struct nvm_dev *); extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *, int, int); -extern int nvm_max_phys_sects(struct nvm_tgt_dev *); extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); extern int nvm_submit_io_sync(struct nvm_tgt_dev *, struct nvm_rq *); extern void nvm_end_io(struct nvm_rq *); -- cgit v1.2.3 From e46f4e4822bdecf9bcbc2e71b2a3ae7f37464a2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 30 Mar 2018 00:05:10 +0200 Subject: lightnvm: simplify geometry structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the device geometry is stored redundantly in the nvm_id and nvm_geo structures at a device level. Moreover, when instantiating targets on a specific number of LUNs, these structures are replicated and manually modified to fit the instance channel and LUN partitioning. Instead, create a generic geometry around nvm_geo, which can be used by (i) the underlying device to describe the geometry of the whole device, and (ii) instances to describe their geometry independently. 
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 196 +++++++++++++++++++++++------------------------ 1 file changed, 98 insertions(+), 98 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index e55b10573c99..6e650563b379 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -50,7 +50,7 @@ struct nvm_id; struct nvm_dev; struct nvm_tgt_dev; -typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *); +typedef int (nvm_id_fn)(struct nvm_dev *); typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *); typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int); typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); @@ -152,62 +152,48 @@ struct nvm_id_lp_tbl { struct nvm_id_lp_mlc mlc; }; -struct nvm_addr_format { - u8 ch_offset; +struct nvm_addrf_12 { u8 ch_len; - u8 lun_offset; u8 lun_len; - u8 pln_offset; - u8 pln_len; - u8 blk_offset; u8 blk_len; - u8 pg_offset; u8 pg_len; - u8 sect_offset; + u8 pln_len; u8 sect_len; -}; - -struct nvm_id { - u8 ver_id; - u8 vmnt; - u32 cap; - u32 dom; - struct nvm_addr_format ppaf; - - u8 num_ch; - u8 num_lun; - u16 num_chk; - u16 clba; - u16 csecs; - u16 sos; - - u32 ws_min; - u32 ws_opt; - u32 mw_cunits; - - u32 trdt; - u32 trdm; - u32 tprt; - u32 tprm; - u32 tbet; - u32 tbem; - u32 mpos; - u32 mccap; - u16 cpar; + u8 ch_offset; + u8 lun_offset; + u8 blk_offset; + u8 pg_offset; + u8 pln_offset; + u8 sect_offset; - /* calculated values */ - u16 ws_seq; - u16 ws_per_chk; + u64 ch_mask; + u64 lun_mask; + u64 blk_mask; + u64 pg_mask; + u64 pln_mask; + u64 sec_mask; +}; - /* 1.2 compatibility */ - u8 mtype; - u8 fmtype; +struct nvm_addrf { + u8 ch_len; + u8 lun_len; + u8 chk_len; + u8 sec_len; + u8 rsv_len[2]; - u8 num_pln; - u16 num_pg; - u16 fpg_sz; -} __packed; + u8 ch_offset; + u8 lun_offset; + u8 chk_offset; + u8 sec_offset; + u8 rsv_off[2]; + 
+ u64 ch_mask; + u64 lun_mask; + u64 chk_mask; + u64 sec_mask; + u64 rsv_mask[2]; +}; struct nvm_target { struct list_head list; @@ -274,36 +260,63 @@ enum { NVM_BLK_ST_BAD = 0x8, /* Bad block */ }; - -/* Device generic information */ +/* Instance geometry */ struct nvm_geo { - /* generic geometry */ + /* device reported version */ + u8 ver_id; + + /* instance specific geometry */ int nr_chnls; - int all_luns; /* across channels */ - int nr_luns; /* per channel */ - int nr_chks; /* per lun */ + int nr_luns; /* per channel */ - int sec_size; - int oob_size; - int mccap; + /* calculated values */ + int all_luns; /* across channels */ + int all_chunks; /* across channels */ + + int op; /* over-provision in instance */ + + sector_t total_secs; /* across channels */ + + /* chunk geometry */ + u32 nr_chks; /* chunks per lun */ + u32 clba; /* sectors per chunk */ + u16 csecs; /* sector size */ + u16 sos; /* out-of-band area size */ - int sec_per_chk; - int sec_per_lun; + /* device write constrains */ + u32 ws_min; /* minimum write size */ + u32 ws_opt; /* optimal write size */ + u32 mw_cunits; /* distance required for successful read */ - int ws_min; - int ws_opt; - int ws_seq; - int ws_per_chk; + /* device capabilities */ + u32 mccap; - int op; + /* device timings */ + u32 trdt; /* Avg. Tread (ns) */ + u32 trdm; /* Max Tread (ns) */ + u32 tprt; /* Avg. Tprog (ns) */ + u32 tprm; /* Max Tprog (ns) */ + u32 tbet; /* Avg. 
Terase (ns) */ + u32 tbem; /* Max Terase (ns) */ - struct nvm_addr_format ppaf; + /* generic address format */ + struct nvm_addrf addrf; - /* Legacy 1.2 specific geometry */ - int plane_mode; /* drive device in single, double or quad mode */ - int nr_planes; - int sec_per_pg; /* only sectors for a single page */ - int sec_per_pl; /* all sectors across planes */ + /* 1.2 compatibility */ + u8 vmnt; + u32 cap; + u32 dom; + + u8 mtype; + u8 fmtype; + + u16 cpar; + u32 mpos; + + u8 num_pln; + u8 plane_mode; + u16 num_pg; + u16 fpg_sz; }; /* sub-device structure */ @@ -314,9 +327,6 @@ struct nvm_tgt_dev { /* Base ppas for target LUNs */ struct ppa_addr *luns; - sector_t total_secs; - - struct nvm_id identity; struct request_queue *q; struct nvm_dev *parent; @@ -331,13 +341,9 @@ struct nvm_dev { /* Device information */ struct nvm_geo geo; - unsigned long total_secs; - unsigned long *lun_map; void *dma_pool; - struct nvm_id identity; - /* Backend device */ struct request_queue *q; char name[DISK_NAME_LEN]; @@ -357,14 +363,15 @@ static inline struct ppa_addr generic_to_dev_addr(struct nvm_tgt_dev *tgt_dev, struct ppa_addr r) { struct nvm_geo *geo = &tgt_dev->geo; + struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&geo->addrf; struct ppa_addr l; - l.ppa = ((u64)r.g.blk) << geo->ppaf.blk_offset; - l.ppa |= ((u64)r.g.pg) << geo->ppaf.pg_offset; - l.ppa |= ((u64)r.g.sec) << geo->ppaf.sect_offset; - l.ppa |= ((u64)r.g.pl) << geo->ppaf.pln_offset; - l.ppa |= ((u64)r.g.lun) << geo->ppaf.lun_offset; - l.ppa |= ((u64)r.g.ch) << geo->ppaf.ch_offset; + l.ppa = ((u64)r.g.ch) << ppaf->ch_offset; + l.ppa |= ((u64)r.g.lun) << ppaf->lun_offset; + l.ppa |= ((u64)r.g.blk) << ppaf->blk_offset; + l.ppa |= ((u64)r.g.pg) << ppaf->pg_offset; + l.ppa |= ((u64)r.g.pl) << ppaf->pln_offset; + l.ppa |= ((u64)r.g.sec) << ppaf->sect_offset; return l; } @@ -373,24 +380,17 @@ static inline struct ppa_addr dev_to_generic_addr(struct nvm_tgt_dev *tgt_dev, struct ppa_addr r) { struct nvm_geo *geo = 
&tgt_dev->geo; + struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&geo->addrf; struct ppa_addr l; l.ppa = 0; - /* - * (r.ppa << X offset) & X len bitmask. X eq. blk, pg, etc. - */ - l.g.blk = (r.ppa >> geo->ppaf.blk_offset) & - (((1 << geo->ppaf.blk_len) - 1)); - l.g.pg |= (r.ppa >> geo->ppaf.pg_offset) & - (((1 << geo->ppaf.pg_len) - 1)); - l.g.sec |= (r.ppa >> geo->ppaf.sect_offset) & - (((1 << geo->ppaf.sect_len) - 1)); - l.g.pl |= (r.ppa >> geo->ppaf.pln_offset) & - (((1 << geo->ppaf.pln_len) - 1)); - l.g.lun |= (r.ppa >> geo->ppaf.lun_offset) & - (((1 << geo->ppaf.lun_len) - 1)); - l.g.ch |= (r.ppa >> geo->ppaf.ch_offset) & - (((1 << geo->ppaf.ch_len) - 1)); + + l.g.ch = (r.ppa & ppaf->ch_mask) >> ppaf->ch_offset; + l.g.lun = (r.ppa & ppaf->lun_mask) >> ppaf->lun_offset; + l.g.blk = (r.ppa & ppaf->blk_mask) >> ppaf->blk_offset; + l.g.pg = (r.ppa & ppaf->pg_mask) >> ppaf->pg_offset; + l.g.pl = (r.ppa & ppaf->pln_mask) >> ppaf->pln_offset; + l.g.sec = (r.ppa & ppaf->sec_mask) >> ppaf->sect_offset; return l; } -- cgit v1.2.3 From 3cb98f84d368b3bbe07a2d5bf938e31f74567620 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 30 Mar 2018 00:05:11 +0200 Subject: lightnvm: add minor version to generic geometry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Separate the version between major and minor on the generic geometry and represent it through sysfs in the 2.0 path. The 1.2 path only shows the major version to preserve the existing user space interface. 
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 6e650563b379..7ed8b92d6744 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -263,7 +263,8 @@ enum { /* Instance geometry */ struct nvm_geo { /* device reported version */ - u8 ver_id; + u8 major_ver_id; + u8 minor_ver_id; /* instance specific geometry */ int nr_chnls; -- cgit v1.2.3 From f1d4e8121f3fc25f9be94c6de6b8f5f788ad0265 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 30 Mar 2018 00:05:12 +0200 Subject: lightnvm: add shorten OCSSD version in geo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create a shorten version to use in the generic geometry. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 7ed8b92d6744..a073c0c76260 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -23,6 +23,11 @@ enum { #define NVM_LUN_BITS (8) #define NVM_CH_BITS (7) +enum { + NVM_OCSSD_SPEC_12 = 12, + NVM_OCSSD_SPEC_20 = 20, +}; + struct ppa_addr { /* Generic structure for all addresses */ union { @@ -266,6 +271,9 @@ struct nvm_geo { u8 major_ver_id; u8 minor_ver_id; + /* kernel short version */ + u8 version; + /* instance specific geometry */ int nr_chnls; int nr_luns; /* per channel */ -- cgit v1.2.3 From 3f48021bad73696421e2725c856b9b3aec7f567c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 30 Mar 2018 00:05:13 +0200 Subject: lightnvm: complete geo structure with maxoc* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
Complete the generic geometry structure with the maxoc and maxocpu fields, present in the 2.0 spec. Also, expose them through sysfs. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index a073c0c76260..870959a58fef 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -296,6 +296,8 @@ struct nvm_geo { u32 ws_min; /* minimum write size */ u32 ws_opt; /* optimal write size */ u32 mw_cunits; /* distance required for successful read */ + u32 maxoc; /* maximum open chunks */ + u32 maxocpu; /* maximum open chunks per parallel unit */ /* device capabilities */ u32 mccap; -- cgit v1.2.3 From a40afad90b9a253b282183eb9365f1cc14aeff77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 30 Mar 2018 00:05:14 +0200 Subject: lightnvm: normalize geometry nomenclature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Normalize nomenclature for naming channels, luns, chunks, planes and sectors as well as derivations in order to improve readability. 
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 870959a58fef..00295d9f9522 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -163,14 +163,14 @@ struct nvm_addrf_12 { u8 blk_len; u8 pg_len; u8 pln_len; - u8 sect_len; + u8 sec_len; u8 ch_offset; u8 lun_offset; u8 blk_offset; u8 pg_offset; u8 pln_offset; - u8 sect_offset; + u8 sec_offset; u64 ch_mask; u64 lun_mask; @@ -275,8 +275,8 @@ struct nvm_geo { u8 version; /* instance specific geometry */ - int nr_chnls; - int nr_luns; /* per channel */ + int num_ch; + int num_lun; /* per channel */ /* calculated values */ int all_luns; /* across channels */ @@ -287,7 +287,7 @@ struct nvm_geo { sector_t total_secs; /* across channels */ /* chunk geometry */ - u32 nr_chks; /* chunks per lun */ + u32 num_chk; /* chunks per lun */ u32 clba; /* sectors per chunk */ u16 csecs; /* sector size */ u16 sos; /* out-of-band area size */ @@ -325,7 +325,7 @@ struct nvm_geo { u32 mpos; u8 num_pln; - u8 plane_mode; + u8 pln_mode; u16 num_pg; u16 fpg_sz; }; @@ -382,7 +382,7 @@ static inline struct ppa_addr generic_to_dev_addr(struct nvm_tgt_dev *tgt_dev, l.ppa |= ((u64)r.g.blk) << ppaf->blk_offset; l.ppa |= ((u64)r.g.pg) << ppaf->pg_offset; l.ppa |= ((u64)r.g.pl) << ppaf->pln_offset; - l.ppa |= ((u64)r.g.sec) << ppaf->sect_offset; + l.ppa |= ((u64)r.g.sec) << ppaf->sec_offset; return l; } @@ -401,7 +401,7 @@ static inline struct ppa_addr dev_to_generic_addr(struct nvm_tgt_dev *tgt_dev, l.g.blk = (r.ppa & ppaf->blk_mask) >> ppaf->blk_offset; l.g.pg = (r.ppa & ppaf->pg_mask) >> ppaf->pg_offset; l.g.pl = (r.ppa & ppaf->pln_mask) >> ppaf->pln_offset; - l.g.sec = (r.ppa & ppaf->sec_mask) >> ppaf->sect_offset; + l.g.sec = (r.ppa & ppaf->sec_mask) >> ppaf->sec_offset; return l; } -- cgit v1.2.3 
From 694715137482b10d5be83b1dadf9a3cdee2ce1bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 30 Mar 2018 00:05:15 +0200 Subject: lightnvm: add support for 2.0 address format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for 2.0 address format. Also, align address bits for 1.2 and 2.0 to be able to operate on channel and luns without requiring a format conversion. Use a generic address format for this purpose. Also, convert the generic operations to the generic format in pblk. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 101 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 74 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 00295d9f9522..f2549b4b8626 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -16,12 +16,21 @@ enum { NVM_IOTYPE_GC = 1, }; -#define NVM_BLK_BITS (16) -#define NVM_PG_BITS (16) -#define NVM_SEC_BITS (8) -#define NVM_PL_BITS (8) -#define NVM_LUN_BITS (8) -#define NVM_CH_BITS (7) +/* common format */ +#define NVM_GEN_CH_BITS (8) +#define NVM_GEN_LUN_BITS (8) +#define NVM_GEN_BLK_BITS (16) +#define NVM_GEN_RESERVED (32) + +/* 1.2 format */ +#define NVM_12_PG_BITS (16) +#define NVM_12_PL_BITS (4) +#define NVM_12_SEC_BITS (4) +#define NVM_12_RESERVED (8) + +/* 2.0 format */ +#define NVM_20_SEC_BITS (24) +#define NVM_20_RESERVED (8) enum { NVM_OCSSD_SPEC_12 = 12, @@ -31,16 +40,34 @@ enum { struct ppa_addr { /* Generic structure for all addresses */ union { + /* generic device format */ struct { - u64 blk : NVM_BLK_BITS; - u64 pg : NVM_PG_BITS; - u64 sec : NVM_SEC_BITS; - u64 pl : NVM_PL_BITS; - u64 lun : NVM_LUN_BITS; - u64 ch : NVM_CH_BITS; - u64 reserved : 1; + u64 ch : NVM_GEN_CH_BITS; + u64 lun : NVM_GEN_LUN_BITS; + u64 blk : NVM_GEN_BLK_BITS; + u64 reserved : 
NVM_GEN_RESERVED; + } a; + + /* 1.2 device format */ + struct { + u64 ch : NVM_GEN_CH_BITS; + u64 lun : NVM_GEN_LUN_BITS; + u64 blk : NVM_GEN_BLK_BITS; + u64 pg : NVM_12_PG_BITS; + u64 pl : NVM_12_PL_BITS; + u64 sec : NVM_12_SEC_BITS; + u64 reserved : NVM_12_RESERVED; } g; + /* 2.0 device format */ + struct { + u64 grp : NVM_GEN_CH_BITS; + u64 pu : NVM_GEN_LUN_BITS; + u64 chk : NVM_GEN_BLK_BITS; + u64 sec : NVM_20_SEC_BITS; + u64 reserved : NVM_20_RESERVED; + } m; + struct { u64 line : 63; u64 is_cached : 1; @@ -374,15 +401,25 @@ static inline struct ppa_addr generic_to_dev_addr(struct nvm_tgt_dev *tgt_dev, struct ppa_addr r) { struct nvm_geo *geo = &tgt_dev->geo; - struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&geo->addrf; struct ppa_addr l; - l.ppa = ((u64)r.g.ch) << ppaf->ch_offset; - l.ppa |= ((u64)r.g.lun) << ppaf->lun_offset; - l.ppa |= ((u64)r.g.blk) << ppaf->blk_offset; - l.ppa |= ((u64)r.g.pg) << ppaf->pg_offset; - l.ppa |= ((u64)r.g.pl) << ppaf->pln_offset; - l.ppa |= ((u64)r.g.sec) << ppaf->sec_offset; + if (geo->version == NVM_OCSSD_SPEC_12) { + struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&geo->addrf; + + l.ppa = ((u64)r.g.ch) << ppaf->ch_offset; + l.ppa |= ((u64)r.g.lun) << ppaf->lun_offset; + l.ppa |= ((u64)r.g.blk) << ppaf->blk_offset; + l.ppa |= ((u64)r.g.pg) << ppaf->pg_offset; + l.ppa |= ((u64)r.g.pl) << ppaf->pln_offset; + l.ppa |= ((u64)r.g.sec) << ppaf->sec_offset; + } else { + struct nvm_addrf *lbaf = &geo->addrf; + + l.ppa = ((u64)r.m.grp) << lbaf->ch_offset; + l.ppa |= ((u64)r.m.pu) << lbaf->lun_offset; + l.ppa |= ((u64)r.m.chk) << lbaf->chk_offset; + l.ppa |= ((u64)r.m.sec) << lbaf->sec_offset; + } return l; } @@ -391,17 +428,27 @@ static inline struct ppa_addr dev_to_generic_addr(struct nvm_tgt_dev *tgt_dev, struct ppa_addr r) { struct nvm_geo *geo = &tgt_dev->geo; - struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&geo->addrf; struct ppa_addr l; l.ppa = 0; - l.g.ch = (r.ppa & ppaf->ch_mask) >> ppaf->ch_offset; - l.g.lun = 
(r.ppa & ppaf->lun_mask) >> ppaf->lun_offset; - l.g.blk = (r.ppa & ppaf->blk_mask) >> ppaf->blk_offset; - l.g.pg = (r.ppa & ppaf->pg_mask) >> ppaf->pg_offset; - l.g.pl = (r.ppa & ppaf->pln_mask) >> ppaf->pln_offset; - l.g.sec = (r.ppa & ppaf->sec_mask) >> ppaf->sec_offset; + if (geo->version == NVM_OCSSD_SPEC_12) { + struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&geo->addrf; + + l.g.ch = (r.ppa & ppaf->ch_mask) >> ppaf->ch_offset; + l.g.lun = (r.ppa & ppaf->lun_mask) >> ppaf->lun_offset; + l.g.blk = (r.ppa & ppaf->blk_mask) >> ppaf->blk_offset; + l.g.pg = (r.ppa & ppaf->pg_mask) >> ppaf->pg_offset; + l.g.pl = (r.ppa & ppaf->pln_mask) >> ppaf->pln_offset; + l.g.sec = (r.ppa & ppaf->sec_mask) >> ppaf->sec_offset; + } else { + struct nvm_addrf *lbaf = &geo->addrf; + + l.m.grp = (r.ppa & lbaf->ch_mask) >> lbaf->ch_offset; + l.m.pu = (r.ppa & lbaf->lun_mask) >> lbaf->lun_offset; + l.m.chk = (r.ppa & lbaf->chk_mask) >> lbaf->chk_offset; + l.m.sec = (r.ppa & lbaf->sec_mask) >> lbaf->sec_offset; + } return l; } -- cgit v1.2.3 From 7100d50a7e58a6884368001e2b1a32b7169c072c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 30 Mar 2018 00:05:16 +0200 Subject: lightnvm: make address conversions depend on generic device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On address conversions, use the generic device, instead of the target device. This allows to use conversions outside of the target's realm. 
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index f2549b4b8626..f3b273e543c3 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -397,10 +397,10 @@ struct nvm_dev { struct list_head targets; }; -static inline struct ppa_addr generic_to_dev_addr(struct nvm_tgt_dev *tgt_dev, +static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev, struct ppa_addr r) { - struct nvm_geo *geo = &tgt_dev->geo; + struct nvm_geo *geo = &dev->geo; struct ppa_addr l; if (geo->version == NVM_OCSSD_SPEC_12) { @@ -424,10 +424,10 @@ static inline struct ppa_addr generic_to_dev_addr(struct nvm_tgt_dev *tgt_dev, return l; } -static inline struct ppa_addr dev_to_generic_addr(struct nvm_tgt_dev *tgt_dev, +static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev, struct ppa_addr r) { - struct nvm_geo *geo = &tgt_dev->geo; + struct nvm_geo *geo = &dev->geo; struct ppa_addr l; l.ppa = 0; -- cgit v1.2.3 From a294c199455187d124b0760fa8f86c13cdaa4b25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 30 Mar 2018 00:05:17 +0200 Subject: lightnvm: implement get log report chunk helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 2.0 spec provides a report chunk log page that can be retrieved using the standard nvme get log page. This replaces the dedicated get/put bad block table in 1.2. This patch implements the helper functions to allow targets to retrieve the chunk metadata using get log page. It makes nvme_get_log_ext available outside of nvme core so that we can use it from lightnvm. 
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index f3b273e543c3..da45efa09bb2 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -81,10 +81,13 @@ struct nvm_rq; struct nvm_id; struct nvm_dev; struct nvm_tgt_dev; +struct nvm_chk_meta; typedef int (nvm_id_fn)(struct nvm_dev *); typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *); typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int); +typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, struct nvm_chk_meta *, + sector_t, int); typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *); typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *); @@ -98,6 +101,8 @@ struct nvm_dev_ops { nvm_op_bb_tbl_fn *get_bb_tbl; nvm_op_set_bb_fn *set_bb_tbl; + nvm_get_chk_meta_fn *get_chk_meta; + nvm_submit_io_fn *submit_io; nvm_submit_io_sync_fn *submit_io_sync; @@ -227,6 +232,20 @@ struct nvm_addrf { u64 rsv_mask[2]; }; +/* + * Note: The structure size is linked to nvme_nvm_chk_meta such that the same + * buffer can be used when converting from little endian to cpu addressing. 
+ */ +struct nvm_chk_meta { + u8 state; + u8 type; + u8 wi; + u8 rsvd[5]; + u64 slba; + u64 cnlb; + u64 wp; +}; + struct nvm_target { struct list_head list; struct nvm_tgt_dev *dev; @@ -492,6 +511,11 @@ extern struct nvm_dev *nvm_alloc_dev(int); extern int nvm_register(struct nvm_dev *); extern void nvm_unregister(struct nvm_dev *); + +extern int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, + struct nvm_chk_meta *meta, struct ppa_addr ppa, + int nchks); + extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *, int, int); extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); -- cgit v1.2.3 From 32ef9412c1142c64b372b83d3740f234f4226317 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 30 Mar 2018 00:05:20 +0200 Subject: lightnvm: pblk: implement get log report chunk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation of pblk supporting 2.0, implement the get log report chunk in pblk. Also, define the chunk states as given in the 2.0 spec. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index da45efa09bb2..6e0859b9d4d2 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -232,6 +232,19 @@ struct nvm_addrf { u64 rsv_mask[2]; }; +enum { + /* Chunk states */ + NVM_CHK_ST_FREE = 1 << 0, + NVM_CHK_ST_CLOSED = 1 << 1, + NVM_CHK_ST_OPEN = 1 << 2, + NVM_CHK_ST_OFFLINE = 1 << 3, + + /* Chunk types */ + NVM_CHK_TP_W_SEQ = 1 << 0, + NVM_CHK_TP_W_RAN = 1 << 1, + NVM_CHK_TP_SZ_SPEC = 1 << 4, +}; + /* * Note: The structure size is linked to nvme_nvm_chk_meta such that the same * buffer can be used when converting from little endian to cpu addressing. 
-- cgit v1.2.3 From b575454fa330aab2d65cf17812ca8e1f405ae80d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 14 Feb 2018 01:08:15 +1000 Subject: mm: make memblock_alloc_base_nid() non-static This will be used by powerpc to allocate per-cpu stacks and other data structures node-local where possible. Signed-off-by: Nicholas Piggin [mpe: Drop stray change to memblock_alloc_range() as noticed by akpm] Signed-off-by: Michael Ellerman --- include/linux/memblock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 8be5077efb5f..4e1e3d0b002a 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -319,6 +319,9 @@ static inline bool memblock_bottom_up(void) phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end, ulong flags); +phys_addr_t memblock_alloc_base_nid(phys_addr_t size, + phys_addr_t align, phys_addr_t max_addr, + int nid, ulong flags); phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr); phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align, -- cgit v1.2.3 From 9daae9bd47cff82a2a06aca23c458d6c79d09d52 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 28 Mar 2018 17:46:54 +0300 Subject: net: Call add/kill vid ndo on vlan filter feature toggling NETIF_F_HW_VLAN_[CS]TAG_FILTER features require more than just a bit flip in dev->features in order to keep the driver in a consistent state. These features notify the driver of each added/removed vlan, but toggling of vlan-filter does not notify the driver accordingly for each of the existing vlans. This patch implements a similar solution to NETIF_F_RX_UDP_TUNNEL_PORT behavior (which notifies the driver about UDP ports in the same manner that vids are reported). Each toggling of the features propagates to the 8021q module, which iterates over the vlans and call add/kill ndo accordingly. 
Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 24 ++++++++++++++++++++++++ include/linux/netdevice.h | 4 ++++ 2 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index c4a1cff9c768..24d1976c1e61 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -83,6 +83,30 @@ static inline bool is_vlan_dev(const struct net_device *dev) #define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) #define skb_vlan_tag_get_prio(__skb) ((__skb)->vlan_tci & VLAN_PRIO_MASK) +static inline int vlan_get_rx_ctag_filter_info(struct net_device *dev) +{ + ASSERT_RTNL(); + return notifier_to_errno(call_netdevice_notifiers(NETDEV_CVLAN_FILTER_PUSH_INFO, dev)); +} + +static inline void vlan_drop_rx_ctag_filter_info(struct net_device *dev) +{ + ASSERT_RTNL(); + call_netdevice_notifiers(NETDEV_CVLAN_FILTER_DROP_INFO, dev); +} + +static inline int vlan_get_rx_stag_filter_info(struct net_device *dev) +{ + ASSERT_RTNL(); + return notifier_to_errno(call_netdevice_notifiers(NETDEV_SVLAN_FILTER_PUSH_INFO, dev)); +} + +static inline void vlan_drop_rx_stag_filter_info(struct net_device *dev) +{ + ASSERT_RTNL(); + call_netdevice_notifiers(NETDEV_SVLAN_FILTER_DROP_INFO, dev); +} + /** * struct vlan_pcpu_stats - VLAN percpu rx/tx stats * @rx_packets: number of received packets diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2a2d9cf50aa2..da44dab492e3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2349,6 +2349,10 @@ enum netdev_cmd { NETDEV_UDP_TUNNEL_PUSH_INFO, NETDEV_UDP_TUNNEL_DROP_INFO, NETDEV_CHANGE_TX_QUEUE_LEN, + NETDEV_CVLAN_FILTER_PUSH_INFO, + NETDEV_CVLAN_FILTER_DROP_INFO, + NETDEV_SVLAN_FILTER_PUSH_INFO, + NETDEV_SVLAN_FILTER_DROP_INFO, }; const char *netdev_cmd_to_name(enum netdev_cmd cmd); -- cgit v1.2.3 From c6ab3008b6a6ecda22e92f96a1b9cc6b0d0b0a4e Mon Sep 17 00:00:00 2001 
From: Florian Fainelli Date: Wed, 28 Mar 2018 15:44:15 -0700 Subject: net: phy: phylink: Provide PHY interface to mac_link_{up, down} In preparation for having DSA transition entirely to PHYLINK, we need to pass a PHY interface type to the mac_link_{up,down} callbacks because we may have to make decisions on that (e.g: turn on/off RGMII interfaces etc.). We do not pass an entire phylink_link_state because not all parameters (pause, duplex etc.) are defined when the link is down, only link and interface are. Update mvneta accordingly since it currently implements phylink_mac_ops. Acked-by: Russell King Signed-off-by: Florian Fainelli Acked-by: Russell King Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phylink.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index bd137c273d38..e95cc12030fa 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -73,8 +73,10 @@ struct phylink_mac_ops { void (*mac_config)(struct net_device *ndev, unsigned int mode, const struct phylink_link_state *state); void (*mac_an_restart)(struct net_device *ndev); - void (*mac_link_down)(struct net_device *ndev, unsigned int mode); + void (*mac_link_down)(struct net_device *ndev, unsigned int mode, + phy_interface_t interface); void (*mac_link_up)(struct net_device *ndev, unsigned int mode, + phy_interface_t interface, struct phy_device *phy); }; @@ -161,25 +163,31 @@ void mac_an_restart(struct net_device *ndev); * mac_link_down() - take the link down * @ndev: a pointer to a &struct net_device for the MAC. * @mode: link autonegotiation mode + * @interface: link &typedef phy_interface_t mode * * If @mode is not an in-band negotiation mode (as defined by * phylink_autoneg_inband()), force the link down and disable any - * Energy Efficient Ethernet MAC configuration. + * Energy Efficient Ethernet MAC configuration. 
Interface type + * selection must be done in mac_config(). */ -void mac_link_down(struct net_device *ndev, unsigned int mode); +void mac_link_down(struct net_device *ndev, unsigned int mode, + phy_interface_t interface); /** * mac_link_up() - allow the link to come up * @ndev: a pointer to a &struct net_device for the MAC. * @mode: link autonegotiation mode + * @interface: link &typedef phy_interface_t mode * @phy: any attached phy * * If @mode is not an in-band negotiation mode (as defined by * phylink_autoneg_inband()), allow the link to come up. If @phy * is non-%NULL, configure Energy Efficient Ethernet by calling * phy_init_eee() and perform appropriate MAC configuration for EEE. + * Interface type selection must be done in mac_config(). */ void mac_link_up(struct net_device *ndev, unsigned int mode, + phy_interface_t interface, struct phy_device *phy); #endif -- cgit v1.2.3 From e679c9c1dbfdba07b2a979a076cca74b773be8ce Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 28 Mar 2018 15:44:16 -0700 Subject: sfp/phylink: move module EEPROM ethtool access into netdev core ethtool Provide a pointer to the SFP bus in struct net_device, so that the ethtool module EEPROM methods can access the SFP directly, rather than needing every user to provide a hook for it. Reviewed-by: Andrew Lunn Signed-off-by: Russell King Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: Russell King Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 3 +++ include/linux/phylink.h | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index da44dab492e3..cf44503ea81a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -58,6 +58,7 @@ struct device; struct phy_device; struct dsa_port; +struct sfp_bus; /* 802.11 specific */ struct wireless_dev; /* 802.15.4 specific */ @@ -1662,6 +1663,7 @@ enum netdev_priv_flags { * @priomap: XXX: need comments on this one * @phydev: Physical device may attach itself * for hardware timestamping + * @sfp_bus: attached &struct sfp_bus structure. * * @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount @@ -1945,6 +1947,7 @@ struct net_device { struct netprio_map __rcu *priomap; #endif struct phy_device *phydev; + struct sfp_bus *sfp_bus; struct lock_class_key *qdisc_tx_busylock; struct lock_class_key *qdisc_running_key; bool proto_down; diff --git a/include/linux/phylink.h b/include/linux/phylink.h index e95cc12030fa..50eeae025f1e 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -219,9 +219,6 @@ void phylink_ethtool_get_pauseparam(struct phylink *, struct ethtool_pauseparam *); int phylink_ethtool_set_pauseparam(struct phylink *, struct ethtool_pauseparam *); -int phylink_ethtool_get_module_info(struct phylink *, struct ethtool_modinfo *); -int phylink_ethtool_get_module_eeprom(struct phylink *, - struct ethtool_eeprom *, u8 *); int phylink_get_eee_err(struct phylink *); int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *); int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *); -- cgit v1.2.3 From 9217e566bdee4583d0a9ea4879c8f5e004886eac Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Thu, 29 Mar 2018 07:29:48 +0200 Subject: of_net: Implement of_get_nvmem_mac_address helper It's common 
practice to store MAC addresses for network interfaces into nvmem devices. However the code to actually do this is lacking in the kernel, so this patch adds of_get_nvmem_mac_address() for drivers to obtain the address from an nvmem cell provider. This is particularly useful on devices where the ethernet interface cannot be configured by the bootloader, for example because it's in an FPGA. Signed-off-by: Mike Looijmans Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/of_net.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_net.h b/include/linux/of_net.h index 9cd72aab76fe..90d81ee9e6a0 100644 --- a/include/linux/of_net.h +++ b/include/linux/of_net.h @@ -13,6 +13,7 @@ struct net_device; extern int of_get_phy_mode(struct device_node *np); extern const void *of_get_mac_address(struct device_node *np); +extern int of_get_nvmem_mac_address(struct device_node *np, void *addr); extern struct net_device *of_find_net_device_by_node(struct device_node *np); #else static inline int of_get_phy_mode(struct device_node *np) @@ -25,6 +26,11 @@ static inline const void *of_get_mac_address(struct device_node *np) return NULL; } +static inline int of_get_nvmem_mac_address(struct device_node *np, void *addr) +{ + return -ENODEV; +} + static inline struct net_device *of_find_net_device_by_node(struct device_node *np) { return NULL; -- cgit v1.2.3 From c769accdf3d8a103940bea2979b65556718567e9 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Thu, 29 Mar 2018 19:05:30 +0900 Subject: vlan: Fix vlan insertion for packets without ethernet header In some situations vlan packets do not have ethernet headers. One example is packets from tun devices. Users can specify vlan protocol in tun_pi field instead of IP protocol.
When we have a vlan device with reorder_hdr disabled on top of the tun device, such packets from tun devices are untagged in skb_vlan_untag() and vlan headers will be inserted back in vlan_insert_inner_tag(). vlan_insert_inner_tag() however did not expect packets without ethernet headers, so in such a case size argument for memmove() underflowed. We don't need to copy headers for packets which do not have preceding headers of vlan headers, so skip memmove() in that case. Also don't write vlan protocol in skb->data when it does not have enough room for it. Fixes: cbe7128c4b92 ("vlan: Fix out of order vlan headers with reorder header off") Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index c4a1cff9c768..7d30892da064 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -323,13 +323,24 @@ static inline int __vlan_insert_inner_tag(struct sk_buff *skb, skb_push(skb, VLAN_HLEN); /* Move the mac header sans proto to the beginning of the new header. 
*/ - memmove(skb->data, skb->data + VLAN_HLEN, mac_len - ETH_TLEN); + if (likely(mac_len > ETH_TLEN)) + memmove(skb->data, skb->data + VLAN_HLEN, mac_len - ETH_TLEN); skb->mac_header -= VLAN_HLEN; veth = (struct vlan_ethhdr *)(skb->data + mac_len - ETH_HLEN); /* first, the ethernet type */ - veth->h_vlan_proto = vlan_proto; + if (likely(mac_len >= ETH_TLEN)) { + /* h_vlan_encapsulated_proto should already be populated, and + * skb->data has space for h_vlan_proto + */ + veth->h_vlan_proto = vlan_proto; + } else { + /* h_vlan_encapsulated_proto should not be populated, and + * skb->data has no space for h_vlan_proto + */ + veth->h_vlan_encapsulated_proto = skb->protocol; + } /* now, the TCI */ veth->h_vlan_TCI = htons(vlan_tci); -- cgit v1.2.3 From f97c3dc3c0e8d23a5c4357d182afeef4c67f5c33 Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Thu, 29 Mar 2018 13:53:52 +0300 Subject: net/dim: Fix int overflow When calculating difference between samples, the values are multiplied by 100. Large values may cause int overflow when multiplied (usually on first iteration). Fixed by forcing 100 to be of type unsigned long. Fixes: 4c4dbb4a7363 ("net/mlx5e: Move dynamic interrupt coalescing code to include/linux") Signed-off-by: Tal Gilboa Reviewed-by: Andy Gospodarek Signed-off-by: David S. 
Miller --- include/linux/net_dim.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h index bebeaad897cc..29ed8fd6379a 100644 --- a/include/linux/net_dim.h +++ b/include/linux/net_dim.h @@ -231,7 +231,7 @@ static inline void net_dim_exit_parking(struct net_dim *dim) } #define IS_SIGNIFICANT_DIFF(val, ref) \ - (((100 * abs((val) - (ref))) / (ref)) > 10) /* more than 10% difference */ + (((100UL * abs((val) - (ref))) / (ref)) > 10) /* more than 10% difference */ static inline int net_dim_stats_compare(struct net_dim_stats *curr, struct net_dim_stats *prev) -- cgit v1.2.3 From 9def051018c08e65c532822749e857eb4b2e12e7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 21 Mar 2018 19:01:40 +0200 Subject: crypto: Deduplicate le32_to_cpu_array() and cpu_to_le32_array() Deduplicate le32_to_cpu_array() and cpu_to_le32_array() by moving them to the generic header. No functional change implied. Signed-off-by: Andy Shevchenko Signed-off-by: Herbert Xu --- include/linux/byteorder/generic.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h index 451aaa0786ae..4b13e0a3e15b 100644 --- a/include/linux/byteorder/generic.h +++ b/include/linux/byteorder/generic.h @@ -156,6 +156,23 @@ static inline void le64_add_cpu(__le64 *var, u64 val) *var = cpu_to_le64(le64_to_cpu(*var) + val); } +/* XXX: this stuff can be optimized */ +static inline void le32_to_cpu_array(u32 *buf, unsigned int words) +{ + while (words--) { + __le32_to_cpus(buf); + buf++; + } +} + +static inline void cpu_to_le32_array(u32 *buf, unsigned int words) +{ + while (words--) { + __cpu_to_le32s(buf); + buf++; + } +} + static inline void be16_add_cpu(__be16 *var, u16 val) { *var = cpu_to_be16(be16_to_cpu(*var) + val); -- cgit v1.2.3 From f44c77630d26ca2c2a60b20c47dd9ce07c4361b3 Mon Sep 17 00:00:00 2001 From: 
Dan Williams Date: Wed, 7 Mar 2018 15:26:44 -0800 Subject: fs, dax: prepare for dax-specific address_space_operations In preparation for the dax implementation to start associating dax pages to inodes via page->mapping, we need to provide a 'struct address_space_operations' instance for dax. Define some generic VFS aops helpers for dax. These noop implementations are there in the dax case to prevent the VFS from falling back to operations with page-cache assumptions, dax_writeback_mapping_range() may not be referenced in the FS_DAX=n case. Cc: Jeff Moyer Cc: Ross Zwisler Suggested-by: Matthew Wilcox Suggested-by: Jan Kara Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Suggested-by: Dave Chinner Signed-off-by: Dan Williams --- include/linux/dax.h | 12 +++++++++--- include/linux/fs.h | 4 ++++ 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 0185ecdae135..ae27a7efe7ab 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -38,6 +38,7 @@ static inline void put_dax(struct dax_device *dax_dev) } #endif +struct writeback_control; int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); #if IS_ENABLED(CONFIG_FS_DAX) int __bdev_dax_supported(struct super_block *sb, int blocksize); @@ -57,6 +58,8 @@ static inline void fs_put_dax(struct dax_device *dax_dev) } struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev); +int dax_writeback_mapping_range(struct address_space *mapping, + struct block_device *bdev, struct writeback_control *wbc); #else static inline int bdev_dax_supported(struct super_block *sb, int blocksize) { @@ -76,6 +79,12 @@ static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) { return NULL; } + +static inline int dax_writeback_mapping_range(struct address_space *mapping, + struct block_device *bdev, struct writeback_control *wbc) +{ + return -EOPNOTSUPP; +} #endif int 
dax_read_lock(void); @@ -121,7 +130,4 @@ static inline bool dax_mapping(struct address_space *mapping) return mapping->host && IS_DAX(mapping->host); } -struct writeback_control; -int dax_writeback_mapping_range(struct address_space *mapping, - struct block_device *bdev, struct writeback_control *wbc); #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 79c413985305..44f7f7080faa 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3129,6 +3129,10 @@ extern int simple_rmdir(struct inode *, struct dentry *); extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); extern int noop_fsync(struct file *, loff_t, loff_t, int); +extern int noop_set_page_dirty(struct page *page); +extern void noop_invalidatepage(struct page *page, unsigned int offset, + unsigned int length); +extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter); extern int simple_empty(struct dentry *); extern int simple_readpage(struct file *file, struct page *page); extern int simple_write_begin(struct file *file, struct address_space *mapping, -- cgit v1.2.3 From f385178679b6561d2e717567d12e07c7f927ee59 Mon Sep 17 00:00:00 2001 From: Prashant Bhole Date: Fri, 30 Mar 2018 09:20:59 +0900 Subject: lib/scatterlist: add sg_init_marker() helper sg_init_marker initializes sg_magic in the sg table and calls sg_mark_end() on the last entry of the table. 
This can be useful to avoid memset in sg_init_table() when scatterlist is already zeroed out For example: when scatterlist is embedded inside other struct and that container struct is zeroed out Suggested-by: Daniel Borkmann Signed-off-by: Prashant Bhole Acked-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/linux/scatterlist.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 22b2131bcdcd..aa5d4eb725f5 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -248,6 +248,24 @@ static inline void *sg_virt(struct scatterlist *sg) return page_address(sg_page(sg)) + sg->offset; } +/** + * sg_init_marker - Initialize markers in sg table + * @sgl: The SG table + * @nents: Number of entries in table + * + **/ +static inline void sg_init_marker(struct scatterlist *sgl, + unsigned int nents) +{ +#ifdef CONFIG_DEBUG_SG + unsigned int i; + + for (i = 0; i < nents; i++) + sgl[i].sg_magic = SG_MAGIC; +#endif + sg_mark_end(&sgl[nents - 1]); +} + int sg_nents(struct scatterlist *sg); int sg_nents_for_len(struct scatterlist *sg, u64 len); struct scatterlist *sg_next(struct scatterlist *); -- cgit v1.2.3 From 02bfeb484230dfd073148a17253aeb1717ce769c Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 9 Mar 2018 11:21:25 -0600 Subject: PCI/portdrv: Simplify PCIe feature permission checking Some PCIe features (AER, DPC, hotplug, PME) can be managed by either the platform firmware or the OS, so the host bridge driver may have to request permission from the platform before using them. On ACPI systems, this is done by negotiate_os_control() in acpi_pci_root_add(). The PCIe port driver later uses pcie_port_platform_notify() and pcie_port_acpi_setup() to figure out whether it can use these features. But all we need is a single bit for each service, so these interfaces are needlessly complicated. 
Simplify this by adding bits in the struct pci_host_bridge to show when the OS has permission to use each feature: + unsigned int native_aer:1; /* OS may use PCIe AER */ + unsigned int native_hotplug:1; /* OS may use PCIe hotplug */ + unsigned int native_pme:1; /* OS may use PCIe PME */ These are set when we create a host bridge, and the host bridge driver can clear the bits corresponding to any feature the platform doesn't want us to use. Signed-off-by: Bjorn Helgaas Reviewed-by: Rafael J. Wysocki --- include/linux/pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 024a1beda008..a04b7abc6b7a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -469,6 +469,9 @@ struct pci_host_bridge { struct msi_controller *msi; unsigned int ignore_reset_delay:1; /* For entire hierarchy */ unsigned int no_ext_tags:1; /* No Extended Tags */ + unsigned int native_aer:1; /* OS may use PCIe AER */ + unsigned int native_hotplug:1; /* OS may use PCIe hotplug */ + unsigned int native_pme:1; /* OS may use PCIe PME */ /* Resource alignment requirements */ resource_size_t (*align_resource)(struct pci_dev *dev, const struct resource *res, -- cgit v1.2.3 From 842b447f0074b93e9f7db60039fdc72ec14bef9a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 9 Mar 2018 11:21:29 -0600 Subject: PCI/portdrv: Encapsulate pcie_ports_auto inside the port driver "pcie_ports_auto" is only used inside the PCIe port driver itself, so move it from include/linux/pci.h to portdrv.h so it's not visible to the whole kernel. 
Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index a04b7abc6b7a..dc70a3ce8dc5 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1449,10 +1449,8 @@ static inline int pci_irqd_intx_xlate(struct irq_domain *d, #ifdef CONFIG_PCIEPORTBUS extern bool pcie_ports_disabled; -extern bool pcie_ports_auto; #else #define pcie_ports_disabled true -#define pcie_ports_auto false #endif #ifdef CONFIG_PCIEASPM -- cgit v1.2.3 From ad32eb2df801548a4b55802384fbbfbc04d76bfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Sun, 18 Mar 2018 13:58:06 +0100 Subject: PCI: Always define the of_node helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simply move these inline functions outside the ifdef instead of duplicating them as stubs in the !OF case. The struct device of_node field does not depend on OF. This also fixes the missing stubbed pci_bus_to_OF_node(). Signed-off-by: Bjørn Mork Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 024a1beda008..d0396da9160e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2182,24 +2182,11 @@ int pci_parse_request_of_pci_ranges(struct device *dev, /* Arch may override this (weak) */ struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus); -static inline struct device_node * -pci_device_to_OF_node(const struct pci_dev *pdev) -{ - return pdev ? pdev->dev.of_node : NULL; -} - -static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus) -{ - return bus ? 
bus->dev.of_node : NULL; -} - #else /* CONFIG_OF */ static inline void pci_set_of_node(struct pci_dev *dev) { } static inline void pci_release_of_node(struct pci_dev *dev) { } static inline void pci_set_bus_of_node(struct pci_bus *bus) { } static inline void pci_release_bus_of_node(struct pci_bus *bus) { } -static inline struct device_node * -pci_device_to_OF_node(const struct pci_dev *pdev) { return NULL; } static inline struct irq_domain * pci_host_bridge_of_msi_domain(struct pci_bus *bus) { return NULL; } static inline int pci_parse_request_of_pci_ranges(struct device *dev, @@ -2210,6 +2197,17 @@ static inline int pci_parse_request_of_pci_ranges(struct device *dev, } #endif /* CONFIG_OF */ +static inline struct device_node * +pci_device_to_OF_node(const struct pci_dev *pdev) +{ + return pdev ? pdev->dev.of_node : NULL; +} + +static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus) +{ + return bus ? bus->dev.of_node : NULL; +} + #ifdef CONFIG_ACPI struct irq_domain *pci_host_bridge_acpi_msi_domain(struct pci_bus *bus); -- cgit v1.2.3 From b2d3907c234618c20239127f2c234b4e92adf5ef Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 13 Feb 2018 17:07:02 -0800 Subject: net/mlx5: Eliminate query xsrq dead code 1. This function is not used anywhere in mlx5 driver 2. 
It has a memcpy statement that makes no sense and produces build warning with gcc8 drivers/net/ethernet/mellanox/mlx5/core/transobj.c: In function 'mlx5_core_query_xsrq': drivers/net/ethernet/mellanox/mlx5/core/transobj.c:347:3: error: 'memcpy' source argument is the same as destination [-Werror=restrict] Fixes: 01949d0109ee ("net/mlx5_core: Enable XRCs and SRQs when using ISSI > 0") Reported-by: Arnd Bergmann Signed-off-by: Saeed Mahameed --- include/linux/mlx5/transobj.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h index 80d7aa8b2831..83a33a1873a6 100644 --- a/include/linux/mlx5/transobj.h +++ b/include/linux/mlx5/transobj.h @@ -67,7 +67,6 @@ int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rmpn); int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn); -int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u32 *out); int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, -- cgit v1.2.3 From 619a8f2a42f1031cdbd74435b6a9191eb4913139 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 7 Feb 2018 14:41:25 +0200 Subject: net/mlx5e: Use linear SKB in Striding RQ Current Striding RQ HW feature utilizes the RX buffers so that there is no wasted room between the strides. This maximises the memory utilization. This prevents the use of build_skb() (which requires headroom and tailroom), and demands to memcpy the packets headers into the skb linear part. In this patch, whenever a set of conditions holds, we apply an RQ configuration that allows combining the use of linear SKB on top of a Striding RQ. To use build_skb() with Striding RQ, the following must hold: 1. packet does not cross a page boundary. 2. there is enough headroom and tailroom surrounding the packet. 
We can satisfy 1 and 2 by configuring: stride size = MTU + headroom + tailroom. This is possible only when: a. (MTU + headroom + tailroom) does not exceed PAGE_SIZE. b. HW LRO is turned off. Using linear SKB has many advantages: - Saves a memcpy of the headers. - No page-boundary checks in datapath. - No filler CQEs. - Significantly smaller CQ. - SKB data continuously resides in linear part, and not split to small amount (linear part) and large amount (fragment). This saves datapath cycles in driver and improves utilization of SKB fragments in GRO. - The fragments of a resulting GRO SKB follow the IP forwarding assumption of equal-size fragments. Some implementation details: HW writes the packets to the beginning of a stride, i.e. does not keep headroom. To overcome this we make sure we can extend backwards and use the last bytes of stride i-1. Extra care is needed for stride 0 as it has no preceding stride. We make sure headroom bytes are available by shifting the buffer pointer passed to HW by headroom bytes. This configuration now becomes default, whenever capable. Of course, this implies turning LRO off. Performance testing: ConnectX-5, single core, single RX ring, default MTU.
UDP packet rate, early drop in TC layer: -------------------------------------------- | pkt size | before | after | ratio | -------------------------------------------- | 1500byte | 4.65 Mpps | 5.96 Mpps | 1.28x | | 500byte | 5.23 Mpps | 5.97 Mpps | 1.14x | | 64byte | 5.94 Mpps | 5.96 Mpps | 1.00x | -------------------------------------------- TCP streams: ~20% gain Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 3 +++ include/linux/mlx5/mlx5_ifc.h | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 4b5939c78cdd..12758595459b 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -782,6 +782,9 @@ static inline u64 get_cqe_ts(struct mlx5_cqe64 *cqe) return (u64)lo | ((u64)hi << 32); } +#define MLX5_MPWQE_LOG_NUM_STRIDES_BASE (9) +#define MLX5_MPWQE_LOG_STRIDE_SZ_BASE (6) + struct mpwrq_cqe_bc { __be16 filler_consumed_strides; __be16 byte_cnt; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c19e611d2782..d25011f84815 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1038,7 +1038,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_398[0x3]; u8 log_max_tis_per_sq[0x5]; - u8 reserved_at_3a0[0x3]; + u8 ext_stride_num_range[0x1]; + u8 reserved_at_3a1[0x2]; u8 log_max_stride_sz_rq[0x5]; u8 reserved_at_3a8[0x3]; u8 log_min_stride_sz_rq[0x5]; @@ -1205,9 +1206,9 @@ struct mlx5_ifc_wq_bits { u8 log_hairpin_num_packets[0x5]; u8 reserved_at_128[0x3]; u8 log_hairpin_data_sz[0x5]; - u8 reserved_at_130[0x5]; - u8 log_wqe_num_of_strides[0x3]; + u8 reserved_at_130[0x4]; + u8 log_wqe_num_of_strides[0x4]; u8 two_byte_shift_en[0x1]; u8 reserved_at_139[0x4]; u8 log_wqe_stride_size[0x3]; -- cgit v1.2.3 From 5e43f899b03a3492ce5fc44e8900becb04dae9c0 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 30 Mar 2018 15:08:00 -0700 Subject: 
bpf: Check attach type at prog load time == The problem == There are use-cases when a program of some type can be attached to multiple attach points and those attach points must have different permissions to access context or to call helpers. E.g. context structure may have fields for both IPv4 and IPv6 but it doesn't make sense to read from / write to IPv6 field when attach point is somewhere in IPv4 stack. Same applies to BPF-helpers: it may make sense to call some helper from some attach point, but not from other for same prog type. == The solution == Introduce `expected_attach_type` field in `struct bpf_attr` for `BPF_PROG_LOAD` command. If scenario described in "The problem" section is the case for some prog type, the field will be checked twice: 1) At load time prog type is checked to see if attach type for it must be known to validate program permissions correctly. Prog will be rejected with EINVAL if it's the case and `expected_attach_type` is not specified or has invalid value. 2) At attach time `attach_type` is compared with `expected_attach_type`, if prog type requires to have one, and, if they differ, attach will be rejected with EINVAL. The `expected_attach_type` is now available as part of `struct bpf_prog` in both `bpf_verifier_ops->is_valid_access()` and `bpf_verifier_ops->get_func_proto()` and can be used to check context accesses and calls to helpers correspondingly. 
Initially the idea was discussed by Alexei Starovoitov and Daniel Borkmann here: https://marc.info/?l=linux-netdev&m=152107378717201&w=2 Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 5 ++++- include/linux/filter.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 819229c80eca..95a7abd0ee92 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -208,12 +208,15 @@ struct bpf_prog_ops { struct bpf_verifier_ops { /* return eBPF function prototype for verification */ - const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id); + const struct bpf_func_proto * + (*get_func_proto)(enum bpf_func_id func_id, + const struct bpf_prog *prog); /* return true if 'size' wide access at offset 'off' within bpf_context * with 'type' (read or write) is allowed */ bool (*is_valid_access)(int off, int size, enum bpf_access_type type, + const struct bpf_prog *prog, struct bpf_insn_access_aux *info); int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, const struct bpf_prog *prog); diff --git a/include/linux/filter.h b/include/linux/filter.h index 897ff3d95968..13c044e4832d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -469,6 +469,7 @@ struct bpf_prog { is_func:1, /* program is a bpf function */ kprobe_override:1; /* Do we override a kprobe? 
*/ enum bpf_prog_type type; /* Type of BPF program */ + enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ u32 jited_len; /* Size of jited insns in bytes */ u8 tag[BPF_TAG_SIZE]; -- cgit v1.2.3 From 4fbac77d2d092b475dda9eea66da674369665427 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 30 Mar 2018 15:08:02 -0700 Subject: bpf: Hooks for sys_bind == The problem == There is a use-case when all processes inside a cgroup should use one single IP address on a host that has multiple IP configured. Those processes should use the IP for both ingress and egress, for TCP and UDP traffic. So TCP/UDP servers should be bound to that IP to accept incoming connections on it, and TCP/UDP clients should make outgoing connections from that IP. It should not require changing application code since it's often not possible. Currently it's solved by intercepting glibc wrappers around syscalls such as `bind(2)` and `connect(2)`. It's done by a shared library that is preloaded for every process in a cgroup so that whenever TCP/UDP server calls `bind(2)`, the library replaces IP in sockaddr before passing arguments to syscall. When application calls `connect(2)` the library transparently binds the local end of connection to that IP (`bind(2)` with `IP_BIND_ADDRESS_NO_PORT` to avoid performance penalty). Shared library approach is fragile though, e.g.: * some applications clear env vars (incl. `LD_PRELOAD`); * `/etc/ld.so.preload` doesn't help since some applications are linked with option `-z nodefaultlib`; * other applications don't use glibc and there is nothing to intercept. == The solution == The patch provides much more reliable in-kernel solution for the 1st part of the problem: binding TCP/UDP servers on desired IP. It does not depend on application environment and implementation details (whether glibc is used or not). 
It adds new eBPF program type `BPF_PROG_TYPE_CGROUP_SOCK_ADDR` and attach types `BPF_CGROUP_INET4_BIND` and `BPF_CGROUP_INET6_BIND` (similar to already existing `BPF_CGROUP_INET_SOCK_CREATE`). The new program type is intended to be used with sockets (`struct sock`) in a cgroup and provided by user `struct sockaddr`. Pointers to both of them are parts of the context passed to programs of newly added types. The new attach types provide hooks in `bind(2)` system call for both IPv4 and IPv6 so that one can write a program to override IP addresses and ports user program tries to bind to and apply such a program for whole cgroup. == Implementation notes == [1] Separate attach types for `AF_INET` and `AF_INET6` are added intentionally to prevent reading/writing to offsets that don't make sense for corresponding socket family. E.g. if user passes `sockaddr_in` it doesn't make sense to read from / write to `user_ip6[]` context fields. [2] The write access to `struct bpf_sock_addr_kern` is implemented using special field as an additional "register". There are just two registers in `sock_addr_convert_ctx_access`: `src` with value to write and `dst` with pointer to context that can't be changed not to break later instructions. But the fields, allowed to write to, are not available directly and to access them address of corresponding pointer has to be loaded first. To get additional register the 1st not used by `src` and `dst` one is taken, its content is saved to `bpf_sock_addr_kern.tmp_reg`, then the register is used to load address of pointer field, and finally the register's content is restored from the temporary field after writing `src` value. 
Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf-cgroup.h | 21 +++++++++++++++++++++ include/linux/bpf_types.h | 1 + include/linux/filter.h | 10 ++++++++++ 3 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 8a4566691c8f..67dc4a6471ad 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -6,6 +6,7 @@ #include struct sock; +struct sockaddr; struct cgroup; struct sk_buff; struct bpf_sock_ops_kern; @@ -63,6 +64,10 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, int __cgroup_bpf_run_filter_sk(struct sock *sk, enum bpf_attach_type type); +int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, + struct sockaddr *uaddr, + enum bpf_attach_type type); + int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, struct bpf_sock_ops_kern *sock_ops, enum bpf_attach_type type); @@ -103,6 +108,20 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, __ret; \ }) +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \ + __ret; \ +}) + +#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND) + +#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND) + #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \ ({ \ int __ret = 0; \ @@ -135,6 +154,8 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define 
BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 6d7243bfb0ff..2b28fcf6f6ae 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -8,6 +8,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act) BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp) BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb) BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr) BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout) BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout) BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit) diff --git a/include/linux/filter.h b/include/linux/filter.h index 13c044e4832d..fc4e8f91b03d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1021,6 +1021,16 @@ static inline int bpf_tell_extensions(void) return SKF_AD_MAX; } +struct bpf_sock_addr_kern { + struct sock *sk; + struct sockaddr *uaddr; + /* Temporary "register" to make indirect stores to nested structures + * defined above. We need three registers to make such a store, but + * only two (src and dst) are available at convert_ctx_access time + */ + u64 tmp_reg; +}; + struct bpf_sock_ops_kern { struct sock *sk; u32 op; -- cgit v1.2.3 From d74bad4e74ee373787a9ae24197c17b7cdc428d5 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 30 Mar 2018 15:08:05 -0700 Subject: bpf: Hooks for sys_connect == The problem == See description of the problem in the initial patch of this patch set. == The solution == The patch provides much more reliable in-kernel solution for the 2nd part of the problem: making outgoing connection from desired IP. It adds new attach types `BPF_CGROUP_INET4_CONNECT` and `BPF_CGROUP_INET6_CONNECT` for program type `BPF_PROG_TYPE_CGROUP_SOCK_ADDR` that can be used to override both source and destination of a connection at connect(2) time. 
Local end of connection can be bound to desired IP using newly introduced BPF-helper `bpf_bind()`. It allows to bind to only IP though, and doesn't support binding to port, i.e. leverages `IP_BIND_ADDRESS_NO_PORT` socket option. There are two reasons for this: * looking for a free port is expensive and can affect performance significantly; * there is no use-case for port. As for remote end (`struct sockaddr *` passed by user), both parts of it can be overridden, remote IP and remote port. It's useful if an application inside cgroup wants to connect to another application inside same cgroup or to itself, but knows nothing about IP assigned to the cgroup. Support is added for IPv4 and IPv6, for TCP and UDP. IPv4 and IPv6 have separate attach types for same reason as sys_bind hooks, i.e. to prevent reading from / writing to e.g. user_ip6 fields when user passes sockaddr_in since it'd be out-of-bound. == Implementation notes == The patch introduces new field in `struct proto`: `pre_connect` that is a pointer to a function with same signature as `connect` but is called before it. The reason is in some cases BPF hooks should be called way before control is passed to `sk->sk_prot->connect`. Specifically `inet_dgram_connect` autobinds socket before calling `sk->sk_prot->connect` and there is no way to call `bpf_bind()` from hooks from e.g. `ip4_datagram_connect` or `ip6_datagram_connect` since it'd cause double-bind. On the other hand `proto.pre_connect` provides a flexible way to add BPF hooks for connect only for necessary `proto` and call them at desired time before `connect`. Since `bpf_bind()` is allowed to bind only to IP and autobind in `inet_dgram_connect` binds only port there is no chance of double-bind. bpf_bind() sets `force_bind_address_no_port` to bind to only IP regardless of the value of `bind_address_no_port` socket field. 
bpf_bind() sets `with_lock` to `false` when calling to __inet_bind() and __inet6_bind() since all call-sites, where bpf_bind() is called, already hold socket lock. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf-cgroup.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 67dc4a6471ad..c6ab295e6dcb 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -116,12 +116,38 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, __ret; \ }) +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) { \ + lock_sock(sk); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \ + release_sock(sk); \ + } \ + __ret; \ +}) + #define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) \ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND) #define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) \ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND) +#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \ + sk->sk_prot->pre_connect) + +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT) + +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT) + +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT) + +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT) + #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \ ({ \ int __ret = 0; \ @@ -151,11 +177,16 @@ struct cgroup_bpf {}; static inline void cgroup_bpf_put(struct cgroup *cgrp) {} static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } +#define 
BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) -- cgit v1.2.3 From aac3fc320d9404f2665a8b1249dc3170d5fa3caf Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 30 Mar 2018 15:08:07 -0700 Subject: bpf: Post-hooks for sys_bind "Post-hooks" are hooks that are called right before returning from sys_bind. At this time IP and port are already allocated and no further changes to `struct sock` can happen before returning from sys_bind but BPF program has a chance to inspect the socket and change sys_bind result. Specifically it can e.g. inspect what port was allocated and if it doesn't satisfy some policy, BPF program can force sys_bind to fail and return EPERM to user. Another example of usage is recording the IP:port pair to some map to use it in later calls to sys_connect. E.g. if some TCP server inside cgroup was bound to some IP:port_n, it can be recorded to a map. And later when some TCP client inside same cgroup is trying to connect to 127.0.0.1:port_n, BPF hook for sys_connect can override the destination and connect application to IP:port_n instead of 127.0.0.1:port_n. That helps forcing all applications inside a cgroup to use desired IP and not break those applications if they e.g. use localhost to communicate between each other. 
== Implementation details == Post-hooks are implemented as two new attach types `BPF_CGROUP_INET4_POST_BIND` and `BPF_CGROUP_INET6_POST_BIND` for existing prog type `BPF_PROG_TYPE_CGROUP_SOCK`. Separate attach types for IPv4 and IPv6 are introduced to avoid access to IPv6 field in `struct sock` from `inet_bind()` and to IPv4 field from `inet6_bind()` since those fields might not make sense in such cases. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf-cgroup.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index c6ab295e6dcb..30d15e64b993 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -98,16 +98,24 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, __ret; \ }) -#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \ +#define BPF_CGROUP_RUN_SK_PROG(sk, type) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled) { \ - __ret = __cgroup_bpf_run_filter_sk(sk, \ - BPF_CGROUP_INET_SOCK_CREATE); \ + __ret = __cgroup_bpf_run_filter_sk(sk, type); \ } \ __ret; \ }) +#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \ + BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE) + +#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) \ + BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND) + +#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \ + BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET6_POST_BIND) + #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \ ({ \ int __ret = 0; \ @@ -183,6 +191,8 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; }) #define 
BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; }) -- cgit v1.2.3 From df0ce17331e2501dbffc060041dfc6c5f85227b5 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Thu, 29 Mar 2018 01:28:23 +0000 Subject: security: convert security hooks to use hlist This changes security_hook_heads to use hlist_heads instead of the circular doubly-linked list heads. This should cut down the size of the struct by about half. In addition, it allows mutation of the hooks at the tail of the callback list without having to modify the head. The longer-term purpose of this is to enable making the heads read only. Signed-off-by: Sargun Dhillon Reviewed-by: Tetsuo Handa Acked-by: Casey Schaufler Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 428 +++++++++++++++++++++++----------------------- 1 file changed, 214 insertions(+), 214 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index e0ac011d07a5..ac491137b10a 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1731,230 +1731,230 @@ union security_list_options { }; struct security_hook_heads { - struct list_head binder_set_context_mgr; - struct list_head binder_transaction; - struct list_head binder_transfer_binder; - struct list_head binder_transfer_file; - struct list_head ptrace_access_check; - struct list_head ptrace_traceme; - struct list_head capget; - struct list_head capset; - struct list_head capable; - struct list_head quotactl; - struct list_head quota_on; - struct list_head syslog; - struct list_head settime; - struct list_head vm_enough_memory; - struct list_head bprm_set_creds; - struct list_head bprm_check_security; - struct list_head bprm_committing_creds; - struct list_head bprm_committed_creds; - struct list_head sb_alloc_security; - struct list_head sb_free_security; - struct list_head 
sb_copy_data; - struct list_head sb_remount; - struct list_head sb_kern_mount; - struct list_head sb_show_options; - struct list_head sb_statfs; - struct list_head sb_mount; - struct list_head sb_umount; - struct list_head sb_pivotroot; - struct list_head sb_set_mnt_opts; - struct list_head sb_clone_mnt_opts; - struct list_head sb_parse_opts_str; - struct list_head dentry_init_security; - struct list_head dentry_create_files_as; + struct hlist_head binder_set_context_mgr; + struct hlist_head binder_transaction; + struct hlist_head binder_transfer_binder; + struct hlist_head binder_transfer_file; + struct hlist_head ptrace_access_check; + struct hlist_head ptrace_traceme; + struct hlist_head capget; + struct hlist_head capset; + struct hlist_head capable; + struct hlist_head quotactl; + struct hlist_head quota_on; + struct hlist_head syslog; + struct hlist_head settime; + struct hlist_head vm_enough_memory; + struct hlist_head bprm_set_creds; + struct hlist_head bprm_check_security; + struct hlist_head bprm_committing_creds; + struct hlist_head bprm_committed_creds; + struct hlist_head sb_alloc_security; + struct hlist_head sb_free_security; + struct hlist_head sb_copy_data; + struct hlist_head sb_remount; + struct hlist_head sb_kern_mount; + struct hlist_head sb_show_options; + struct hlist_head sb_statfs; + struct hlist_head sb_mount; + struct hlist_head sb_umount; + struct hlist_head sb_pivotroot; + struct hlist_head sb_set_mnt_opts; + struct hlist_head sb_clone_mnt_opts; + struct hlist_head sb_parse_opts_str; + struct hlist_head dentry_init_security; + struct hlist_head dentry_create_files_as; #ifdef CONFIG_SECURITY_PATH - struct list_head path_unlink; - struct list_head path_mkdir; - struct list_head path_rmdir; - struct list_head path_mknod; - struct list_head path_truncate; - struct list_head path_symlink; - struct list_head path_link; - struct list_head path_rename; - struct list_head path_chmod; - struct list_head path_chown; - struct list_head path_chroot; 
+ struct hlist_head path_unlink; + struct hlist_head path_mkdir; + struct hlist_head path_rmdir; + struct hlist_head path_mknod; + struct hlist_head path_truncate; + struct hlist_head path_symlink; + struct hlist_head path_link; + struct hlist_head path_rename; + struct hlist_head path_chmod; + struct hlist_head path_chown; + struct hlist_head path_chroot; #endif - struct list_head inode_alloc_security; - struct list_head inode_free_security; - struct list_head inode_init_security; - struct list_head inode_create; - struct list_head inode_link; - struct list_head inode_unlink; - struct list_head inode_symlink; - struct list_head inode_mkdir; - struct list_head inode_rmdir; - struct list_head inode_mknod; - struct list_head inode_rename; - struct list_head inode_readlink; - struct list_head inode_follow_link; - struct list_head inode_permission; - struct list_head inode_setattr; - struct list_head inode_getattr; - struct list_head inode_setxattr; - struct list_head inode_post_setxattr; - struct list_head inode_getxattr; - struct list_head inode_listxattr; - struct list_head inode_removexattr; - struct list_head inode_need_killpriv; - struct list_head inode_killpriv; - struct list_head inode_getsecurity; - struct list_head inode_setsecurity; - struct list_head inode_listsecurity; - struct list_head inode_getsecid; - struct list_head inode_copy_up; - struct list_head inode_copy_up_xattr; - struct list_head file_permission; - struct list_head file_alloc_security; - struct list_head file_free_security; - struct list_head file_ioctl; - struct list_head mmap_addr; - struct list_head mmap_file; - struct list_head file_mprotect; - struct list_head file_lock; - struct list_head file_fcntl; - struct list_head file_set_fowner; - struct list_head file_send_sigiotask; - struct list_head file_receive; - struct list_head file_open; - struct list_head task_alloc; - struct list_head task_free; - struct list_head cred_alloc_blank; - struct list_head cred_free; - struct list_head 
cred_prepare; - struct list_head cred_transfer; - struct list_head kernel_act_as; - struct list_head kernel_create_files_as; - struct list_head kernel_read_file; - struct list_head kernel_post_read_file; - struct list_head kernel_module_request; - struct list_head task_fix_setuid; - struct list_head task_setpgid; - struct list_head task_getpgid; - struct list_head task_getsid; - struct list_head task_getsecid; - struct list_head task_setnice; - struct list_head task_setioprio; - struct list_head task_getioprio; - struct list_head task_prlimit; - struct list_head task_setrlimit; - struct list_head task_setscheduler; - struct list_head task_getscheduler; - struct list_head task_movememory; - struct list_head task_kill; - struct list_head task_prctl; - struct list_head task_to_inode; - struct list_head ipc_permission; - struct list_head ipc_getsecid; - struct list_head msg_msg_alloc_security; - struct list_head msg_msg_free_security; - struct list_head msg_queue_alloc_security; - struct list_head msg_queue_free_security; - struct list_head msg_queue_associate; - struct list_head msg_queue_msgctl; - struct list_head msg_queue_msgsnd; - struct list_head msg_queue_msgrcv; - struct list_head shm_alloc_security; - struct list_head shm_free_security; - struct list_head shm_associate; - struct list_head shm_shmctl; - struct list_head shm_shmat; - struct list_head sem_alloc_security; - struct list_head sem_free_security; - struct list_head sem_associate; - struct list_head sem_semctl; - struct list_head sem_semop; - struct list_head netlink_send; - struct list_head d_instantiate; - struct list_head getprocattr; - struct list_head setprocattr; - struct list_head ismaclabel; - struct list_head secid_to_secctx; - struct list_head secctx_to_secid; - struct list_head release_secctx; - struct list_head inode_invalidate_secctx; - struct list_head inode_notifysecctx; - struct list_head inode_setsecctx; - struct list_head inode_getsecctx; + struct hlist_head inode_alloc_security; + 
struct hlist_head inode_free_security; + struct hlist_head inode_init_security; + struct hlist_head inode_create; + struct hlist_head inode_link; + struct hlist_head inode_unlink; + struct hlist_head inode_symlink; + struct hlist_head inode_mkdir; + struct hlist_head inode_rmdir; + struct hlist_head inode_mknod; + struct hlist_head inode_rename; + struct hlist_head inode_readlink; + struct hlist_head inode_follow_link; + struct hlist_head inode_permission; + struct hlist_head inode_setattr; + struct hlist_head inode_getattr; + struct hlist_head inode_setxattr; + struct hlist_head inode_post_setxattr; + struct hlist_head inode_getxattr; + struct hlist_head inode_listxattr; + struct hlist_head inode_removexattr; + struct hlist_head inode_need_killpriv; + struct hlist_head inode_killpriv; + struct hlist_head inode_getsecurity; + struct hlist_head inode_setsecurity; + struct hlist_head inode_listsecurity; + struct hlist_head inode_getsecid; + struct hlist_head inode_copy_up; + struct hlist_head inode_copy_up_xattr; + struct hlist_head file_permission; + struct hlist_head file_alloc_security; + struct hlist_head file_free_security; + struct hlist_head file_ioctl; + struct hlist_head mmap_addr; + struct hlist_head mmap_file; + struct hlist_head file_mprotect; + struct hlist_head file_lock; + struct hlist_head file_fcntl; + struct hlist_head file_set_fowner; + struct hlist_head file_send_sigiotask; + struct hlist_head file_receive; + struct hlist_head file_open; + struct hlist_head task_alloc; + struct hlist_head task_free; + struct hlist_head cred_alloc_blank; + struct hlist_head cred_free; + struct hlist_head cred_prepare; + struct hlist_head cred_transfer; + struct hlist_head kernel_act_as; + struct hlist_head kernel_create_files_as; + struct hlist_head kernel_read_file; + struct hlist_head kernel_post_read_file; + struct hlist_head kernel_module_request; + struct hlist_head task_fix_setuid; + struct hlist_head task_setpgid; + struct hlist_head task_getpgid; + struct 
hlist_head task_getsid; + struct hlist_head task_getsecid; + struct hlist_head task_setnice; + struct hlist_head task_setioprio; + struct hlist_head task_getioprio; + struct hlist_head task_prlimit; + struct hlist_head task_setrlimit; + struct hlist_head task_setscheduler; + struct hlist_head task_getscheduler; + struct hlist_head task_movememory; + struct hlist_head task_kill; + struct hlist_head task_prctl; + struct hlist_head task_to_inode; + struct hlist_head ipc_permission; + struct hlist_head ipc_getsecid; + struct hlist_head msg_msg_alloc_security; + struct hlist_head msg_msg_free_security; + struct hlist_head msg_queue_alloc_security; + struct hlist_head msg_queue_free_security; + struct hlist_head msg_queue_associate; + struct hlist_head msg_queue_msgctl; + struct hlist_head msg_queue_msgsnd; + struct hlist_head msg_queue_msgrcv; + struct hlist_head shm_alloc_security; + struct hlist_head shm_free_security; + struct hlist_head shm_associate; + struct hlist_head shm_shmctl; + struct hlist_head shm_shmat; + struct hlist_head sem_alloc_security; + struct hlist_head sem_free_security; + struct hlist_head sem_associate; + struct hlist_head sem_semctl; + struct hlist_head sem_semop; + struct hlist_head netlink_send; + struct hlist_head d_instantiate; + struct hlist_head getprocattr; + struct hlist_head setprocattr; + struct hlist_head ismaclabel; + struct hlist_head secid_to_secctx; + struct hlist_head secctx_to_secid; + struct hlist_head release_secctx; + struct hlist_head inode_invalidate_secctx; + struct hlist_head inode_notifysecctx; + struct hlist_head inode_setsecctx; + struct hlist_head inode_getsecctx; #ifdef CONFIG_SECURITY_NETWORK - struct list_head unix_stream_connect; - struct list_head unix_may_send; - struct list_head socket_create; - struct list_head socket_post_create; - struct list_head socket_bind; - struct list_head socket_connect; - struct list_head socket_listen; - struct list_head socket_accept; - struct list_head socket_sendmsg; - struct 
list_head socket_recvmsg; - struct list_head socket_getsockname; - struct list_head socket_getpeername; - struct list_head socket_getsockopt; - struct list_head socket_setsockopt; - struct list_head socket_shutdown; - struct list_head socket_sock_rcv_skb; - struct list_head socket_getpeersec_stream; - struct list_head socket_getpeersec_dgram; - struct list_head sk_alloc_security; - struct list_head sk_free_security; - struct list_head sk_clone_security; - struct list_head sk_getsecid; - struct list_head sock_graft; - struct list_head inet_conn_request; - struct list_head inet_csk_clone; - struct list_head inet_conn_established; - struct list_head secmark_relabel_packet; - struct list_head secmark_refcount_inc; - struct list_head secmark_refcount_dec; - struct list_head req_classify_flow; - struct list_head tun_dev_alloc_security; - struct list_head tun_dev_free_security; - struct list_head tun_dev_create; - struct list_head tun_dev_attach_queue; - struct list_head tun_dev_attach; - struct list_head tun_dev_open; + struct hlist_head unix_stream_connect; + struct hlist_head unix_may_send; + struct hlist_head socket_create; + struct hlist_head socket_post_create; + struct hlist_head socket_bind; + struct hlist_head socket_connect; + struct hlist_head socket_listen; + struct hlist_head socket_accept; + struct hlist_head socket_sendmsg; + struct hlist_head socket_recvmsg; + struct hlist_head socket_getsockname; + struct hlist_head socket_getpeername; + struct hlist_head socket_getsockopt; + struct hlist_head socket_setsockopt; + struct hlist_head socket_shutdown; + struct hlist_head socket_sock_rcv_skb; + struct hlist_head socket_getpeersec_stream; + struct hlist_head socket_getpeersec_dgram; + struct hlist_head sk_alloc_security; + struct hlist_head sk_free_security; + struct hlist_head sk_clone_security; + struct hlist_head sk_getsecid; + struct hlist_head sock_graft; + struct hlist_head inet_conn_request; + struct hlist_head inet_csk_clone; + struct hlist_head 
inet_conn_established; + struct hlist_head secmark_relabel_packet; + struct hlist_head secmark_refcount_inc; + struct hlist_head secmark_refcount_dec; + struct hlist_head req_classify_flow; + struct hlist_head tun_dev_alloc_security; + struct hlist_head tun_dev_free_security; + struct hlist_head tun_dev_create; + struct hlist_head tun_dev_attach_queue; + struct hlist_head tun_dev_attach; + struct hlist_head tun_dev_open; #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND - struct list_head ib_pkey_access; - struct list_head ib_endport_manage_subnet; - struct list_head ib_alloc_security; - struct list_head ib_free_security; + struct hlist_head ib_pkey_access; + struct hlist_head ib_endport_manage_subnet; + struct hlist_head ib_alloc_security; + struct hlist_head ib_free_security; #endif /* CONFIG_SECURITY_INFINIBAND */ #ifdef CONFIG_SECURITY_NETWORK_XFRM - struct list_head xfrm_policy_alloc_security; - struct list_head xfrm_policy_clone_security; - struct list_head xfrm_policy_free_security; - struct list_head xfrm_policy_delete_security; - struct list_head xfrm_state_alloc; - struct list_head xfrm_state_alloc_acquire; - struct list_head xfrm_state_free_security; - struct list_head xfrm_state_delete_security; - struct list_head xfrm_policy_lookup; - struct list_head xfrm_state_pol_flow_match; - struct list_head xfrm_decode_session; + struct hlist_head xfrm_policy_alloc_security; + struct hlist_head xfrm_policy_clone_security; + struct hlist_head xfrm_policy_free_security; + struct hlist_head xfrm_policy_delete_security; + struct hlist_head xfrm_state_alloc; + struct hlist_head xfrm_state_alloc_acquire; + struct hlist_head xfrm_state_free_security; + struct hlist_head xfrm_state_delete_security; + struct hlist_head xfrm_policy_lookup; + struct hlist_head xfrm_state_pol_flow_match; + struct hlist_head xfrm_decode_session; #endif /* CONFIG_SECURITY_NETWORK_XFRM */ #ifdef CONFIG_KEYS - struct list_head key_alloc; - struct list_head key_free; - struct 
list_head key_permission; - struct list_head key_getsecurity; + struct hlist_head key_alloc; + struct hlist_head key_free; + struct hlist_head key_permission; + struct hlist_head key_getsecurity; #endif /* CONFIG_KEYS */ #ifdef CONFIG_AUDIT - struct list_head audit_rule_init; - struct list_head audit_rule_known; - struct list_head audit_rule_match; - struct list_head audit_rule_free; + struct hlist_head audit_rule_init; + struct hlist_head audit_rule_known; + struct hlist_head audit_rule_match; + struct hlist_head audit_rule_free; #endif /* CONFIG_AUDIT */ #ifdef CONFIG_BPF_SYSCALL - struct list_head bpf; - struct list_head bpf_map; - struct list_head bpf_prog; - struct list_head bpf_map_alloc_security; - struct list_head bpf_map_free_security; - struct list_head bpf_prog_alloc_security; - struct list_head bpf_prog_free_security; + struct hlist_head bpf; + struct hlist_head bpf_map; + struct hlist_head bpf_prog; + struct hlist_head bpf_map_alloc_security; + struct hlist_head bpf_map_free_security; + struct hlist_head bpf_prog_alloc_security; + struct hlist_head bpf_prog_free_security; #endif /* CONFIG_BPF_SYSCALL */ } __randomize_layout; @@ -1963,8 +1963,8 @@ struct security_hook_heads { * For use with generic list macros for common operations. 
*/ struct security_hook_list { - struct list_head list; - struct list_head *head; + struct hlist_node list; + struct hlist_head *head; union security_list_options hook; char *lsm; } __randomize_layout; @@ -2003,7 +2003,7 @@ static inline void security_delete_hooks(struct security_hook_list *hooks, int i; for (i = 0; i < count; i++) - list_del_rcu(&hooks[i].list); + hlist_del_rcu(&hooks[i].list); } #endif /* CONFIG_SECURITY_SELINUX_DISABLE */ -- cgit v1.2.3 From a95b37e20db9a2b05354eec009b2188523a21c8e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 27 Mar 2018 21:52:50 +0900 Subject: kbuild: get <linux/compiler_types.h> out of <linux/kconfig.h> Since commit 28128c61e08e ("kconfig.h: Include compiler types to avoid missed struct attributes"), <linux/kconfig.h> pulls in kernel-space headers to unrelated places. Commit 0f9da844d877 ("MIPS: boot: Define __ASSEMBLY__ for its.S build") suppress the build error by defining __ASSEMBLY__, but ITS (i.e. DTS) is not assembly, and should not include <linux/compiler_types.h> in the first place. Looking at arch/s390/tools/Makefile, host programs gen_facilities and gen_opcode_table now pull in <linux/compiler_types.h> as well. The motivation for that commit was to define necessary attributes before any struct is defined. Obviously, this happens only in C. It is enough to include <linux/compiler_types.h> only when compiling C files, and only when compiling kernel space. Move the include to c_flags. Signed-off-by: Masahiro Yamada --- include/linux/kconfig.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h index dcde9471897d..cc8fa109cfa3 100644 --- a/include/linux/kconfig.h +++ b/include/linux/kconfig.h @@ -70,7 +70,4 @@ */ #define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option)) -/* Make sure we always have all types and struct attributes defined.
*/ -#include <linux/compiler_types.h> - #endif /* __LINUX_KCONFIG_H */ -- cgit v1.2.3 From 619e6f340cec7c5d1449a2951dae5af0990bd0f5 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Fri, 30 Mar 2018 17:39:31 -0500 Subject: PCI/IOV: Add missing prototypes for powerpc pcibios interfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add missing prototypes for: resource_size_t pcibios_default_alignment(void); int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs); int pcibios_sriov_disable(struct pci_dev *pdev); This fixes the following warnings treated as errors when using W=1: arch/powerpc/kernel/pci-common.c:236:17: error: no previous prototype for ‘pcibios_default_alignment’ [-Werror=missing-prototypes] arch/powerpc/kernel/pci-common.c:253:5: error: no previous prototype for ‘pcibios_sriov_enable’ [-Werror=missing-prototypes] arch/powerpc/kernel/pci-common.c:261:5: error: no previous prototype for ‘pcibios_sriov_disable’ [-Werror=missing-prototypes] Also, commit 978d2d683123 ("PCI: Add pcibios_iov_resource_alignment() interface") added a new function but the prototype was located in the main header instead of the CONFIG_PCI_IOV specific section. Move this function next to the newly added ones.
Signed-off-by: Mathieu Malaterre Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 562875d34b98..df17288fc1f6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1295,7 +1295,6 @@ unsigned char pci_bus_max_busnr(struct pci_bus *bus); void pci_setup_bridge(struct pci_bus *bus); resource_size_t pcibios_window_alignment(struct pci_bus *bus, unsigned long type); -resource_size_t pcibios_iov_resource_alignment(struct pci_dev *dev, int resno); #define PCI_VGA_STATE_CHANGE_BRIDGE (1 << 0) #define PCI_VGA_STATE_CHANGE_DECODES (1 << 1) @@ -1923,6 +1922,7 @@ void pcibios_release_device(struct pci_dev *dev); void pcibios_penalize_isa_irq(int irq, int active); int pcibios_alloc_irq(struct pci_dev *dev); void pcibios_free_irq(struct pci_dev *dev); +resource_size_t pcibios_default_alignment(void); #ifdef CONFIG_HIBERNATE_CALLBACKS extern struct dev_pm_ops pcibios_pm_ops; @@ -1955,6 +1955,11 @@ int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs); int pci_sriov_get_totalvfs(struct pci_dev *dev); resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno); void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe); + +/* Arch may override these (weak) */ +int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs); +int pcibios_sriov_disable(struct pci_dev *pdev); +resource_size_t pcibios_iov_resource_alignment(struct pci_dev *dev, int resno); #else static inline int pci_iov_virtfn_bus(struct pci_dev *dev, int id) { -- cgit v1.2.3 From e5d672a0780d9e7118caad4c171ec88b8299398d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 31 Mar 2018 12:58:56 -0700 Subject: rhashtable: reorganize struct rhashtable layout While under frags DDOS I noticed unfortunate false sharing between @nelems and @params.automatic_shrinking Move @nelems at the end of struct rhashtable so that first cache line is shared 
between all cpus, because almost never dirtied. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 668a21f04b09..1f8ad121eb43 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -152,25 +152,25 @@ struct rhashtable_params { /** * struct rhashtable - Hash table handle * @tbl: Bucket table - * @nelems: Number of elements in table * @key_len: Key length for hashfn - * @p: Configuration parameters * @max_elems: Maximum number of elements in table + * @p: Configuration parameters * @rhlist: True if this is an rhltable * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping * @lock: Spin lock to protect walker list + * @nelems: Number of elements in table */ struct rhashtable { struct bucket_table __rcu *tbl; - atomic_t nelems; unsigned int key_len; - struct rhashtable_params p; unsigned int max_elems; + struct rhashtable_params p; bool rhlist; struct work_struct run_work; struct mutex mutex; spinlock_t lock; + atomic_t nelems; }; /** -- cgit v1.2.3 From bf66337140c64c27fa37222b7abca7e49d63fb57 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 31 Mar 2018 12:58:58 -0700 Subject: inet: frags: get rid of ipfrag_skb_cb/FRAG_CB ip_defrag uses skb->cb[] to store the fragment offset, and unfortunately this integer is currently in a different cache line than skb->next, meaning that we use two cache lines per skb when finding the insertion point. By aliasing skb->ip_defrag_offset and skb->dev, we pack all the fields in a single cache line and save precious memory bandwidth. 
Note that after the fast path added by Changli Gao in commit d6bebca92c66 ("fragment: add fast path for in-order fragments") this change won't help the fast path, since we still need to access prev->len (2nd cache line), but will show great benefits when slow path is entered, since we perform a linear scan of a potentially long list. Also, note that this potentially long list is an attack vector; we might consider also using an rb-tree there eventually. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 47082f54ec1f..9065477ed255 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -672,6 +672,7 @@ struct sk_buff { * UDP receive path is one user. */ unsigned long dev_scratch; + int ip_defrag_offset; }; }; struct rb_node rbnode; /* used in netem & tcp stack */ -- cgit v1.2.3 From e0be6bea2583486ec4ed98e36437d82ea8190811 Mon Sep 17 00:00:00 2001 From: Atul Gupta Date: Sat, 31 Mar 2018 21:41:53 +0530 Subject: ethtool: enable Inline TLS in HW Ethtool option enables TLS record offload on HW, user configures the feature for netdev capable of Inline TLS. This allows user to define custom sk_prot for Inline TLS sock Signed-off-by: Atul Gupta Signed-off-by: David S.
Miller --- include/linux/netdev_features.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index db84c516bcfb..35b79f47a13d 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -79,6 +79,7 @@ enum { NETIF_F_RX_UDP_TUNNEL_PORT_BIT, /* Offload of RX port for UDP tunnels */ NETIF_F_GRO_HW_BIT, /* Hardware Generic receive offload */ + NETIF_F_HW_TLS_RECORD_BIT, /* Offload TLS record */ /* * Add your fresh new feature above and remember to update @@ -145,6 +146,7 @@ enum { #define NETIF_F_HW_ESP __NETIF_F(HW_ESP) #define NETIF_F_HW_ESP_TX_CSUM __NETIF_F(HW_ESP_TX_CSUM) #define NETIF_F_RX_UDP_TUNNEL_PORT __NETIF_F(RX_UDP_TUNNEL_PORT) +#define NETIF_F_HW_TLS_RECORD __NETIF_F(HW_TLS_RECORD) #define for_each_netdev_feature(mask_addr, bit) \ for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) -- cgit v1.2.3 From dccbf08005df800f5c8e948ab6132ed5536134bc Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 17 Feb 2018 09:29:58 +0100 Subject: libceph, ceph: change ceph_calc_file_object_mapping() signature - make it void - xlen (object extent length) out parameter should be u32 because only a single stripe unit is mapped at a time Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index d41fad99c0fa..92314035dac1 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -280,10 +280,9 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting, const struct ceph_osds *new_acting, bool any_change); -/* calculate mapping of a file extent to an object */ -extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, - u64 off, u64 len, - u64 *bno, u64 *oxoff, u64 *oxlen); +void 
ceph_calc_file_object_mapping(struct ceph_file_layout *l, + u64 off, u64 len, + u64 *objno, u64 *objoff, u32 *xlen); int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi, const struct ceph_object_id *oid, -- cgit v1.2.3 From 5359a17d2706b86da2af83027343d5eb256f7670 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 20 Jan 2018 10:30:10 +0100 Subject: libceph, rbd: new bio handling code (aka don't clone bios) The reason we clone bios is to be able to give each object request (and consequently each ceph_osd_data/ceph_msg_data item) its own pointer to a (list of) bio(s). The messenger then initializes its cursor with cloned bio's ->bi_iter, so it knows where to start reading from/writing to. That's all the cloned bios are used for: to determine each object request's starting position in the provided data buffer. Introduce ceph_bio_iter to do exactly that -- store position within bio list (i.e. pointer to bio) + position within that bio (i.e. bvec_iter). Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 59 +++++++++++++++++++++++++++++++++++------ include/linux/ceph/osd_client.h | 11 ++++---- 2 files changed, 57 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index ead9d85f1c11..d7b9605fd51d 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -93,14 +93,60 @@ static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type) } } +#ifdef CONFIG_BLOCK + +struct ceph_bio_iter { + struct bio *bio; + struct bvec_iter iter; +}; + +#define __ceph_bio_iter_advance_step(it, n, STEP) do { \ + unsigned int __n = (n), __cur_n; \ + \ + while (__n) { \ + BUG_ON(!(it)->iter.bi_size); \ + __cur_n = min((it)->iter.bi_size, __n); \ + (void)(STEP); \ + bio_advance_iter((it)->bio, &(it)->iter, __cur_n); \ + if (!(it)->iter.bi_size && (it)->bio->bi_next) { \ + dout("__ceph_bio_iter_advance_step next bio\n"); \ + (it)->bio = 
(it)->bio->bi_next; \ + (it)->iter = (it)->bio->bi_iter; \ + } \ + __n -= __cur_n; \ + } \ +} while (0) + +/* + * Advance @it by @n bytes. + */ +#define ceph_bio_iter_advance(it, n) \ + __ceph_bio_iter_advance_step(it, n, 0) + +/* + * Advance @it by @n bytes, executing BVEC_STEP for each bio_vec. + */ +#define ceph_bio_iter_advance_step(it, n, BVEC_STEP) \ + __ceph_bio_iter_advance_step(it, n, ({ \ + struct bio_vec bv; \ + struct bvec_iter __cur_iter; \ + \ + __cur_iter = (it)->iter; \ + __cur_iter.bi_size = __cur_n; \ + __bio_for_each_segment(bv, (it)->bio, __cur_iter, __cur_iter) \ + (void)(BVEC_STEP); \ + })) + +#endif /* CONFIG_BLOCK */ + struct ceph_msg_data { struct list_head links; /* ceph_msg->data */ enum ceph_msg_data_type type; union { #ifdef CONFIG_BLOCK struct { - struct bio *bio; - size_t bio_length; + struct ceph_bio_iter bio_pos; + u32 bio_length; }; #endif /* CONFIG_BLOCK */ struct { @@ -122,10 +168,7 @@ struct ceph_msg_data_cursor { bool need_crc; /* crc update needed */ union { #ifdef CONFIG_BLOCK - struct { /* bio */ - struct bio *bio; /* bio from list */ - struct bvec_iter bvec_iter; - }; + struct ceph_bio_iter bio_iter; #endif /* CONFIG_BLOCK */ struct { /* pages */ unsigned int page_offset; /* offset in page */ @@ -290,8 +333,8 @@ extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, extern void ceph_msg_data_add_pagelist(struct ceph_msg *msg, struct ceph_pagelist *pagelist); #ifdef CONFIG_BLOCK -extern void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio, - size_t length); +void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos, + u32 length); #endif /* CONFIG_BLOCK */ extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 52fb37d1c2a5..315691490cb0 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -72,8 +72,8 @@ struct ceph_osd_data { struct 
ceph_pagelist *pagelist; #ifdef CONFIG_BLOCK struct { - struct bio *bio; /* list of bios */ - size_t bio_length; /* total in list */ + struct ceph_bio_iter bio_pos; + u32 bio_length; }; #endif /* CONFIG_BLOCK */ }; @@ -405,9 +405,10 @@ extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *, unsigned int which, struct ceph_pagelist *pagelist); #ifdef CONFIG_BLOCK -extern void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *, - unsigned int which, - struct bio *bio, size_t bio_length); +void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, + unsigned int which, + struct ceph_bio_iter *bio_pos, + u32 bio_length); #endif /* CONFIG_BLOCK */ extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, -- cgit v1.2.3 From b9e281c2b38804984d619e1d9efc4b9020bcb291 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 20 Jan 2018 10:30:11 +0100 Subject: libceph: introduce BVECS data type In preparation for rbd "fancy" striping, introduce ceph_bvec_iter for working with bio_vec array data buffers. The wrappers are trivial, but make it look similar to ceph_bio_iter. 
Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 42 +++++++++++++++++++++++++++++++++++++++++ include/linux/ceph/osd_client.h | 8 ++++++++ 2 files changed, 50 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index d7b9605fd51d..c7dfcb8a1fb2 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -76,6 +76,7 @@ enum ceph_msg_data_type { #ifdef CONFIG_BLOCK CEPH_MSG_DATA_BIO, /* data source/destination is a bio list */ #endif /* CONFIG_BLOCK */ + CEPH_MSG_DATA_BVECS, /* data source/destination is a bio_vec array */ }; static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type) @@ -87,6 +88,7 @@ static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type) #ifdef CONFIG_BLOCK case CEPH_MSG_DATA_BIO: #endif /* CONFIG_BLOCK */ + case CEPH_MSG_DATA_BVECS: return true; default: return false; @@ -139,6 +141,42 @@ struct ceph_bio_iter { #endif /* CONFIG_BLOCK */ +struct ceph_bvec_iter { + struct bio_vec *bvecs; + struct bvec_iter iter; +}; + +#define __ceph_bvec_iter_advance_step(it, n, STEP) do { \ + BUG_ON((n) > (it)->iter.bi_size); \ + (void)(STEP); \ + bvec_iter_advance((it)->bvecs, &(it)->iter, (n)); \ +} while (0) + +/* + * Advance @it by @n bytes. + */ +#define ceph_bvec_iter_advance(it, n) \ + __ceph_bvec_iter_advance_step(it, n, 0) + +/* + * Advance @it by @n bytes, executing BVEC_STEP for each bio_vec. 
+ */ +#define ceph_bvec_iter_advance_step(it, n, BVEC_STEP) \ + __ceph_bvec_iter_advance_step(it, n, ({ \ + struct bio_vec bv; \ + struct bvec_iter __cur_iter; \ + \ + __cur_iter = (it)->iter; \ + __cur_iter.bi_size = (n); \ + for_each_bvec(bv, (it)->bvecs, __cur_iter, __cur_iter) \ + (void)(BVEC_STEP); \ + })) + +#define ceph_bvec_iter_shorten(it, n) do { \ + BUG_ON((n) > (it)->iter.bi_size); \ + (it)->iter.bi_size = (n); \ +} while (0) + struct ceph_msg_data { struct list_head links; /* ceph_msg->data */ enum ceph_msg_data_type type; @@ -149,6 +187,7 @@ struct ceph_msg_data { u32 bio_length; }; #endif /* CONFIG_BLOCK */ + struct ceph_bvec_iter bvec_pos; struct { struct page **pages; /* NOT OWNER. */ size_t length; /* total # bytes */ @@ -170,6 +209,7 @@ struct ceph_msg_data_cursor { #ifdef CONFIG_BLOCK struct ceph_bio_iter bio_iter; #endif /* CONFIG_BLOCK */ + struct bvec_iter bvec_iter; struct { /* pages */ unsigned int page_offset; /* offset in page */ unsigned short page_index; /* index in array */ @@ -336,6 +376,8 @@ extern void ceph_msg_data_add_pagelist(struct ceph_msg *msg, void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos, u32 length); #endif /* CONFIG_BLOCK */ +void ceph_msg_data_add_bvecs(struct ceph_msg *msg, + struct ceph_bvec_iter *bvec_pos); extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, bool can_fail); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 315691490cb0..528ccc943cee 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -57,6 +57,7 @@ enum ceph_osd_data_type { #ifdef CONFIG_BLOCK CEPH_OSD_DATA_TYPE_BIO, #endif /* CONFIG_BLOCK */ + CEPH_OSD_DATA_TYPE_BVECS, }; struct ceph_osd_data { @@ -76,6 +77,7 @@ struct ceph_osd_data { u32 bio_length; }; #endif /* CONFIG_BLOCK */ + struct ceph_bvec_iter bvec_pos; }; }; @@ -410,6 +412,9 @@ void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, struct ceph_bio_iter 
*bio_pos, u32 bio_length); #endif /* CONFIG_BLOCK */ +void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, + unsigned int which, + struct ceph_bvec_iter *bvec_pos); extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, unsigned int which, @@ -419,6 +424,9 @@ extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages); +void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req, + unsigned int which, + struct bio_vec *bvecs, u32 bytes); extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *, unsigned int which, struct page **pages, u64 length, -- cgit v1.2.3 From ed0811d2d243c4195580a9671266031907c02ca7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 2 Feb 2018 15:23:22 +0100 Subject: libceph: striping framework implementation Signed-off-by: Ilya Dryomov --- include/linux/ceph/striper.h | 65 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 include/linux/ceph/striper.h (limited to 'include/linux') diff --git a/include/linux/ceph/striper.h b/include/linux/ceph/striper.h new file mode 100644 index 000000000000..74134ee5fdc8 --- /dev/null +++ b/include/linux/ceph/striper.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CEPH_STRIPER_H +#define _LINUX_CEPH_STRIPER_H + +#include +#include + +struct ceph_file_layout; + +struct ceph_object_extent { + struct list_head oe_item; + u64 oe_objno; + u64 oe_off; + u64 oe_len; +}; + +static inline void ceph_object_extent_init(struct ceph_object_extent *ex) +{ + INIT_LIST_HEAD(&ex->oe_item); +} + +/* + * Called for each mapped stripe unit. + * + * @bytes: number of bytes mapped, i.e. 
the minimum of the full length + * requested (file extent length) or the remainder of the stripe + * unit within an object + */ +typedef void (*ceph_object_extent_fn_t)(struct ceph_object_extent *ex, + u32 bytes, void *arg); + +int ceph_file_to_extents(struct ceph_file_layout *l, u64 off, u64 len, + struct list_head *object_extents, + struct ceph_object_extent *alloc_fn(void *arg), + void *alloc_arg, + ceph_object_extent_fn_t action_fn, + void *action_arg); +int ceph_iterate_extents(struct ceph_file_layout *l, u64 off, u64 len, + struct list_head *object_extents, + ceph_object_extent_fn_t action_fn, + void *action_arg); + +struct ceph_file_extent { + u64 fe_off; + u64 fe_len; +}; + +static inline u64 ceph_file_extents_bytes(struct ceph_file_extent *file_extents, + u32 num_file_extents) +{ + u64 bytes = 0; + u32 i; + + for (i = 0; i < num_file_extents; i++) + bytes += file_extents[i].fe_len; + + return bytes; +} + +int ceph_extent_to_file(struct ceph_file_layout *l, + u64 objno, u64 objoff, u64 objlen, + struct ceph_file_extent **file_extents, + u32 *num_file_extents); + +#endif -- cgit v1.2.3 From 08c1ac508b6dc20ac866e7cdb7279245437c7d26 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 17 Feb 2018 10:41:20 +0100 Subject: libceph, ceph: move ceph_calc_file_object_mapping() to striper.c ceph_calc_file_object_mapping() has nothing to do with osdmaps. 
Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 5 ----- include/linux/ceph/striper.h | 4 ++++ 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 92314035dac1..e71fb222c7c3 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -5,7 +5,6 @@ #include #include #include -#include #include /* @@ -280,10 +279,6 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting, const struct ceph_osds *new_acting, bool any_change); -void ceph_calc_file_object_mapping(struct ceph_file_layout *l, - u64 off, u64 len, - u64 *objno, u64 *objoff, u32 *xlen); - int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi, const struct ceph_object_id *oid, const struct ceph_object_locator *oloc, diff --git a/include/linux/ceph/striper.h b/include/linux/ceph/striper.h index 74134ee5fdc8..cbd0d24b7148 100644 --- a/include/linux/ceph/striper.h +++ b/include/linux/ceph/striper.h @@ -7,6 +7,10 @@ struct ceph_file_layout; +void ceph_calc_file_object_mapping(struct ceph_file_layout *l, + u64 off, u64 len, + u64 *objno, u64 *objoff, u32 *xlen); + struct ceph_object_extent { struct list_head oe_item; u64 oe_objno; -- cgit v1.2.3 From bb48bd4dc45f9ee1e44d8e9fcb01023e0d0ba80d Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Tue, 13 Mar 2018 10:42:44 +0800 Subject: ceph: optimize memory usage In current code, regular file and directory use same struct ceph_file_info to store fs specific data so the struct has to include some fields which are only used for directory (e.g., readdir related info), when having plenty of regular files, it will lead to memory waste. This patch introduces dedicated ceph_dir_file_info cache for readdir related things. So that regular file does not include those unused fields anymore.
Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index c2ec44cf5098..49c93b9308d7 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -262,6 +262,7 @@ extern struct kmem_cache *ceph_cap_cachep; extern struct kmem_cache *ceph_cap_flush_cachep; extern struct kmem_cache *ceph_dentry_cachep; extern struct kmem_cache *ceph_file_cachep; +extern struct kmem_cache *ceph_dir_file_cachep; /* ceph_common.c */ extern bool libceph_compatible(void *data); -- cgit v1.2.3 From fb18a57568c2b84cd611e242c0f6fa97b45e4907 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 5 Jan 2018 10:47:18 +0000 Subject: ceph: quota: add initial infrastructure to support cephfs quotas This patch adds the infrastructure required to support cephfs quotas as it is currently implemented in the ceph fuse client. Cephfs quotas can be set on any directory, and can restrict the number of bytes or the number of files stored beneath that point in the directory hierarchy. Quotas are set using the extended attributes 'ceph.quota.max_files' and 'ceph.quota.max_bytes', and can be removed by setting these attributes to '0'. 
Link: http://tracker.ceph.com/issues/22372 Signed-off-by: Luis Henriques Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_features.h | 1 + include/linux/ceph/ceph_fs.h | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 59042d5ac520..3901927cf6a0 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -204,6 +204,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \ CEPH_FEATURE_MSGR_KEEPALIVE2 | \ CEPH_FEATURE_OSD_POOLRESEND | \ + CEPH_FEATURE_MDS_QUOTA | \ CEPH_FEATURE_CRUSH_V4 | \ CEPH_FEATURE_NEW_OSDOP_ENCODING | \ CEPH_FEATURE_SERVER_JEWEL | \ diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 88dd51381aaf..7ecfc88314d8 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -134,6 +134,7 @@ struct ceph_dir_layout { #define CEPH_MSG_CLIENT_LEASE 0x311 #define CEPH_MSG_CLIENT_SNAP 0x312 #define CEPH_MSG_CLIENT_CAPRELEASE 0x313 +#define CEPH_MSG_CLIENT_QUOTA 0x314 /* pool ops */ #define CEPH_MSG_POOLOP_REPLY 48 @@ -807,4 +808,20 @@ struct ceph_mds_snap_realm { } __attribute__ ((packed)); /* followed by my snap list, then prior parent snap list */ +/* + * quotas + */ +struct ceph_mds_quota { + __le64 ino; /* ino */ + struct ceph_timespec rctime; + __le64 rbytes; /* dir stats */ + __le64 rfiles; + __le64 rsubdirs; + __u8 struct_v; /* compat */ + __u8 struct_compat; + __le32 struct_len; + __le64 max_bytes; /* quota max. bytes */ + __le64 max_files; /* quota max. 
files */ +} __attribute__ ((packed)); + #endif -- cgit v1.2.3 From 8ea229511e06f9635ecc338dcbe0db41a73623f0 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 2 Apr 2018 16:26:25 +0530 Subject: thermal: Add cooling device's statistics in sysfs This extends the sysfs interface for thermal cooling devices and exposes some pretty useful statistics. These statistics have proven to be quite useful, especially while doing benchmarks related to the task scheduler, where we want to make sure that nothing has disrupted the test, especially the cooling device which may have put constraints on the CPUs. The information exposed here tells us to what extent the CPUs were constrained by the thermal framework. The write-only "reset" file is used to reset the statistics. The read-only "time_in_state_ms" file shows the time (in msec) spent by the device in the respective cooling states, and it prints one line per cooling state. The read-only "total_trans" file shows a single positive integer value giving the total number of cooling state transitions the device has gone through since the time the cooling device was registered or the time when statistics were last reset. The read-only "trans_table" file shows a two dimensional matrix, where an entry (row i, column j) represents the number of transitions from State_i to State_j. This is what the directory structure looks like for a single cooling device: $ ls -R /sys/class/thermal/cooling_device0/ /sys/class/thermal/cooling_device0/: cur_state max_state power stats subsystem type uevent /sys/class/thermal/cooling_device0/power: autosuspend_delay_ms runtime_active_time runtime_suspended_time control runtime_status /sys/class/thermal/cooling_device0/stats: reset time_in_state_ms total_trans trans_table This is tested on ARM 64-bit Hisilicon hikey620 board running Ubuntu and ARM 64-bit Hisilicon hikey960 board running Android. 
Signed-off-by: Viresh Kumar Signed-off-by: Zhang Rui --- include/linux/thermal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 8c5302374eaa..7834be668d80 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -148,6 +148,7 @@ struct thermal_cooling_device { struct device device; struct device_node *np; void *devdata; + void *stats; const struct thermal_cooling_device_ops *ops; bool updated; /* true if the cooling device does not need update */ struct mutex lock; /* protect thermal_instances list */ -- cgit v1.2.3 From d53238cd51a80f6f2e5b9d64830c62e2086787bd Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:37 +0100 Subject: kernel: open-code sys_rt_sigpending() in sys_sigpending() A similar but not fully equivalent code path is already open-coded three times (in sys_rt_sigpending and in the two compat stubs), so do it a fourth time here. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0526286a0314..a63e21e7a3af 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -288,7 +288,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data); asmlinkage long sys_personality(unsigned int personality); -asmlinkage long sys_sigpending(old_sigset_t __user *set); +asmlinkage long sys_sigpending(old_sigset_t __user *uset); asmlinkage long sys_sigprocmask(int how, old_sigset_t __user *set, old_sigset_t __user *oset); asmlinkage long sys_sigaltstack(const struct sigaltstack __user *uss, -- cgit v1.2.3 From 2de0db992de189fccc83fed57c30875144821491 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:26 +0100 Subject: mm: use do_futex() instead of sys_futex() in mm_release() sys_futex() is a wrapper to do_futex() which does not modify any values here: - uaddr, val and val3 are kept the same - op is masked with FUTEX_CMD_MASK, but is always set to FUTEX_WAKE. Therefore, val2 is always 0. - as utime is set to NULL, *timeout is NULL This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Darren Hart Cc: Andrew Morton Reviewed-by: Thomas Gleixner Signed-off-by: Dominik Brodowski --- include/linux/futex.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index c0fb9a24bbd2..821ae502d3d8 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -9,9 +9,6 @@ struct inode; struct mm_struct; struct task_struct; -long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, - u32 __user *uaddr2, u32 val2, u32 val3); - extern int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi); @@ -55,6 +52,9 @@ union futex_key { #ifdef CONFIG_FUTEX extern void exit_robust_list(struct task_struct *curr); + +long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, + u32 __user *uaddr2, u32 val2, u32 val3); #ifdef CONFIG_HAVE_FUTEX_CMPXCHG #define futex_cmpxchg_enabled 1 #else @@ -64,6 +64,13 @@ extern int futex_cmpxchg_enabled; static inline void exit_robust_list(struct task_struct *curr) { } + +static inline long do_futex(u32 __user *uaddr, int op, u32 val, + ktime_t *timeout, u32 __user *uaddr2, + u32 val2, u32 val3) +{ + return -EINVAL; +} #endif #ifdef CONFIG_FUTEX_PI -- cgit v1.2.3 From 7a09e1eb9c1e5179797e0f3341ba7315c7626a0c Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:10:06 +0100 Subject: net: socket: add __sys_recvfrom() helper; remove in-kernel call to syscall Using the net-internal helper __sys_recvfrom() allows us to avoid the internal calls to the sys_recvfrom() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. 
Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 9286a5a8c60c..40cc93b91628 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -353,4 +353,10 @@ extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen unsigned int flags, struct timespec *timeout); extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags); + +/* helpers which do the actual work for syscalls */ +extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, + unsigned int flags, struct sockaddr __user *addr, + int __user *addr_len); + #endif /* _LINUX_SOCKET_H */ -- cgit v1.2.3 From 211b634b7f1ebc3436d5e8a34810a8eaa1f269d9 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:18:52 +0100 Subject: net: socket: add __sys_sendto() helper; remove in-kernel call to syscall Using the net-internal helper __sys_sendto() allows us to avoid the internal calls to the sys_sendto() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. 
Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 40cc93b91628..54b85abc7265 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -358,5 +358,8 @@ extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags, struct sockaddr __user *addr, int __user *addr_len); +extern int __sys_sendto(int fd, void __user *buff, size_t len, + unsigned int flags, struct sockaddr __user *addr, + int addr_len); #endif /* _LINUX_SOCKET_H */ -- cgit v1.2.3 From 4541e80560ca56d63348f40f2e34b044a5eb3dd7 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:24:23 +0100 Subject: net: socket: add __sys_accept4() helper; remove in-kernel call to syscall Using the net-internal helper __sys_accept4() allows us to avoid the internal calls to the sys_accept4() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. 
Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 54b85abc7265..6a9840271676 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -361,5 +361,7 @@ extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, extern int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags, struct sockaddr __user *addr, int addr_len); +extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags); #endif /* _LINUX_SOCKET_H */ -- cgit v1.2.3 From 9d6a15c3f23bcf0b0e6c6efd7d19f52d960a8697 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:29:43 +0100 Subject: net: socket: add __sys_socket() helper; remove in-kernel call to syscall Using the net-internal helper __sys_socket() allows us to avoid the internal calls to the sys_socket() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. 
Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 6a9840271676..f8d040434a13 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -363,5 +363,6 @@ extern int __sys_sendto(int fd, void __user *buff, size_t len, int addr_len); extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); +extern int __sys_socket(int family, int type, int protocol); #endif /* _LINUX_SOCKET_H */ -- cgit v1.2.3 From a87d35d87a3e4f2a0b0968d1f06703c909138b62 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:33:09 +0100 Subject: net: socket: add __sys_bind() helper; remove in-kernel call to syscall Using the net-internal helper __sys_bind() allows us to avoid the internal calls to the sys_bind() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. 
Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index f8d040434a13..e9cee272da13 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -364,5 +364,6 @@ extern int __sys_sendto(int fd, void __user *buff, size_t len, extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); extern int __sys_socket(int family, int type, int protocol); +extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); #endif /* _LINUX_SOCKET_H */ -- cgit v1.2.3 From 1387c2c2f988f8180c6189d5083eaeeb8f120d44 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:35:09 +0100 Subject: net: socket: add __sys_connect() helper; remove in-kernel call to syscall Using the net-internal helper __sys_connect() allows us to avoid the internal calls to the sys_connect() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. 
Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index e9cee272da13..7daa344d7320 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -365,5 +365,7 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); extern int __sys_socket(int family, int type, int protocol); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); +extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, + int addrlen); #endif /* _LINUX_SOCKET_H */ -- cgit v1.2.3 From 25e290eed9c653cc90ac675d64b30b66cffce82f Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:36:54 +0100 Subject: net: socket: add __sys_listen() helper; remove in-kernel call to syscall Using the net-internal helper __sys_listen() allows us to avoid the internal calls to the sys_listen() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. 
Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 7daa344d7320..7e37af25509d 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -367,5 +367,6 @@ extern int __sys_socket(int family, int type, int protocol); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen); +extern int __sys_listen(int fd, int backlog); #endif /* _LINUX_SOCKET_H */ -- cgit v1.2.3 From 8882a107b3062c6222cdbeadb284ea054ae50a3f Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:43:14 +0100 Subject: net: socket: add __sys_getsockname() helper; remove in-kernel call to syscall Using the net-internal helper __sys_getsockname() allows us to avoid the internal calls to the sys_getsockname() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. 
Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 7e37af25509d..ef0226a61b03 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -368,5 +368,7 @@ extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen); extern int __sys_listen(int fd, int backlog); +extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, + int __user *usockaddr_len); #endif /* _LINUX_SOCKET_H */ -- cgit v1.2.3 From b21c8f838a0e5b84c0d78fd9ed6b9f2cf97afbe9 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:47:00 +0100 Subject: net: socket: add __sys_getpeername() helper; remove in-kernel call to syscall Using the net-internal helper __sys_getpeername() allows us to avoid the internal calls to the sys_getpeername() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. 
Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index ef0226a61b03..9ba003e92fea 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -370,5 +370,7 @@ extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, extern int __sys_listen(int fd, int backlog); extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); +extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, + int __user *usockaddr_len); #endif /* _LINUX_SOCKET_H */ -- cgit v1.2.3 From 6debc8d834ebc73566e5255c565c8fb307be22c5 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 19:49:23 +0100 Subject: net: socket: add __sys_socketpair() helper; remove in-kernel call to syscall Using the net-internal helper __sys_socketpair() allows us to avoid the internal calls to the sys_socketpair() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. 
Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 9ba003e92fea..dbdddf0d079e 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -372,5 +372,7 @@ extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); +extern int __sys_socketpair(int family, int type, int protocol, + int __user *usockvec); #endif /* _LINUX_SOCKET_H */ -- cgit v1.2.3 From 005a1aeac46666e2805f316596893ebd2a3c12e7 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 20:07:05 +0100 Subject: net: socket: add __sys_shutdown() helper; remove in-kernel call to syscall Using the net-internal helper __sys_shutdown() allows us to avoid the internal calls to the sys_shutdown() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. 
Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index dbdddf0d079e..b205138b69f1 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -374,5 +374,6 @@ extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); extern int __sys_socketpair(int family, int type, int protocol, int __user *usockvec); +extern int __sys_shutdown(int fd, int how); #endif /* _LINUX_SOCKET_H */ -- cgit v1.2.3 From cc36dca0dffad991135d0e28938ba2b4c0b786f7 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 20:10:59 +0100 Subject: net: socket: add __sys_setsockopt() helper; remove in-kernel call to syscall Using the net-internal helper __sys_setsockopt() allows us to avoid the internal calls to the sys_setsockopt() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index b205138b69f1..cad120e4ed4b 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -376,4 +376,5 @@ extern int __sys_socketpair(int family, int type, int protocol, int __user *usockvec); extern int __sys_shutdown(int fd, int how); + #endif /* _LINUX_SOCKET_H */ -- cgit v1.2.3 From e1834a329d6bb5659c14e9e537bd1f750fe3b85e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 20:35:57 +0100 Subject: net: socket: move check for forbid_cmsg_compat to __sys_...msg() The non-compat codepaths for sys_...msg() verify that MSG_CMSG_COMPAT is not set. 
By moving this check to the __sys_...msg() functions (and making it dependent on a static flag passed to this function), we can call the __sys...msg() functions instead of the syscall functions in all cases. __sys_recvmmsg() does not need this trickery, as the check is handled within the do_sys_recvmmsg() function internal to net/socket.c. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/socket.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index cad120e4ed4b..e2b6bd4fe977 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -346,13 +346,18 @@ extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); struct timespec; -/* The __sys_...msg variants allow MSG_CMSG_COMPAT */ -extern long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags); -extern long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags); +/* The __sys_...msg variants allow MSG_CMSG_COMPAT iff + * forbid_cmsg_compat==false + */ +extern long __sys_recvmsg(int fd, struct user_msghdr __user *msg, + unsigned int flags, bool forbid_cmsg_compat); +extern long __sys_sendmsg(int fd, struct user_msghdr __user *msg, + unsigned int flags, bool forbid_cmsg_compat); extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags, struct timespec *timeout); extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, - unsigned int vlen, unsigned int flags); + unsigned int vlen, unsigned int flags, + bool forbid_cmsg_compat); /* helpers which do the actual work for syscalls */ extern int __sys_recvfrom(int fd, void __user *ubuf, 
size_t size, -- cgit v1.2.3 From cb0b476ab12ca3bd9dd9122047660f3a73e8d647 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 17 Mar 2018 16:26:56 +0100 Subject: fs/quota: add kernel_quotactl() helper; remove in-kernel call to syscall Using the fs-internal kernel_quotactl() helper allows us to get rid of the fs-internal call to the sys_quotactl() syscall. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Acked-by: Jan Kara Signed-off-by: Dominik Brodowski --- include/linux/quotaops.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 2fb6fb11132e..dc905a4ff8d7 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -27,6 +27,9 @@ static inline bool is_quota_modification(struct inode *inode, struct iattr *ia) (ia->ia_valid & ATTR_GID && !gid_eq(ia->ia_gid, inode->i_gid)); } +int kernel_quotactl(unsigned int cmd, const char __user *special, + qid_t id, void __user *addr); + #if defined(CONFIG_QUOTA) #define quota_error(sb, fmt, args...) \ -- cgit v1.2.3 From ab0d1e85bfd0c25260f02cd3708d5abdfb5b5a9c Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 4 Mar 2018 21:54:05 +0100 Subject: fs/quota: use COMPAT_SYSCALL_DEFINE for sys32_quotactl() While sys32_quotactl() is only needed on x86, it can use the recommended COMPAT_SYSCALL_DEFINEx() machinery for its setup. 
Acked-by: Jan Kara Cc: Christoph Hellwig Signed-off-by: Dominik Brodowski --- include/linux/compat.h | 3 +++ include/linux/syscalls.h | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 16c3027074a2..f1649a5e6716 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -461,6 +461,9 @@ asmlinkage ssize_t compat_sys_pwritev2(compat_ulong_t fd, const struct compat_iovec __user *vec, compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags); +asmlinkage long compat_sys_quotactl32(unsigned int cmd, + const char __user *special, qid_t id, void __user *addr); + #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 asmlinkage long compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a63e21e7a3af..6ab7ed71a8b6 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -241,8 +241,6 @@ static inline void addr_limit_user_check(void) #endif } -asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special, - qid_t id, void __user *addr); asmlinkage long sys_time(time_t __user *tloc); asmlinkage long sys_stime(time_t __user *tptr); asmlinkage long sys_gettimeofday(struct timeval __user *tv, -- cgit v1.2.3 From 312db1aa1dc7bff133d95c92efcc5e42b57cefa6 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:39 +0100 Subject: fs: add ksys_mount() helper; remove in-kernel calls to sys_mount() Using this helper allows us to avoid the in-kernel calls to the sys_mount() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_mount(). In the near future, all callers of ksys_mount() should be converted to call do_mount() directly. This patch is part of a series which removes in-kernel calls to syscalls. 
On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 6ab7ed71a8b6..3a9f9c534624 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -946,4 +946,7 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, * the ksys_xyzyyz() functions prototyped below. */ +int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type, + unsigned long flags, void __user *data); + #endif -- cgit v1.2.3 From 3a18ef5c1b3935cb05888fc37964321f7bd6231d Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:40 +0100 Subject: fs: add ksys_umount() helper; remove in-kernel call to sys_umount() Using this helper allows us to avoid the in-kernel call to the sys_umount() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_umount(). In the near future, the only fs-external caller of ksys_umount() should be converted to call do_umount() directly. Then, ksys_umount() can be moved within sys_umount() again. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3a9f9c534624..48964c408c7b 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -948,5 +948,6 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type, unsigned long flags, void __user *data); +int ksys_umount(char __user *name, int flags); #endif -- cgit v1.2.3 From c7248321a3d42ffba78db0dde88d1c49ca1c045f Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:40 +0100 Subject: fs: add ksys_dup{,3}() helper; remove in-kernel calls to sys_dup{,3}() Using ksys_dup() and ksys_dup3() as helper functions allows us to avoid the in-kernel calls to the sys_dup() and sys_dup3() syscalls. The ksys_ prefix denotes that these functions are meant as a drop-in replacement for the syscalls. In particular, they use the same calling convention as sys_dup{,3}(). In the near future, the fs-external callers of ksys_dup{,3}() should be converted to call do_dup2() directly. Then, ksys_dup{,3}() can be moved within sys_dup{,3}() again. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 48964c408c7b..50876ae1d17b 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -949,5 +949,6 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type, unsigned long flags, void __user *data); int ksys_umount(char __user *name, int flags); +int ksys_dup(unsigned int fildes); #endif -- cgit v1.2.3 From a16fe33ab5572e52ef4cb9719d6eb49623b2528a Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:41 +0100 Subject: fs: add ksys_chroot() helper; remove in-kernel calls to sys_chroot() Using this helper allows us to avoid the in-kernel calls to the sys_chroot() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_chroot(). In the near future, the fs-external callers of ksys_chroot() should be converted to use kern_path()/set_fs_root() directly. Then ksys_chroot() can be moved within sys_chroot() again. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 50876ae1d17b..920a0db1871d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -950,5 +950,6 @@ int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type, unsigned long flags, void __user *data); int ksys_umount(char __user *name, int flags); int ksys_dup(unsigned int fildes); +int ksys_chroot(const char __user *filename); #endif -- cgit v1.2.3 From e7a3e8b2edf544ec28f689385c3adc2903f46ec0 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:41 +0100 Subject: fs: add ksys_write() helper; remove in-kernel calls to sys_write() Using this helper allows us to avoid the in-kernel calls to the sys_write() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_write(). In the near future, the do_mounts / initramfs callers of ksys_write() should be converted to use filp_open() and vfs_write() instead. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Cc: linux-s390@vger.kernel.org Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 920a0db1871d..80524faa9664 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -951,5 +951,6 @@ int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type, int ksys_umount(char __user *name, int flags); int ksys_dup(unsigned int fildes); int ksys_chroot(const char __user *filename); +ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); #endif -- cgit v1.2.3 From 447016e9681965fda8dcd9e4fd3c55308a6fd166 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:46 +0100 Subject: fs: add ksys_chdir() helper; remove in-kernel calls to sys_chdir() Using this helper allows us to avoid the in-kernel calls to the sys_chdir() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_chdir(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 80524faa9664..090645b48447 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -952,5 +952,6 @@ int ksys_umount(char __user *name, int flags); int ksys_dup(unsigned int fildes); int ksys_chroot(const char __user *filename); ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); +int ksys_chdir(const char __user *filename); #endif -- cgit v1.2.3 From 0f32ab8cfac478be053cb526ced8918ef6f4df47 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:47 +0100 Subject: fs: add ksys_unlink() wrapper; remove in-kernel calls to sys_unlink() Using this wrapper allows us to avoid the in-kernel calls to the sys_unlink() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_unlink(). In the near future, all callers of ksys_unlink() should be converted to call do_unlinkat() directly or, at least, to operate on regular kernel pointers. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 090645b48447..7cbfb41e666b 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -954,4 +954,15 @@ int ksys_chroot(const char __user *filename); ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); int ksys_chdir(const char __user *filename); +/* + * The following kernel syscall equivalents are just wrappers to fs-internal + * functions. Therefore, provide stubs to be inlined at the callsites. + */ +extern long do_unlinkat(int dfd, struct filename *name); + +static inline long ksys_unlink(const char __user *pathname) +{ + return do_unlinkat(AT_FDCWD, getname(pathname)); +} + #endif -- cgit v1.2.3 From f459dffae1c6026928bbe8e972daecb635b7b5e9 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:48 +0100 Subject: fs: add ksys_rmdir() wrapper; remove in-kernel calls to sys_rmdir() Using this wrapper allows us to avoid the in-kernel calls to the sys_rmdir() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_rmdir(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 7cbfb41e666b..746043a05884 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -965,4 +965,11 @@ static inline long ksys_unlink(const char __user *pathname) return do_unlinkat(AT_FDCWD, getname(pathname)); } +extern long do_rmdir(int dfd, const char __user *pathname); + +static inline long ksys_rmdir(const char __user *pathname) +{ + return do_rmdir(AT_FDCWD, pathname); +} + #endif -- cgit v1.2.3 From 0101db7a301981a008296d522d8c1f456b0fe837 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:49 +0100 Subject: fs: add do_mkdirat() helper and ksys_mkdir() wrapper; remove in-kernel calls to syscall Using the fs-internal do_mkdirat() helper allows us to get rid of fs-internal calls to the sys_mkdirat() syscall. Introducing the ksys_mkdir() wrapper allows us to avoid the in-kernel calls to the sys_mkdir() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_mkdir(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 746043a05884..c982cb5f4e50 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -972,4 +972,11 @@ static inline long ksys_rmdir(const char __user *pathname) return do_rmdir(AT_FDCWD, pathname); } +extern long do_mkdirat(int dfd, const char __user *pathname, umode_t mode); + +static inline long ksys_mkdir(const char __user *pathname, umode_t mode) +{ + return do_mkdirat(AT_FDCWD, pathname, mode); +} + #endif -- cgit v1.2.3 From b724e846b491ef8db943be8086226c9d8da31877 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:49 +0100 Subject: fs: add do_symlinkat() helper and ksys_symlink() wrapper; remove in-kernel calls to syscall Using the fs-internal do_symlinkat() helper allows us to get rid of fs-internal calls to the sys_symlinkat() syscall. Introducing the ksys_symlink() wrapper allows us to avoid the in-kernel calls to the sys_symlink() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_symlink(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index c982cb5f4e50..39c5cef86a10 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -979,4 +979,13 @@ static inline long ksys_mkdir(const char __user *pathname, umode_t mode) return do_mkdirat(AT_FDCWD, pathname, mode); } +extern long do_symlinkat(const char __user *oldname, int newdfd, + const char __user *newname); + +static inline long ksys_symlink(const char __user *oldname, + const char __user *newname) +{ + return do_symlinkat(oldname, AT_FDCWD, newname); +} + #endif -- cgit v1.2.3 From 87c4e19262d81862886207be3c8795f6576d5a52 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:50 +0100 Subject: fs: add do_mknodat() helper and ksys_mknod() wrapper; remove in-kernel calls to syscall Using the fs-internal do_mknodat() helper allows us to get rid of fs-internal calls to the sys_mknodat() syscall. Introducing the ksys_mknod() wrapper allows us to avoid the in-kernel calls to sys_mknod() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_mknod(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 39c5cef86a10..0b4fd684f0f1 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -988,4 +988,13 @@ static inline long ksys_symlink(const char __user *oldname, return do_symlinkat(oldname, AT_FDCWD, newname); } +extern long do_mknodat(int dfd, const char __user *filename, umode_t mode, + unsigned int dev); + +static inline long ksys_mknod(const char __user *filename, umode_t mode, + unsigned int dev) +{ + return do_mknodat(AT_FDCWD, filename, mode, dev); +} + #endif -- cgit v1.2.3 From 46ea89eb652a365e10257016d09dcf1aaf23cf63 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:53 +0100 Subject: fs: add do_linkat() helper and ksys_link() wrapper; remove in-kernel calls to syscall Using the fs-internal do_linkat() helper allows us to get rid of fs-internal calls to the sys_linkat() syscall. Introducing the ksys_link() wrapper allows us to avoid the in-kernel calls to sys_link() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_link(). In the near future, the only fs-external user of ksys_link() should be converted to use vfs_link() instead. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0b4fd684f0f1..827ed917630c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -997,4 +997,13 @@ static inline long ksys_mknod(const char __user *filename, umode_t mode, return do_mknodat(AT_FDCWD, filename, mode, dev); } +extern int do_linkat(int olddfd, const char __user *oldname, int newdfd, + const char __user *newname, int flags); + +static inline long ksys_link(const char __user *oldname, + const char __user *newname) +{ + return do_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); +} + #endif -- cgit v1.2.3 From 03450e271a160bc07a2c48e5769e0ba338582d77 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:53 +0100 Subject: fs: add ksys_fchmod() and do_fchmodat() helpers and ksys_chmod() wrapper; remove in-kernel calls to syscall Using the fs-internal do_fchmodat() helper allows us to get rid of fs-internal calls to the sys_fchmodat() syscall. Introducing the ksys_fchmod() helper and the ksys_chmod() wrapper allows us to avoid the in-kernel calls to the sys_fchmod() and sys_chmod() syscalls. The ksys_ prefix denotes that these functions are meant as a drop-in replacement for the syscalls. In particular, they use the same calling convention as sys_fchmod() and sys_chmod(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 827ed917630c..dd6c306f4f00 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -953,6 +953,7 @@ int ksys_dup(unsigned int fildes); int ksys_chroot(const char __user *filename); ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); int ksys_chdir(const char __user *filename); +int ksys_fchmod(unsigned int fd, umode_t mode); /* * The following kernel syscall equivalents are just wrappers to fs-internal @@ -1006,4 +1007,11 @@ static inline long ksys_link(const char __user *oldname, return do_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); } +extern int do_fchmodat(int dfd, const char __user *filename, umode_t mode); + +static inline int ksys_chmod(const char __user *filename, umode_t mode) +{ + return do_fchmodat(AT_FDCWD, filename, mode); +} + #endif -- cgit v1.2.3 From cbfe20f565228966f0249f016752437df95df679 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:54 +0100 Subject: fs: add do_faccessat() helper and ksys_access() wrapper; remove in-kernel calls to syscall Using the fs-internal do_faccessat() helper allows us to get rid of fs-internal calls to the sys_faccessat() syscall. Introducing the ksys_access() wrapper allows us to avoid the in-kernel calls to the sys_access() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_access(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index dd6c306f4f00..33f06de090ea 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1014,4 +1014,11 @@ static inline int ksys_chmod(const char __user *filename, umode_t mode) return do_fchmodat(AT_FDCWD, filename, mode); } +extern long do_faccessat(int dfd, const char __user *filename, int mode); + +static inline long ksys_access(const char __user *filename, int mode) +{ + return do_faccessat(AT_FDCWD, filename, mode); +} + #endif -- cgit v1.2.3 From 55731b3cda3a85ee888dac3bf1f36489f275c187 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:55 +0100 Subject: fs: add do_fchownat(), ksys_fchown() helpers and ksys_{,l}chown() wrappers Using the fs-internal do_fchownat() wrapper allows us to get rid of fs-internal calls to the sys_fchownat() syscall. Introducing the ksys_fchown() helper and the ksys_{,l}chown() wrappers allows us to avoid the in-kernel calls to the sys_{,l,f}chown() syscalls. The ksys_ prefix denotes that these functions are meant as a drop-in replacement for the syscalls. In particular, they use the same calling convention as sys_{,l,f}chown(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 33f06de090ea..df0d1e818a6e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -954,6 +954,7 @@ int ksys_chroot(const char __user *filename); ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); int ksys_chdir(const char __user *filename); int ksys_fchmod(unsigned int fd, umode_t mode); +int ksys_fchown(unsigned int fd, uid_t user, gid_t group); /* * The following kernel syscall equivalents are just wrappers to fs-internal @@ -1021,4 +1022,20 @@ static inline long ksys_access(const char __user *filename, int mode) return do_faccessat(AT_FDCWD, filename, mode); } +extern int do_fchownat(int dfd, const char __user *filename, uid_t user, + gid_t group, int flag); + +static inline long ksys_chown(const char __user *filename, uid_t user, + gid_t group) +{ + return do_fchownat(AT_FDCWD, filename, user, group, 0); +} + +static inline long ksys_lchown(const char __user *filename, uid_t user, + gid_t group) +{ + return do_fchownat(AT_FDCWD, filename, user, group, + AT_SYMLINK_NOFOLLOW); +} + #endif -- cgit v1.2.3 From 411d9475cf901b5a6d2996b46cb5726184a4fa50 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:54 +0100 Subject: fs: add ksys_ftruncate() wrapper; remove in-kernel calls to sys_ftruncate() Using the ksys_ftruncate() wrapper allows us to get rid of in-kernel calls to the sys_ftruncate() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_ftruncate(). This patch is part of a series which removes in-kernel calls to syscalls. 
On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index df0d1e818a6e..41023177c8ec 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1038,4 +1038,11 @@ static inline long ksys_lchown(const char __user *filename, uid_t user, AT_SYMLINK_NOFOLLOW); } +extern long do_sys_ftruncate(unsigned int fd, loff_t length, int small); + +static inline long ksys_ftruncate(unsigned int fd, unsigned long length) +{ + return do_sys_ftruncate(fd, length, 1); +} + #endif -- cgit v1.2.3 From 2ca2a09d6215fd9621aa3e2db7cc9428a61f2911 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:55 +0100 Subject: fs: add ksys_close() wrapper; remove in-kernel calls to sys_close() Using the ksys_close() wrapper allows us to get rid of in-kernel calls to the sys_close() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_close(), with one subtle difference: The few places which checked the return value did not care about the return value re-writing in sys_close(), so simply use a wrapper around __close_fd(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 41023177c8ec..38805f3447ea 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1045,4 +1045,16 @@ static inline long ksys_ftruncate(unsigned int fd, unsigned long length) return do_sys_ftruncate(fd, length, 1); } +extern int __close_fd(struct files_struct *files, unsigned int fd); + +/* + * In contrast to sys_close(), this stub does not check whether the syscall + * should or should not be restarted, but returns the raw error codes from + * __close_fd(). + */ +static inline int ksys_close(unsigned int fd) +{ + return __close_fd(current->files, fd); +} + #endif -- cgit v1.2.3 From bae217ea8c7e123ed3fb1064909a262924771bbb Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:56 +0100 Subject: fs: add ksys_open() wrapper; remove in-kernel calls to sys_open() Using this wrapper allows us to avoid the in-kernel calls to the sys_open() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_open(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 38805f3447ea..f9df17dcec1c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1057,4 +1057,15 @@ static inline int ksys_close(unsigned int fd) return __close_fd(current->files, fd); } +extern long do_sys_open(int dfd, const char __user *filename, int flags, + umode_t mode); + +static inline long ksys_open(const char __user *filename, int flags, + umode_t mode) +{ + if (force_o_largefile()) + flags |= O_LARGEFILE; + return do_sys_open(AT_FDCWD, filename, flags, mode); +} + #endif -- cgit v1.2.3 From 454dab3f965ec24fda8fbe135c8dad4c5b238a86 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 21:34:04 +0100 Subject: fs: add ksys_getdents64() helper; remove in-kernel calls to sys_getdents64() Using this helper allows us to avoid the in-kernel calls to the sys_getdents64() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_getdents64(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f9df17dcec1c..c056aff6d7ad 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -955,6 +955,8 @@ ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); int ksys_chdir(const char __user *filename); int ksys_fchmod(unsigned int fd, umode_t mode); int ksys_fchown(unsigned int fd, uid_t user, gid_t group); +int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, + unsigned int count); /* * The following kernel syscall equivalents are just wrappers to fs-internal -- cgit v1.2.3 From cbb60b924b9f3e4d7c67a1c9dcf981718f926e4e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 21:43:59 +0100 Subject: fs: add ksys_ioctl() helper; remove in-kernel calls to sys_ioctl() Using this helper allows us to avoid the in-kernel calls to the sys_ioctl() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_ioctl(). After careful review, at least some of these calls could be converted to do_vfs_ioctl() in future. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index c056aff6d7ad..5a959efd8fb7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -957,6 +957,7 @@ int ksys_fchmod(unsigned int fd, umode_t mode); int ksys_fchown(unsigned int fd, uid_t user, gid_t group); int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, unsigned int count); +int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); /* * The following kernel syscall equivalents are just wrappers to fs-internal -- cgit v1.2.3 From 76847e4344350970e1c2e27c28b5abb3c588c5b3 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 21:51:17 +0100 Subject: fs: add ksys_lseek() helper; remove in-kernel calls to sys_lseek() Using this helper allows us to avoid the in-kernel calls to the sys_lseek() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_lseek(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 5a959efd8fb7..0f24e5334569 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -958,6 +958,7 @@ int ksys_fchown(unsigned int fd, uid_t user, gid_t group); int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, unsigned int count); int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); +off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence); /* * The following kernel syscall equivalents are just wrappers to fs-internal -- cgit v1.2.3 From 3ce4a7bf66263748194b77ccefd284be963c6304 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 13 Mar 2018 21:56:26 +0100 Subject: fs: add ksys_read() helper; remove in-kernel calls to sys_read() Using this helper allows us to avoid the in-kernel calls to the sys_read() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_read(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0f24e5334569..3a2e90842ff8 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -959,6 +959,7 @@ int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, unsigned int count); int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence); +ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count); /* * The following kernel syscall equivalents are just wrappers to fs-internal -- cgit v1.2.3 From 70f68ee81e2e9ad5105b8d2bd324e890e94c6ad9 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Wed, 14 Mar 2018 22:35:11 +0100 Subject: fs: add ksys_sync() helper; remove in-kernel calls to sys_sync() Using this helper allows us to avoid the in-kernel calls to the sys_sync() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_sync(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Alexander Viro Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3a2e90842ff8..0a9942b3e718 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -960,6 +960,7 @@ int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence); ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count); +void ksys_sync(void); /* * The following kernel syscall equivalents are just wrappers to fs-internal -- cgit v1.2.3 From 9b32105ec6b13d32d5db6a6e7992c97ce54b5ea7 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:42 +0100 Subject: kernel: add ksys_unshare() helper; remove in-kernel calls to sys_unshare() Using this helper allows us to avoid the in-kernel calls to the sys_unshare() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_unshare(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Cc: Ingo Molnar Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0a9942b3e718..e724dda509e0 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -961,6 +961,7 @@ int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence); ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count); void ksys_sync(void); +int ksys_unshare(unsigned long unshare_flags); /* * The following kernel syscall equivalents are just wrappers to fs-internal -- cgit v1.2.3 From e2aaa9f423367ee03755d632555c242629a08d00 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 16 Mar 2018 12:36:06 +0100 Subject: kernel: add ksys_setsid() helper; remove in-kernel call to sys_setsid() Using this helper allows us to avoid the in-kernel call to the sys_setsid() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_setsid(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e724dda509e0..4dd685ee425d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -962,6 +962,7 @@ off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence); ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count); void ksys_sync(void); int ksys_unshare(unsigned long unshare_flags); +int ksys_setsid(void); /* * The following kernel syscall equivalents are just wrappers to fs-internal -- cgit v1.2.3 From 806cbae1228cc1a19b978c4513f6851e9ab7f388 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:47 +0100 Subject: fs: add ksys_sync_file_range() helper; remove in-kernel calls to syscall Using this helper allows us to avoid the in-kernel calls to the sys_sync_file_range() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_sync_file_range(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 4dd685ee425d..331da76f66e2 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -963,6 +963,8 @@ ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count); void ksys_sync(void); int ksys_unshare(unsigned long unshare_flags); int ksys_setsid(void); +int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes, + unsigned int flags); /* * The following kernel syscall equivalents are just wrappers to fs-internal -- cgit v1.2.3 From df260e21e6cd5d2dfc1fe9b6a3bbf747e72b3bed Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 19 Mar 2018 17:32:11 +0100 Subject: fs: add ksys_truncate() wrapper; remove in-kernel calls to sys_truncate() Using the ksys_truncate() wrapper allows us to get rid of in-kernel calls to the sys_truncate() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_truncate(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 331da76f66e2..78b79e3a1279 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1078,4 +1078,11 @@ static inline long ksys_open(const char __user *filename, int flags, return do_sys_open(AT_FDCWD, filename, flags, mode); } +extern long do_sys_truncate(const char __user *pathname, loff_t length); + +static inline long ksys_truncate(const char __user *pathname, loff_t length) +{ + return do_sys_truncate(pathname, length); +} + #endif -- cgit v1.2.3 From 36028d5dd71175c332ab634e089e16dbdfe3812b Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 19 Mar 2018 17:38:31 +0100 Subject: fs: add ksys_p{read,write}64() helpers; remove in-kernel calls to syscalls Using the ksys_p{read,write}64() wrappers allows us to get rid of in-kernel calls to the sys_pread64() and sys_pwrite64() syscalls. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_p{read,write}64(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 78b79e3a1279..a30e4c2d0c27 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -965,6 +965,10 @@ int ksys_unshare(unsigned long unshare_flags); int ksys_setsid(void); int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes, unsigned int flags); +ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count, + loff_t pos); +ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf, + size_t count, loff_t pos); /* * The following kernel syscall equivalents are just wrappers to fs-internal -- cgit v1.2.3 From edf292c76b884a499cc60ad5cdada2663cc39a2f Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 19 Mar 2018 17:46:32 +0100 Subject: fs: add ksys_fallocate() wrapper; remove in-kernel calls to sys_fallocate() Using the ksys_fallocate() wrapper allows us to get rid of in-kernel calls to the sys_fallocate() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_fallocate(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Al Viro Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a30e4c2d0c27..613b8127834d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -969,6 +969,7 @@ ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count, loff_t pos); ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf, size_t count, loff_t pos); +int ksys_fallocate(int fd, int mode, loff_t offset, loff_t len); /* * The following kernel syscall equivalents are just wrappers to fs-internal -- cgit v1.2.3 From 9d5b7c956b09daab955fb2a42447d5d89ff15093 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:45 +0100 Subject: mm: add ksys_fadvise64_64() helper; remove in-kernel call to sys_fadvise64_64() Using the ksys_fadvise64_64() helper allows us to avoid the in-kernel calls to the sys_fadvise64_64() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_fadvise64_64(). Some compat stubs called sys_fadvise64(), which then just passed through the arguments to sys_fadvise64_64(). Get rid of this indirection, and call ksys_fadvise64_64() directly. This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Andrew Morton Cc: linux-mm@kvack.org Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 613b8127834d..466d408deefd 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -970,6 +970,15 @@ ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count, ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf, size_t count, loff_t pos); int ksys_fallocate(int fd, int mode, loff_t offset, loff_t len); +#ifdef CONFIG_ADVISE_SYSCALLS +int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice); +#else +static inline int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, + int advice) +{ + return -EINVAL; +} +#endif /* * The following kernel syscall equivalents are just wrappers to fs-internal -- cgit v1.2.3 From a90f590a1bee36fc2129cfb38ceec24a555bb12d Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:46 +0100 Subject: mm: add ksys_mmap_pgoff() helper; remove in-kernel calls to sys_mmap_pgoff() Using this helper allows us to avoid the in-kernel calls to the sys_mmap_pgoff() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_mmap_pgoff(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Andrew Morton Cc: linux-mm@kvack.org Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 466d408deefd..ec866c959e7d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -979,6 +979,9 @@ static inline int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, return -EINVAL; } #endif +unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff); /* * The following kernel syscall equivalents are just wrappers to fs-internal -- cgit v1.2.3 From c7b95d5156a9ee70f800bd2e47a9eba677be73e1 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 19 Mar 2018 17:51:36 +0100 Subject: mm: add ksys_readahead() helper; remove in-kernel calls to sys_readahead() Using this helper allows us to avoid the in-kernel calls to the sys_readahead() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_readahead(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. 
For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Andrew Morton Cc: linux-mm@kvack.org Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index ec866c959e7d..815fbdd9cca1 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -982,6 +982,7 @@ static inline int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff); +ssize_t ksys_readahead(int fd, loff_t offset, size_t count); /* * The following kernel syscall equivalents are just wrappers to fs-internal -- cgit v1.2.3 From 1bec510a9ebf00baa1aa8751e4a5d88b54efb748 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 22 Mar 2018 17:46:57 +0100 Subject: kexec: move sys_kexec_load() prototype to syscalls.h As the syscall function should only be called from the system call table but not from elsewhere in the kernel, move the prototype for sys_kexec_load() to include/linux/syscalls.h. 
Cc: Eric Biederman Cc: kexec@lists.infradead.org Signed-off-by: Dominik Brodowski --- include/linux/kexec.h | 4 ---- include/linux/syscalls.h | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index f16f6ceb3875..0ebcbeb21056 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -223,10 +223,6 @@ struct kimage { extern void machine_kexec(struct kimage *image); extern int machine_kexec_prepare(struct kimage *image); extern void machine_kexec_cleanup(struct kimage *image); -extern asmlinkage long sys_kexec_load(unsigned long entry, - unsigned long nr_segments, - struct kexec_segment __user *segments, - unsigned long flags); extern int kernel_kexec(void); extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 815fbdd9cca1..8330f046541e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -936,6 +936,10 @@ asmlinkage long sys_pkey_mprotect(unsigned long start, size_t len, unsigned long prot, int pkey); asmlinkage long sys_pkey_alloc(unsigned long flags, unsigned long init_val); asmlinkage long sys_pkey_free(int pkey); +asmlinkage long sys_kexec_load(unsigned long entry, + unsigned long nr_segments, + struct kexec_segment __user *segments, + unsigned long flags); asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, unsigned mask, struct statx __user *buffer); -- cgit v1.2.3 From 3c1c456f9b96c208c9dc9ad7aa3be36b8d488504 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 25 Mar 2018 21:50:11 +0200 Subject: syscalls: sort syscall prototypes in include/linux/syscalls.h Shuffle the syscall prototypes in include/linux/syscalls.h around so that they are kept in the same order as in include/uapi/asm-generic/unistd.h. 
The individual entries are kept the same, and neither modified to bring them in line with kernel coding style nor wrapped in proper ifdefs. Cc: Arnd Bergmann Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 1226 +++++++++++++++++++++++++--------------------- 1 file changed, 678 insertions(+), 548 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 8330f046541e..1f223b7cf16d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -241,223 +241,28 @@ static inline void addr_limit_user_check(void) #endif } -asmlinkage long sys_time(time_t __user *tloc); -asmlinkage long sys_stime(time_t __user *tptr); -asmlinkage long sys_gettimeofday(struct timeval __user *tv, - struct timezone __user *tz); -asmlinkage long sys_settimeofday(struct timeval __user *tv, - struct timezone __user *tz); -asmlinkage long sys_adjtimex(struct timex __user *txc_p); - -asmlinkage long sys_times(struct tms __user *tbuf); - -asmlinkage long sys_gettid(void); -asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp); -asmlinkage long sys_alarm(unsigned int seconds); -asmlinkage long sys_getpid(void); -asmlinkage long sys_getppid(void); -asmlinkage long sys_getuid(void); -asmlinkage long sys_geteuid(void); -asmlinkage long sys_getgid(void); -asmlinkage long sys_getegid(void); -asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid); -asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid); -asmlinkage long sys_getpgid(pid_t pid); -asmlinkage long sys_getpgrp(void); -asmlinkage long sys_getsid(pid_t pid); -asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist); - -asmlinkage long sys_setregid(gid_t rgid, gid_t egid); -asmlinkage long sys_setgid(gid_t gid); -asmlinkage long sys_setreuid(uid_t ruid, uid_t euid); -asmlinkage long sys_setuid(uid_t uid); -asmlinkage long 
sys_setresuid(uid_t ruid, uid_t euid, uid_t suid); -asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid); -asmlinkage long sys_setfsuid(uid_t uid); -asmlinkage long sys_setfsgid(gid_t gid); -asmlinkage long sys_setpgid(pid_t pid, pid_t pgid); -asmlinkage long sys_setsid(void); -asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist); - -asmlinkage long sys_acct(const char __user *name); -asmlinkage long sys_capget(cap_user_header_t header, - cap_user_data_t dataptr); -asmlinkage long sys_capset(cap_user_header_t header, - const cap_user_data_t data); -asmlinkage long sys_personality(unsigned int personality); - -asmlinkage long sys_sigpending(old_sigset_t __user *uset); -asmlinkage long sys_sigprocmask(int how, old_sigset_t __user *set, - old_sigset_t __user *oset); -asmlinkage long sys_sigaltstack(const struct sigaltstack __user *uss, - struct sigaltstack __user *uoss); - -asmlinkage long sys_getitimer(int which, struct itimerval __user *value); -asmlinkage long sys_setitimer(int which, - struct itimerval __user *value, - struct itimerval __user *ovalue); -asmlinkage long sys_timer_create(clockid_t which_clock, - struct sigevent __user *timer_event_spec, - timer_t __user * created_timer_id); -asmlinkage long sys_timer_gettime(timer_t timer_id, - struct itimerspec __user *setting); -asmlinkage long sys_timer_getoverrun(timer_t timer_id); -asmlinkage long sys_timer_settime(timer_t timer_id, int flags, - const struct itimerspec __user *new_setting, - struct itimerspec __user *old_setting); -asmlinkage long sys_timer_delete(timer_t timer_id); -asmlinkage long sys_clock_settime(clockid_t which_clock, - const struct timespec __user *tp); -asmlinkage long sys_clock_gettime(clockid_t which_clock, - struct timespec __user *tp); -asmlinkage long sys_clock_adjtime(clockid_t which_clock, - struct timex __user *tx); -asmlinkage long sys_clock_getres(clockid_t which_clock, - struct timespec __user *tp); -asmlinkage long sys_clock_nanosleep(clockid_t 
which_clock, int flags, - const struct timespec __user *rqtp, - struct timespec __user *rmtp); - -asmlinkage long sys_nice(int increment); -asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, - struct sched_param __user *param); -asmlinkage long sys_sched_setparam(pid_t pid, - struct sched_param __user *param); -asmlinkage long sys_sched_setattr(pid_t pid, - struct sched_attr __user *attr, - unsigned int flags); -asmlinkage long sys_sched_getscheduler(pid_t pid); -asmlinkage long sys_sched_getparam(pid_t pid, - struct sched_param __user *param); -asmlinkage long sys_sched_getattr(pid_t pid, - struct sched_attr __user *attr, - unsigned int size, - unsigned int flags); -asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, - unsigned long __user *user_mask_ptr); -asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, - unsigned long __user *user_mask_ptr); -asmlinkage long sys_sched_yield(void); -asmlinkage long sys_sched_get_priority_max(int policy); -asmlinkage long sys_sched_get_priority_min(int policy); -asmlinkage long sys_sched_rr_get_interval(pid_t pid, - struct timespec __user *interval); -asmlinkage long sys_setpriority(int which, int who, int niceval); -asmlinkage long sys_getpriority(int which, int who); - -asmlinkage long sys_shutdown(int, int); -asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, - void __user *arg); -asmlinkage long sys_restart_syscall(void); -asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, - struct kexec_segment __user *segments, - unsigned long flags); -asmlinkage long sys_kexec_file_load(int kernel_fd, int initrd_fd, - unsigned long cmdline_len, - const char __user *cmdline_ptr, - unsigned long flags); - -asmlinkage long sys_exit(int error_code); -asmlinkage long sys_exit_group(int error_code); -asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr, - int options, struct rusage __user *ru); -asmlinkage long sys_waitid(int which, pid_t pid, - 
struct siginfo __user *infop, - int options, struct rusage __user *ru); -asmlinkage long sys_waitpid(pid_t pid, int __user *stat_addr, int options); -asmlinkage long sys_set_tid_address(int __user *tidptr); -asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, - struct timespec __user *utime, u32 __user *uaddr2, - u32 val3); - -asmlinkage long sys_init_module(void __user *umod, unsigned long len, - const char __user *uargs); -asmlinkage long sys_delete_module(const char __user *name_user, - unsigned int flags); - -#ifdef CONFIG_OLD_SIGSUSPEND -asmlinkage long sys_sigsuspend(old_sigset_t mask); -#endif - -#ifdef CONFIG_OLD_SIGSUSPEND3 -asmlinkage long sys_sigsuspend(int unused1, int unused2, old_sigset_t mask); -#endif - -asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize); - -#ifdef CONFIG_OLD_SIGACTION -asmlinkage long sys_sigaction(int, const struct old_sigaction __user *, - struct old_sigaction __user *); -#endif - -#ifndef CONFIG_ODD_RT_SIGACTION -asmlinkage long sys_rt_sigaction(int, - const struct sigaction __user *, - struct sigaction __user *, - size_t); -#endif -asmlinkage long sys_rt_sigprocmask(int how, sigset_t __user *set, - sigset_t __user *oset, size_t sigsetsize); -asmlinkage long sys_rt_sigpending(sigset_t __user *set, size_t sigsetsize); -asmlinkage long sys_rt_sigtimedwait(const sigset_t __user *uthese, - siginfo_t __user *uinfo, - const struct timespec __user *uts, - size_t sigsetsize); -asmlinkage long sys_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, - siginfo_t __user *uinfo); -asmlinkage long sys_kill(pid_t pid, int sig); -asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig); -asmlinkage long sys_tkill(pid_t pid, int sig); -asmlinkage long sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo); -asmlinkage long sys_sgetmask(void); -asmlinkage long sys_ssetmask(int newmask); -asmlinkage long sys_signal(int sig, __sighandler_t handler); -asmlinkage long sys_pause(void); - -asmlinkage long 
sys_sync(void); -asmlinkage long sys_fsync(unsigned int fd); -asmlinkage long sys_fdatasync(unsigned int fd); -asmlinkage long sys_bdflush(int func, long data); -asmlinkage long sys_mount(char __user *dev_name, char __user *dir_name, - char __user *type, unsigned long flags, - void __user *data); -asmlinkage long sys_umount(char __user *name, int flags); -asmlinkage long sys_oldumount(char __user *name); -asmlinkage long sys_truncate(const char __user *path, long length); -asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length); -asmlinkage long sys_stat(const char __user *filename, - struct __old_kernel_stat __user *statbuf); -asmlinkage long sys_statfs(const char __user * path, - struct statfs __user *buf); -asmlinkage long sys_statfs64(const char __user *path, size_t sz, - struct statfs64 __user *buf); -asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user *buf); -asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, - struct statfs64 __user *buf); -asmlinkage long sys_lstat(const char __user *filename, - struct __old_kernel_stat __user *statbuf); -asmlinkage long sys_fstat(unsigned int fd, - struct __old_kernel_stat __user *statbuf); -asmlinkage long sys_newstat(const char __user *filename, - struct stat __user *statbuf); -asmlinkage long sys_newlstat(const char __user *filename, - struct stat __user *statbuf); -asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf); -asmlinkage long sys_ustat(unsigned dev, struct ustat __user *ubuf); -#if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64) -asmlinkage long sys_stat64(const char __user *filename, - struct stat64 __user *statbuf); -asmlinkage long sys_fstat64(unsigned long fd, struct stat64 __user *statbuf); -asmlinkage long sys_lstat64(const char __user *filename, - struct stat64 __user *statbuf); -asmlinkage long sys_fstatat64(int dfd, const char __user *filename, - struct stat64 __user *statbuf, int flag); -#endif -#if BITS_PER_LONG == 32 
-asmlinkage long sys_truncate64(const char __user *path, loff_t length); -asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length); -#endif +/* + * These syscall function prototypes are kept in the same order as + * include/uapi/asm-generic/unistd.h. Architecture specific entries go below, + * followed by deprecated or obsolete system calls. + * + * Please note that these prototypes here are only provided for information + * purposes, for static analysis, and for linking from the syscall table. + * These functions should not be called elsewhere from kernel code. + */ +asmlinkage long sys_io_setup(unsigned nr_reqs, aio_context_t __user *ctx); +asmlinkage long sys_io_destroy(aio_context_t ctx); +asmlinkage long sys_io_submit(aio_context_t, long, + struct iocb __user * __user *); +asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, + struct io_event __user *result); +asmlinkage long sys_io_getevents(aio_context_t ctx_id, + long min_nr, + long nr, + struct io_event __user *events, + struct timespec __user *timeout); +/* fs/xattr.c */ asmlinkage long sys_setxattr(const char __user *path, const char __user *name, const void __user *value, size_t size, int flags); asmlinkage long sys_lsetxattr(const char __user *path, const char __user *name, @@ -481,125 +286,126 @@ asmlinkage long sys_lremovexattr(const char __user *path, const char __user *name); asmlinkage long sys_fremovexattr(int fd, const char __user *name); -asmlinkage long sys_brk(unsigned long brk); -asmlinkage long sys_mprotect(unsigned long start, size_t len, - unsigned long prot); -asmlinkage long sys_mremap(unsigned long addr, - unsigned long old_len, unsigned long new_len, - unsigned long flags, unsigned long new_addr); -asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, - unsigned long prot, unsigned long pgoff, - unsigned long flags); -asmlinkage long sys_msync(unsigned long start, size_t len, int flags); -asmlinkage long sys_fadvise64(int fd, 
loff_t offset, size_t len, int advice); -asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice); -asmlinkage long sys_munmap(unsigned long addr, size_t len); -asmlinkage long sys_mlock(unsigned long start, size_t len); -asmlinkage long sys_munlock(unsigned long start, size_t len); -asmlinkage long sys_mlockall(int flags); -asmlinkage long sys_munlockall(void); -asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior); -asmlinkage long sys_mincore(unsigned long start, size_t len, - unsigned char __user * vec); +/* fs/dcache.c */ +asmlinkage long sys_getcwd(char __user *buf, unsigned long size); -asmlinkage long sys_pivot_root(const char __user *new_root, - const char __user *put_old); -asmlinkage long sys_chroot(const char __user *filename); -asmlinkage long sys_mknod(const char __user *filename, umode_t mode, - unsigned dev); -asmlinkage long sys_link(const char __user *oldname, - const char __user *newname); -asmlinkage long sys_symlink(const char __user *old, const char __user *new); -asmlinkage long sys_unlink(const char __user *pathname); -asmlinkage long sys_rename(const char __user *oldname, - const char __user *newname); -asmlinkage long sys_chmod(const char __user *filename, umode_t mode); -asmlinkage long sys_fchmod(unsigned int fd, umode_t mode); +/* fs/cookies.c */ +asmlinkage long sys_lookup_dcookie(u64 cookie64, char __user *buf, size_t len); + +/* fs/eventfd.c */ +asmlinkage long sys_eventfd2(unsigned int count, int flags); +/* fs/eventpoll.c */ +asmlinkage long sys_epoll_create1(int flags); +asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, + struct epoll_event __user *event); +asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, + int maxevents, int timeout, + const sigset_t __user *sigmask, + size_t sigsetsize); + +/* fs/fcntl.c */ +asmlinkage long sys_dup(unsigned int fildes); +asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags); asmlinkage long 
sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg); #if BITS_PER_LONG == 32 asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg); #endif -asmlinkage long sys_pipe(int __user *fildes); -asmlinkage long sys_pipe2(int __user *fildes, int flags); -asmlinkage long sys_dup(unsigned int fildes); -asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd); -asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags); -asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on); + +/* fs/inotify_user.c */ +asmlinkage long sys_inotify_init1(int flags); +asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, + u32 mask); +asmlinkage long sys_inotify_rm_watch(int fd, __s32 wd); + +/* fs/ioctl.c */ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); + +/* fs/ioprio.c */ +asmlinkage long sys_ioprio_set(int which, int who, int ioprio); +asmlinkage long sys_ioprio_get(int which, int who); + +/* fs/locks.c */ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd); -asmlinkage long sys_io_setup(unsigned nr_reqs, aio_context_t __user *ctx); -asmlinkage long sys_io_destroy(aio_context_t ctx); -asmlinkage long sys_io_getevents(aio_context_t ctx_id, - long min_nr, - long nr, - struct io_event __user *events, - struct timespec __user *timeout); -asmlinkage long sys_io_submit(aio_context_t, long, - struct iocb __user * __user *); -asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, - struct io_event __user *result); -asmlinkage long sys_sendfile(int out_fd, int in_fd, - off_t __user *offset, size_t count); -asmlinkage long sys_sendfile64(int out_fd, int in_fd, - loff_t __user *offset, size_t count); -asmlinkage long sys_readlink(const char __user *path, - char __user *buf, int bufsiz); -asmlinkage long sys_creat(const char __user *pathname, umode_t mode); -asmlinkage long sys_open(const char __user *filename, - int flags, umode_t mode); 
+ +/* fs/namei.c */ +asmlinkage long sys_mknodat(int dfd, const char __user * filename, umode_t mode, + unsigned dev); +asmlinkage long sys_mkdirat(int dfd, const char __user * pathname, umode_t mode); +asmlinkage long sys_unlinkat(int dfd, const char __user * pathname, int flag); +asmlinkage long sys_symlinkat(const char __user * oldname, + int newdfd, const char __user * newname); +asmlinkage long sys_linkat(int olddfd, const char __user *oldname, + int newdfd, const char __user *newname, int flags); +asmlinkage long sys_renameat(int olddfd, const char __user * oldname, + int newdfd, const char __user * newname); + +/* fs/namespace.c */ +asmlinkage long sys_umount(char __user *name, int flags); +asmlinkage long sys_mount(char __user *dev_name, char __user *dir_name, + char __user *type, unsigned long flags, + void __user *data); +asmlinkage long sys_pivot_root(const char __user *new_root, + const char __user *put_old); + +/* fs/nfsctl.c */ + +/* fs/open.c */ +asmlinkage long sys_statfs(const char __user * path, + struct statfs __user *buf); +asmlinkage long sys_statfs64(const char __user *path, size_t sz, + struct statfs64 __user *buf); +asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user *buf); +asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, + struct statfs64 __user *buf); +asmlinkage long sys_truncate(const char __user *path, long length); +asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length); +#if BITS_PER_LONG == 32 +asmlinkage long sys_truncate64(const char __user *path, loff_t length); +asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length); +#endif +asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); +asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode); +asmlinkage long sys_chdir(const char __user *filename); +asmlinkage long sys_fchdir(unsigned int fd); +asmlinkage long sys_chroot(const char __user *filename); +asmlinkage long sys_fchmod(unsigned int fd, 
umode_t mode); +asmlinkage long sys_fchmodat(int dfd, const char __user * filename, + umode_t mode); +asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, + gid_t group, int flag); +asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group); +asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, + umode_t mode); asmlinkage long sys_close(unsigned int fd); -asmlinkage long sys_access(const char __user *filename, int mode); asmlinkage long sys_vhangup(void); -asmlinkage long sys_chown(const char __user *filename, - uid_t user, gid_t group); -asmlinkage long sys_lchown(const char __user *filename, - uid_t user, gid_t group); -asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group); -#ifdef CONFIG_HAVE_UID16 -asmlinkage long sys_chown16(const char __user *filename, - old_uid_t user, old_gid_t group); -asmlinkage long sys_lchown16(const char __user *filename, - old_uid_t user, old_gid_t group); -asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group); -asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid); -asmlinkage long sys_setgid16(old_gid_t gid); -asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid); -asmlinkage long sys_setuid16(old_uid_t uid); -asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid); -asmlinkage long sys_getresuid16(old_uid_t __user *ruid, - old_uid_t __user *euid, old_uid_t __user *suid); -asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid); -asmlinkage long sys_getresgid16(old_gid_t __user *rgid, - old_gid_t __user *egid, old_gid_t __user *sgid); -asmlinkage long sys_setfsuid16(old_uid_t uid); -asmlinkage long sys_setfsgid16(old_gid_t gid); -asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist); -asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist); -asmlinkage long sys_getuid16(void); -asmlinkage long sys_geteuid16(void); -asmlinkage 
long sys_getgid16(void); -asmlinkage long sys_getegid16(void); -#endif -asmlinkage long sys_utime(char __user *filename, - struct utimbuf __user *times); -asmlinkage long sys_utimes(char __user *filename, - struct timeval __user *utimes); -asmlinkage long sys_lseek(unsigned int fd, off_t offset, - unsigned int whence); +/* fs/pipe.c */ +asmlinkage long sys_pipe2(int __user *fildes, int flags); + +/* fs/quota.c */ +asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, + qid_t id, void __user *addr); + +/* fs/readdir.c */ +asmlinkage long sys_getdents64(unsigned int fd, + struct linux_dirent64 __user *dirent, + unsigned int count); + +/* fs/read_write.c */ asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, unsigned long offset_low, loff_t __user *result, unsigned int whence); +asmlinkage long sys_lseek(unsigned int fd, off_t offset, + unsigned int whence); asmlinkage long sys_read(unsigned int fd, char __user *buf, size_t count); -asmlinkage long sys_readahead(int fd, loff_t offset, size_t count); +asmlinkage long sys_write(unsigned int fd, const char __user *buf, + size_t count); asmlinkage long sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen); -asmlinkage long sys_write(unsigned int fd, const char __user *buf, - size_t count); asmlinkage long sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen); @@ -609,264 +415,314 @@ asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf, size_t count, loff_t pos); asmlinkage long sys_preadv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h); -asmlinkage long sys_preadv2(unsigned long fd, const struct iovec __user *vec, - unsigned long vlen, unsigned long pos_l, unsigned long pos_h, - rwf_t flags); asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h); -asmlinkage long 
sys_pwritev2(unsigned long fd, const struct iovec __user *vec, - unsigned long vlen, unsigned long pos_l, unsigned long pos_h, - rwf_t flags); -asmlinkage long sys_getcwd(char __user *buf, unsigned long size); -asmlinkage long sys_mkdir(const char __user *pathname, umode_t mode); -asmlinkage long sys_chdir(const char __user *filename); -asmlinkage long sys_fchdir(unsigned int fd); -asmlinkage long sys_rmdir(const char __user *pathname); -asmlinkage long sys_lookup_dcookie(u64 cookie64, char __user *buf, size_t len); -asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, - qid_t id, void __user *addr); -asmlinkage long sys_getdents(unsigned int fd, - struct linux_dirent __user *dirent, - unsigned int count); -asmlinkage long sys_getdents64(unsigned int fd, - struct linux_dirent64 __user *dirent, - unsigned int count); -asmlinkage long sys_setsockopt(int fd, int level, int optname, - char __user *optval, int optlen); -asmlinkage long sys_getsockopt(int fd, int level, int optname, - char __user *optval, int __user *optlen); -asmlinkage long sys_bind(int, struct sockaddr __user *, int); -asmlinkage long sys_connect(int, struct sockaddr __user *, int); -asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *); -asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *, int); -asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *); -asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *); -asmlinkage long sys_send(int, void __user *, size_t, unsigned); -asmlinkage long sys_sendto(int, void __user *, size_t, unsigned, - struct sockaddr __user *, int); -asmlinkage long sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags); -asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg, - unsigned int vlen, unsigned flags); -asmlinkage long sys_recv(int, void __user *, size_t, unsigned); -asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned, - struct 
sockaddr __user *, int __user *); -asmlinkage long sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags); -asmlinkage long sys_recvmmsg(int fd, struct mmsghdr __user *msg, - unsigned int vlen, unsigned flags, - struct timespec __user *timeout); -asmlinkage long sys_socket(int, int, int); -asmlinkage long sys_socketpair(int, int, int, int __user *); -asmlinkage long sys_socketcall(int call, unsigned long __user *args); -asmlinkage long sys_listen(int, int); -asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, - int timeout); -asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, - fd_set __user *exp, struct timeval __user *tvp); -asmlinkage long sys_old_select(struct sel_arg_struct __user *arg); -asmlinkage long sys_epoll_create(int size); -asmlinkage long sys_epoll_create1(int flags); -asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, - struct epoll_event __user *event); -asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, - int maxevents, int timeout); -asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, - int maxevents, int timeout, - const sigset_t __user *sigmask, +/* fs/sendfile.c */ +asmlinkage long sys_sendfile64(int out_fd, int in_fd, + loff_t __user *offset, size_t count); + +/* fs/select.c */ +asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *, + fd_set __user *, struct timespec __user *, + void __user *); +asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int, + struct timespec __user *, const sigset_t __user *, + size_t); + +/* fs/signalfd.c */ +asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask, size_t sizemask, int flags); + +/* fs/splice.c */ +asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, + unsigned long nr_segs, unsigned int flags); +asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, + int fd_out, loff_t __user *off_out, + size_t len, unsigned int flags); +asmlinkage 
long sys_tee(int fdin, int fdout, size_t len, unsigned int flags); + +/* fs/stat.c */ +asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *buf, + int bufsiz); +asmlinkage long sys_newfstatat(int dfd, const char __user *filename, + struct stat __user *statbuf, int flag); +asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf); +#if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64) +asmlinkage long sys_fstat64(unsigned long fd, struct stat64 __user *statbuf); +asmlinkage long sys_fstatat64(int dfd, const char __user *filename, + struct stat64 __user *statbuf, int flag); +#endif + +/* fs/sync.c */ +asmlinkage long sys_sync(void); +asmlinkage long sys_fsync(unsigned int fd); +asmlinkage long sys_fdatasync(unsigned int fd); +asmlinkage long sys_sync_file_range2(int fd, unsigned int flags, + loff_t offset, loff_t nbytes); +asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, + unsigned int flags); + +/* fs/timerfd.c */ +asmlinkage long sys_timerfd_create(int clockid, int flags); +asmlinkage long sys_timerfd_settime(int ufd, int flags, + const struct itimerspec __user *utmr, + struct itimerspec __user *otmr); +asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr); + +/* fs/utimes.c */ +asmlinkage long sys_utimensat(int dfd, const char __user *filename, + struct timespec __user *utimes, int flags); + +/* kernel/acct.c */ +asmlinkage long sys_acct(const char __user *name); + +/* kernel/capability.c */ +asmlinkage long sys_capget(cap_user_header_t header, + cap_user_data_t dataptr); +asmlinkage long sys_capset(cap_user_header_t header, + const cap_user_data_t data); + +/* kernel/exec_domain.c */ +asmlinkage long sys_personality(unsigned int personality); + +/* kernel/exit.c */ +asmlinkage long sys_exit(int error_code); +asmlinkage long sys_exit_group(int error_code); +asmlinkage long sys_waitid(int which, pid_t pid, + struct siginfo __user *infop, + int options, struct 
rusage __user *ru); + +/* kernel/fork.c */ +asmlinkage long sys_set_tid_address(int __user *tidptr); +asmlinkage long sys_unshare(unsigned long unshare_flags); + +/* kernel/futex.c */ +asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, + struct timespec __user *utime, u32 __user *uaddr2, + u32 val3); +asmlinkage long sys_get_robust_list(int pid, + struct robust_list_head __user * __user *head_ptr, + size_t __user *len_ptr); +asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, + size_t len); + +/* kernel/hrtimer.c */ +asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp); + +/* kernel/itimer.c */ +asmlinkage long sys_getitimer(int which, struct itimerval __user *value); +asmlinkage long sys_setitimer(int which, + struct itimerval __user *value, + struct itimerval __user *ovalue); + +/* kernel/kexec.c */ +asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, + struct kexec_segment __user *segments, + unsigned long flags); + +/* kernel/module.c */ +asmlinkage long sys_init_module(void __user *umod, unsigned long len, + const char __user *uargs); +asmlinkage long sys_delete_module(const char __user *name_user, + unsigned int flags); + +/* kernel/posix-timers.c */ +asmlinkage long sys_timer_create(clockid_t which_clock, + struct sigevent __user *timer_event_spec, + timer_t __user * created_timer_id); +asmlinkage long sys_timer_gettime(timer_t timer_id, + struct itimerspec __user *setting); +asmlinkage long sys_timer_getoverrun(timer_t timer_id); +asmlinkage long sys_timer_settime(timer_t timer_id, int flags, + const struct itimerspec __user *new_setting, + struct itimerspec __user *old_setting); +asmlinkage long sys_timer_delete(timer_t timer_id); +asmlinkage long sys_clock_settime(clockid_t which_clock, + const struct timespec __user *tp); +asmlinkage long sys_clock_gettime(clockid_t which_clock, + struct timespec __user *tp); +asmlinkage long sys_clock_getres(clockid_t 
which_clock, + struct timespec __user *tp); +asmlinkage long sys_clock_nanosleep(clockid_t which_clock, int flags, + const struct timespec __user *rqtp, + struct timespec __user *rmtp); + +/* kernel/printk.c */ +asmlinkage long sys_syslog(int type, char __user *buf, int len); + +/* kernel/ptrace.c */ +asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, + unsigned long data); +/* kernel/sched/core.c */ + +asmlinkage long sys_sched_setparam(pid_t pid, + struct sched_param __user *param); +asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, + struct sched_param __user *param); +asmlinkage long sys_sched_getscheduler(pid_t pid); +asmlinkage long sys_sched_getparam(pid_t pid, + struct sched_param __user *param); +asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, + unsigned long __user *user_mask_ptr); +asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, + unsigned long __user *user_mask_ptr); +asmlinkage long sys_sched_yield(void); +asmlinkage long sys_sched_get_priority_max(int policy); +asmlinkage long sys_sched_get_priority_min(int policy); +asmlinkage long sys_sched_rr_get_interval(pid_t pid, + struct timespec __user *interval); + +/* kernel/signal.c */ +asmlinkage long sys_restart_syscall(void); +asmlinkage long sys_kill(pid_t pid, int sig); +asmlinkage long sys_tkill(pid_t pid, int sig); +asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig); +asmlinkage long sys_sigaltstack(const struct sigaltstack __user *uss, + struct sigaltstack __user *uoss); +asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize); +#ifndef CONFIG_ODD_RT_SIGACTION +asmlinkage long sys_rt_sigaction(int, + const struct sigaction __user *, + struct sigaction __user *, + size_t); +#endif +asmlinkage long sys_rt_sigprocmask(int how, sigset_t __user *set, + sigset_t __user *oset, size_t sigsetsize); +asmlinkage long sys_rt_sigpending(sigset_t __user *set, size_t sigsetsize); +asmlinkage long 
sys_rt_sigtimedwait(const sigset_t __user *uthese, + siginfo_t __user *uinfo, + const struct timespec __user *uts, size_t sigsetsize); -asmlinkage long sys_gethostname(char __user *name, int len); +asmlinkage long sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo); + +/* kernel/sys.c */ +asmlinkage long sys_setpriority(int which, int who, int niceval); +asmlinkage long sys_getpriority(int which, int who); +asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, + void __user *arg); +asmlinkage long sys_setregid(gid_t rgid, gid_t egid); +asmlinkage long sys_setgid(gid_t gid); +asmlinkage long sys_setreuid(uid_t ruid, uid_t euid); +asmlinkage long sys_setuid(uid_t uid); +asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid); +asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid); +asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid); +asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid); +asmlinkage long sys_setfsuid(uid_t uid); +asmlinkage long sys_setfsgid(gid_t gid); +asmlinkage long sys_times(struct tms __user *tbuf); +asmlinkage long sys_setpgid(pid_t pid, pid_t pgid); +asmlinkage long sys_getpgid(pid_t pid); +asmlinkage long sys_getsid(pid_t pid); +asmlinkage long sys_setsid(void); +asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist); +asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist); +asmlinkage long sys_newuname(struct new_utsname __user *name); asmlinkage long sys_sethostname(char __user *name, int len); asmlinkage long sys_setdomainname(char __user *name, int len); -asmlinkage long sys_newuname(struct new_utsname __user *name); -asmlinkage long sys_uname(struct old_utsname __user *); -asmlinkage long sys_olduname(struct oldold_utsname __user *); - asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim); -#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT -asmlinkage long 
sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim); -#endif asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim); -asmlinkage long sys_prlimit64(pid_t pid, unsigned int resource, - const struct rlimit64 __user *new_rlim, - struct rlimit64 __user *old_rlim); asmlinkage long sys_getrusage(int who, struct rusage __user *ru); asmlinkage long sys_umask(int mask); +asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5); +asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct getcpu_cache __user *cache); + +/* kernel/time.c */ +asmlinkage long sys_gettimeofday(struct timeval __user *tv, + struct timezone __user *tz); +asmlinkage long sys_settimeofday(struct timeval __user *tv, + struct timezone __user *tz); +asmlinkage long sys_adjtimex(struct timex __user *txc_p); + +/* kernel/timer.c */ +asmlinkage long sys_getpid(void); +asmlinkage long sys_getppid(void); +asmlinkage long sys_getuid(void); +asmlinkage long sys_geteuid(void); +asmlinkage long sys_getgid(void); +asmlinkage long sys_getegid(void); +asmlinkage long sys_gettid(void); +asmlinkage long sys_sysinfo(struct sysinfo __user *info); + +/* ipc/mqueue.c */ +asmlinkage long sys_mq_open(const char __user *name, int oflag, umode_t mode, struct mq_attr __user *attr); +asmlinkage long sys_mq_unlink(const char __user *name); +asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *msg_ptr, size_t msg_len, unsigned int msg_prio, const struct timespec __user *abs_timeout); +asmlinkage long sys_mq_timedreceive(mqd_t mqdes, char __user *msg_ptr, size_t msg_len, unsigned int __user *msg_prio, const struct timespec __user *abs_timeout); +asmlinkage long sys_mq_notify(mqd_t mqdes, const struct sigevent __user *notification); +asmlinkage long sys_mq_getsetattr(mqd_t mqdes, const struct mq_attr __user *mqstat, struct mq_attr __user *omqstat); +/* ipc/msg.c */ asmlinkage long 
sys_msgget(key_t key, int msgflg); -asmlinkage long sys_msgsnd(int msqid, struct msgbuf __user *msgp, - size_t msgsz, int msgflg); +asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); asmlinkage long sys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, long msgtyp, int msgflg); -asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); +asmlinkage long sys_msgsnd(int msqid, struct msgbuf __user *msgp, + size_t msgsz, int msgflg); +/* ipc/sem.c */ asmlinkage long sys_semget(key_t key, int nsems, int semflg); -asmlinkage long sys_semop(int semid, struct sembuf __user *sops, - unsigned nsops); asmlinkage long sys_semctl(int semid, int semnum, int cmd, unsigned long arg); asmlinkage long sys_semtimedop(int semid, struct sembuf __user *sops, unsigned nsops, const struct timespec __user *timeout); -asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg); +asmlinkage long sys_semop(int semid, struct sembuf __user *sops, + unsigned nsops); + +/* ipc/shm.c */ asmlinkage long sys_shmget(key_t key, size_t size, int flag); -asmlinkage long sys_shmdt(char __user *shmaddr); asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); -asmlinkage long sys_ipc(unsigned int call, int first, unsigned long second, - unsigned long third, void __user *ptr, long fifth); +asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg); +asmlinkage long sys_shmdt(char __user *shmaddr); -asmlinkage long sys_mq_open(const char __user *name, int oflag, umode_t mode, struct mq_attr __user *attr); -asmlinkage long sys_mq_unlink(const char __user *name); -asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *msg_ptr, size_t msg_len, unsigned int msg_prio, const struct timespec __user *abs_timeout); -asmlinkage long sys_mq_timedreceive(mqd_t mqdes, char __user *msg_ptr, size_t msg_len, unsigned int __user *msg_prio, const struct timespec __user *abs_timeout); -asmlinkage long 
sys_mq_notify(mqd_t mqdes, const struct sigevent __user *notification); -asmlinkage long sys_mq_getsetattr(mqd_t mqdes, const struct mq_attr __user *mqstat, struct mq_attr __user *omqstat); +/* net/socket.c */ +asmlinkage long sys_socket(int, int, int); +asmlinkage long sys_socketpair(int, int, int, int __user *); +asmlinkage long sys_bind(int, struct sockaddr __user *, int); +asmlinkage long sys_listen(int, int); +asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *); +asmlinkage long sys_connect(int, struct sockaddr __user *, int); +asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *); +asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *); +asmlinkage long sys_sendto(int, void __user *, size_t, unsigned, + struct sockaddr __user *, int); +asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned, + struct sockaddr __user *, int __user *); +asmlinkage long sys_setsockopt(int fd, int level, int optname, + char __user *optval, int optlen); +asmlinkage long sys_getsockopt(int fd, int level, int optname, + char __user *optval, int __user *optlen); +asmlinkage long sys_shutdown(int, int); +asmlinkage long sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags); +asmlinkage long sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags); -asmlinkage long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn); -asmlinkage long sys_pciconfig_read(unsigned long bus, unsigned long dfn, - unsigned long off, unsigned long len, - void __user *buf); -asmlinkage long sys_pciconfig_write(unsigned long bus, unsigned long dfn, - unsigned long off, unsigned long len, - void __user *buf); +/* mm/filemap.c */ +asmlinkage long sys_readahead(int fd, loff_t offset, size_t count); -asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5); -asmlinkage long sys_swapon(const char __user *specialfile, int swap_flags); 
-asmlinkage long sys_swapoff(const char __user *specialfile); -asmlinkage long sys_sysctl(struct __sysctl_args __user *args); -asmlinkage long sys_sysinfo(struct sysinfo __user *info); -asmlinkage long sys_sysfs(int option, - unsigned long arg1, unsigned long arg2); -asmlinkage long sys_syslog(int type, char __user *buf, int len); -asmlinkage long sys_uselib(const char __user *library); -asmlinkage long sys_ni_syscall(void); -asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, - unsigned long data); +/* mm/nommu.c, also with MMU */ +asmlinkage long sys_brk(unsigned long brk); +asmlinkage long sys_munmap(unsigned long addr, size_t len); +asmlinkage long sys_mremap(unsigned long addr, + unsigned long old_len, unsigned long new_len, + unsigned long flags, unsigned long new_addr); +/* security/keys/keyctl.c */ asmlinkage long sys_add_key(const char __user *_type, const char __user *_description, const void __user *_payload, size_t plen, key_serial_t destringid); - asmlinkage long sys_request_key(const char __user *_type, const char __user *_description, const char __user *_callout_info, key_serial_t destringid); - asmlinkage long sys_keyctl(int cmd, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); -asmlinkage long sys_ioprio_set(int which, int who, int ioprio); -asmlinkage long sys_ioprio_get(int which, int who); -asmlinkage long sys_set_mempolicy(int mode, const unsigned long __user *nmask, - unsigned long maxnode); -asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, - const unsigned long __user *from, - const unsigned long __user *to); -asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, - const void __user * __user *pages, - const int __user *nodes, - int __user *status, - int flags); -asmlinkage long sys_mbind(unsigned long start, unsigned long len, - unsigned long mode, - const unsigned long __user *nmask, - unsigned long maxnode, - unsigned flags); -asmlinkage long 
sys_get_mempolicy(int __user *policy, - unsigned long __user *nmask, - unsigned long maxnode, - unsigned long addr, unsigned long flags); - -asmlinkage long sys_inotify_init(void); -asmlinkage long sys_inotify_init1(int flags); -asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, - u32 mask); -asmlinkage long sys_inotify_rm_watch(int fd, __s32 wd); - -asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, - __u32 __user *ustatus); -asmlinkage long sys_spu_create(const char __user *name, - unsigned int flags, umode_t mode, int fd); - -asmlinkage long sys_mknodat(int dfd, const char __user * filename, umode_t mode, - unsigned dev); -asmlinkage long sys_mkdirat(int dfd, const char __user * pathname, umode_t mode); -asmlinkage long sys_unlinkat(int dfd, const char __user * pathname, int flag); -asmlinkage long sys_symlinkat(const char __user * oldname, - int newdfd, const char __user * newname); -asmlinkage long sys_linkat(int olddfd, const char __user *oldname, - int newdfd, const char __user *newname, int flags); -asmlinkage long sys_renameat(int olddfd, const char __user * oldname, - int newdfd, const char __user * newname); -asmlinkage long sys_renameat2(int olddfd, const char __user *oldname, - int newdfd, const char __user *newname, - unsigned int flags); -asmlinkage long sys_futimesat(int dfd, const char __user *filename, - struct timeval __user *utimes); -asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode); -asmlinkage long sys_fchmodat(int dfd, const char __user * filename, - umode_t mode); -asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, - gid_t group, int flag); -asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, - umode_t mode); -asmlinkage long sys_newfstatat(int dfd, const char __user *filename, - struct stat __user *statbuf, int flag); -asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *buf, - int bufsiz); -asmlinkage long 
sys_utimensat(int dfd, const char __user *filename, - struct timespec __user *utimes, int flags); -asmlinkage long sys_unshare(unsigned long unshare_flags); - -asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, - int fd_out, loff_t __user *off_out, - size_t len, unsigned int flags); - -asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, - unsigned long nr_segs, unsigned int flags); - -asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags); - -asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, - unsigned int flags); -asmlinkage long sys_sync_file_range2(int fd, unsigned int flags, - loff_t offset, loff_t nbytes); -asmlinkage long sys_get_robust_list(int pid, - struct robust_list_head __user * __user *head_ptr, - size_t __user *len_ptr); -asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, - size_t len); -asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct getcpu_cache __user *cache); -asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask); -asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask, size_t sizemask, int flags); -asmlinkage long sys_timerfd_create(int clockid, int flags); -asmlinkage long sys_timerfd_settime(int ufd, int flags, - const struct itimerspec __user *utmr, - struct itimerspec __user *otmr); -asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr); -asmlinkage long sys_eventfd(unsigned int count); -asmlinkage long sys_eventfd2(unsigned int count, int flags); -asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags); -asmlinkage long sys_userfaultfd(int flags); -asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); -asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int); -asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *, - fd_set __user *, struct timespec __user *, 
- void __user *); -asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int, - struct timespec __user *, const sigset_t __user *, - size_t); -asmlinkage long sys_fanotify_init(unsigned int flags, unsigned int event_f_flags); -asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, - u64 mask, int fd, - const char __user *pathname); -asmlinkage long sys_syncfs(int fd); - -asmlinkage long sys_fork(void); -asmlinkage long sys_vfork(void); +/* arch/example/kernel/sys_example.c */ #ifdef CONFIG_CLONE_BACKWARDS asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, unsigned long, int __user *); @@ -879,26 +735,80 @@ asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int __user *, unsigned long); #endif #endif - asmlinkage long sys_execve(const char __user *filename, const char __user *const __user *argv, const char __user *const __user *envp); +/* mm/fadvise.c */ +asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice); + +/* mm/, CONFIG_MMU only */ +asmlinkage long sys_swapon(const char __user *specialfile, int swap_flags); +asmlinkage long sys_swapoff(const char __user *specialfile); +asmlinkage long sys_mprotect(unsigned long start, size_t len, + unsigned long prot); +asmlinkage long sys_msync(unsigned long start, size_t len, int flags); +asmlinkage long sys_mlock(unsigned long start, size_t len); +asmlinkage long sys_munlock(unsigned long start, size_t len); +asmlinkage long sys_mlockall(int flags); +asmlinkage long sys_munlockall(void); +asmlinkage long sys_mincore(unsigned long start, size_t len, + unsigned char __user * vec); +asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior); +asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, + unsigned long prot, unsigned long pgoff, + unsigned long flags); +asmlinkage long sys_mbind(unsigned long start, unsigned long len, + unsigned long mode, + const unsigned long __user *nmask, + unsigned long 
maxnode, + unsigned flags); +asmlinkage long sys_get_mempolicy(int __user *policy, + unsigned long __user *nmask, + unsigned long maxnode, + unsigned long addr, unsigned long flags); +asmlinkage long sys_set_mempolicy(int mode, const unsigned long __user *nmask, + unsigned long maxnode); +asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, + const unsigned long __user *from, + const unsigned long __user *to); +asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, + const void __user * __user *pages, + const int __user *nodes, + int __user *status, + int flags); + +asmlinkage long sys_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, + siginfo_t __user *uinfo); asmlinkage long sys_perf_event_open( struct perf_event_attr __user *attr_uptr, pid_t pid, int cpu, int group_fd, unsigned long flags); +asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *, int); +asmlinkage long sys_recvmmsg(int fd, struct mmsghdr __user *msg, + unsigned int vlen, unsigned flags, + struct timespec __user *timeout); -asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff); -asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg); +asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr, + int options, struct rusage __user *ru); +asmlinkage long sys_prlimit64(pid_t pid, unsigned int resource, + const struct rlimit64 __user *new_rlim, + struct rlimit64 __user *old_rlim); +asmlinkage long sys_fanotify_init(unsigned int flags, unsigned int event_f_flags); +asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, + u64 mask, int fd, + const char __user *pathname); asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name, struct file_handle __user *handle, int __user *mnt_id, int flag); asmlinkage long sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); +asmlinkage long 
sys_clock_adjtime(clockid_t which_clock, + struct timex __user *tx); +asmlinkage long sys_syncfs(int fd); asmlinkage long sys_setns(int fd, int nstype); +asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg, + unsigned int vlen, unsigned flags); asmlinkage long sys_process_vm_readv(pid_t pid, const struct iovec __user *lvec, unsigned long liovcnt, @@ -911,39 +821,259 @@ asmlinkage long sys_process_vm_writev(pid_t pid, const struct iovec __user *rvec, unsigned long riovcnt, unsigned long flags); - asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_sched_setattr(pid_t pid, + struct sched_attr __user *attr, + unsigned int flags); +asmlinkage long sys_sched_getattr(pid_t pid, + struct sched_attr __user *attr, + unsigned int size, + unsigned int flags); +asmlinkage long sys_renameat2(int olddfd, const char __user *oldname, + int newdfd, const char __user *newname, + unsigned int flags); asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, const char __user *uargs); asmlinkage long sys_getrandom(char __user *buf, size_t count, unsigned int flags); +asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags); asmlinkage long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size); - asmlinkage long sys_execveat(int dfd, const char __user *filename, const char __user *const __user *argv, const char __user *const __user *envp, int flags); - +asmlinkage long sys_userfaultfd(int flags); asmlinkage long sys_membarrier(int cmd, int flags); +asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags); asmlinkage long sys_copy_file_range(int fd_in, loff_t __user *off_in, int fd_out, loff_t __user *off_out, size_t len, unsigned int flags); - -asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags); - +asmlinkage long sys_preadv2(unsigned long fd, const struct 
iovec __user *vec, + unsigned long vlen, unsigned long pos_l, unsigned long pos_h, + rwf_t flags); +asmlinkage long sys_pwritev2(unsigned long fd, const struct iovec __user *vec, + unsigned long vlen, unsigned long pos_l, unsigned long pos_h, + rwf_t flags); asmlinkage long sys_pkey_mprotect(unsigned long start, size_t len, unsigned long prot, int pkey); asmlinkage long sys_pkey_alloc(unsigned long flags, unsigned long init_val); asmlinkage long sys_pkey_free(int pkey); -asmlinkage long sys_kexec_load(unsigned long entry, - unsigned long nr_segments, - struct kexec_segment __user *segments, - unsigned long flags); asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, unsigned mask, struct statx __user *buffer); +/* + * Architecture-specific system calls + */ + +/* arch/x86/kernel/ioport.c */ +asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on); + +/* pciconfig: alpha, arm, arm64, ia64, sparc */ +asmlinkage long sys_pciconfig_read(unsigned long bus, unsigned long dfn, + unsigned long off, unsigned long len, + void __user *buf); +asmlinkage long sys_pciconfig_write(unsigned long bus, unsigned long dfn, + unsigned long off, unsigned long len, + void __user *buf); +asmlinkage long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn); + +/* powerpc */ +asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, + __u32 __user *ustatus); +asmlinkage long sys_spu_create(const char __user *name, + unsigned int flags, umode_t mode, int fd); + + +/* + * Deprecated system calls which are still defined in + * include/uapi/asm-generic/unistd.h and wanted by >= 1 arch + */ + +/* __ARCH_WANT_SYSCALL_NO_AT */ +asmlinkage long sys_open(const char __user *filename, + int flags, umode_t mode); +asmlinkage long sys_link(const char __user *oldname, + const char __user *newname); +asmlinkage long sys_unlink(const char __user *pathname); +asmlinkage long sys_mknod(const char __user *filename, umode_t mode, + unsigned dev); 
+asmlinkage long sys_chmod(const char __user *filename, umode_t mode); +asmlinkage long sys_chown(const char __user *filename, + uid_t user, gid_t group); +asmlinkage long sys_mkdir(const char __user *pathname, umode_t mode); +asmlinkage long sys_rmdir(const char __user *pathname); +asmlinkage long sys_lchown(const char __user *filename, + uid_t user, gid_t group); +asmlinkage long sys_access(const char __user *filename, int mode); +asmlinkage long sys_rename(const char __user *oldname, + const char __user *newname); +asmlinkage long sys_symlink(const char __user *old, const char __user *new); +asmlinkage long sys_utimes(char __user *filename, + struct timeval __user *utimes); +#if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64) +asmlinkage long sys_stat64(const char __user *filename, + struct stat64 __user *statbuf); +asmlinkage long sys_lstat64(const char __user *filename, + struct stat64 __user *statbuf); +#endif + +/* __ARCH_WANT_SYSCALL_NO_FLAGS */ +asmlinkage long sys_pipe(int __user *fildes); +asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd); +asmlinkage long sys_epoll_create(int size); +asmlinkage long sys_inotify_init(void); +asmlinkage long sys_eventfd(unsigned int count); +asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask); + +/* __ARCH_WANT_SYSCALL_OFF_T */ +asmlinkage long sys_sendfile(int out_fd, int in_fd, + off_t __user *offset, size_t count); +asmlinkage long sys_newstat(const char __user *filename, + struct stat __user *statbuf); +asmlinkage long sys_newlstat(const char __user *filename, + struct stat __user *statbuf); +asmlinkage long sys_fadvise64(int fd, loff_t offset, size_t len, int advice); + +/* __ARCH_WANT_SYSCALL_DEPRECATED */ +asmlinkage long sys_alarm(unsigned int seconds); +asmlinkage long sys_getpgrp(void); +asmlinkage long sys_pause(void); +asmlinkage long sys_time(time_t __user *tloc); +asmlinkage long sys_utime(char __user *filename, + struct utimbuf __user *times); 
+asmlinkage long sys_creat(const char __user *pathname, umode_t mode); +asmlinkage long sys_getdents(unsigned int fd, + struct linux_dirent __user *dirent, + unsigned int count); +asmlinkage long sys_futimesat(int dfd, const char __user *filename, + struct timeval __user *utimes); +asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, + fd_set __user *exp, struct timeval __user *tvp); +asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, + int timeout); +asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, + int maxevents, int timeout); +asmlinkage long sys_ustat(unsigned dev, struct ustat __user *ubuf); +asmlinkage long sys_vfork(void); +asmlinkage long sys_recv(int, void __user *, size_t, unsigned); +asmlinkage long sys_send(int, void __user *, size_t, unsigned); +asmlinkage long sys_bdflush(int func, long data); +asmlinkage long sys_oldumount(char __user *name); +asmlinkage long sys_uselib(const char __user *library); +asmlinkage long sys_sysctl(struct __sysctl_args __user *args); +asmlinkage long sys_sysfs(int option, + unsigned long arg1, unsigned long arg2); +asmlinkage long sys_fork(void); + +/* obsolete: kernel/time/time.c */ +asmlinkage long sys_stime(time_t __user *tptr); + +/* obsolete: kernel/signal.c */ +asmlinkage long sys_sigpending(old_sigset_t __user *uset); +asmlinkage long sys_sigprocmask(int how, old_sigset_t __user *set, + old_sigset_t __user *oset); +#ifdef CONFIG_OLD_SIGSUSPEND +asmlinkage long sys_sigsuspend(old_sigset_t mask); +#endif + +#ifdef CONFIG_OLD_SIGSUSPEND3 +asmlinkage long sys_sigsuspend(int unused1, int unused2, old_sigset_t mask); +#endif + +#ifdef CONFIG_OLD_SIGACTION +asmlinkage long sys_sigaction(int, const struct old_sigaction __user *, + struct old_sigaction __user *); +#endif +asmlinkage long sys_sgetmask(void); +asmlinkage long sys_ssetmask(int newmask); +asmlinkage long sys_signal(int sig, __sighandler_t handler); + +/* obsolete: kernel/sched/core.c */ 
+asmlinkage long sys_nice(int increment); + +/* obsolete: kernel/kexec_file.c */ +asmlinkage long sys_kexec_file_load(int kernel_fd, int initrd_fd, + unsigned long cmdline_len, + const char __user *cmdline_ptr, + unsigned long flags); + +/* obsolete: kernel/exit.c */ +asmlinkage long sys_waitpid(pid_t pid, int __user *stat_addr, int options); + +/* obsolete: kernel/uid16.c */ +#ifdef CONFIG_HAVE_UID16 +asmlinkage long sys_chown16(const char __user *filename, + old_uid_t user, old_gid_t group); +asmlinkage long sys_lchown16(const char __user *filename, + old_uid_t user, old_gid_t group); +asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group); +asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid); +asmlinkage long sys_setgid16(old_gid_t gid); +asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid); +asmlinkage long sys_setuid16(old_uid_t uid); +asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid); +asmlinkage long sys_getresuid16(old_uid_t __user *ruid, + old_uid_t __user *euid, old_uid_t __user *suid); +asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid); +asmlinkage long sys_getresgid16(old_gid_t __user *rgid, + old_gid_t __user *egid, old_gid_t __user *sgid); +asmlinkage long sys_setfsuid16(old_uid_t uid); +asmlinkage long sys_setfsgid16(old_gid_t gid); +asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist); +asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist); +asmlinkage long sys_getuid16(void); +asmlinkage long sys_geteuid16(void); +asmlinkage long sys_getgid16(void); +asmlinkage long sys_getegid16(void); +#endif + +/* obsolete: net/socket.c */ +asmlinkage long sys_socketcall(int call, unsigned long __user *args); + +/* obsolete: fs/stat.c */ +asmlinkage long sys_stat(const char __user *filename, + struct __old_kernel_stat __user *statbuf); +asmlinkage long sys_lstat(const char __user *filename, + struct 
__old_kernel_stat __user *statbuf); +asmlinkage long sys_fstat(unsigned int fd, + struct __old_kernel_stat __user *statbuf); +asmlinkage long sys_readlink(const char __user *path, + char __user *buf, int bufsiz); + +/* obsolete: fs/select.c */ +asmlinkage long sys_old_select(struct sel_arg_struct __user *arg); + +/* obsolete: fs/readdir.c */ +asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int); + +/* obsolete: kernel/sys.c */ +asmlinkage long sys_gethostname(char __user *name, int len); +asmlinkage long sys_uname(struct old_utsname __user *); +asmlinkage long sys_olduname(struct oldold_utsname __user *); +#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT +asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim); +#endif + +/* obsolete: ipc */ +asmlinkage long sys_ipc(unsigned int call, int first, unsigned long second, + unsigned long third, void __user *ptr, long fifth); + +/* obsolete: mm/ */ +asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff); +asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg); + + +/* + * Not a real system call, but a placeholder for syscalls which are + * not implemented -- see kernel/sys_ni.c + */ +asmlinkage long sys_ni_syscall(void); + + /* * Kernel code should not call syscalls (i.e., sys_xyzyyz()) directly. * Instead, use one of the functions which work equivalently, such as -- cgit v1.2.3 From c679a08983db7c1eb09930570b92ff7c9fd59c1c Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 25 Mar 2018 23:04:48 +0200 Subject: syscalls: sort syscall prototypes in include/linux/compat.h Shuffle the syscall prototypes in include/linux/compat.h around so that they are kept in the same order as in include/uapi/asm-generic/unistd.h. 
The individual entries are kept the same, and neither modified to bring them in line with kernel coding style nor wrapped in proper ifdefs -- as an exception to this, add the prefix "asmlinkage" where it was missing. Cc: Arnd Bergmann Cc: Andrew Morton Signed-off-by: Dominik Brodowski --- include/linux/compat.h | 655 ++++++++++++++++++++++++++++--------------------- 1 file changed, 378 insertions(+), 277 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index f1649a5e6716..f881cce627f6 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -305,10 +305,6 @@ extern int put_compat_rusage(const struct rusage *, struct compat_siginfo; -extern asmlinkage long compat_sys_waitid(int, compat_pid_t, - struct compat_siginfo __user *, int, - struct compat_rusage __user *); - struct compat_dirent { u32 d_ino; compat_off_t d_off; @@ -422,90 +418,6 @@ struct compat_msgbuf; extern void compat_exit_robust_list(struct task_struct *curr); -asmlinkage long -compat_sys_set_robust_list(struct compat_robust_list_head __user *head, - compat_size_t len); -asmlinkage long -compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, - compat_size_t __user *len_ptr); - -asmlinkage long compat_sys_ipc(u32, int, int, u32, compat_uptr_t, u32); -asmlinkage long compat_sys_shmat(int shmid, compat_uptr_t shmaddr, int shmflg); -asmlinkage long compat_sys_semctl(int semid, int semnum, int cmd, int arg); -asmlinkage long compat_sys_msgsnd(int msqid, compat_uptr_t msgp, - compat_ssize_t msgsz, int msgflg); -asmlinkage long compat_sys_msgrcv(int msqid, compat_uptr_t msgp, - compat_ssize_t msgsz, compat_long_t msgtyp, int msgflg); -long compat_sys_msgctl(int first, int second, void __user *uptr); -long compat_sys_shmctl(int first, int second, void __user *uptr); -long compat_sys_semtimedop(int semid, struct sembuf __user *tsems, - unsigned nsems, const struct compat_timespec __user *timeout); -asmlinkage long compat_sys_keyctl(u32 
option, - u32 arg2, u32 arg3, u32 arg4, u32 arg5); -asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u32); - -asmlinkage ssize_t compat_sys_readv(compat_ulong_t fd, - const struct compat_iovec __user *vec, compat_ulong_t vlen); -asmlinkage ssize_t compat_sys_writev(compat_ulong_t fd, - const struct compat_iovec __user *vec, compat_ulong_t vlen); -asmlinkage ssize_t compat_sys_preadv(compat_ulong_t fd, - const struct compat_iovec __user *vec, - compat_ulong_t vlen, u32 pos_low, u32 pos_high); -asmlinkage ssize_t compat_sys_pwritev(compat_ulong_t fd, - const struct compat_iovec __user *vec, - compat_ulong_t vlen, u32 pos_low, u32 pos_high); -asmlinkage ssize_t compat_sys_preadv2(compat_ulong_t fd, - const struct compat_iovec __user *vec, - compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags); -asmlinkage ssize_t compat_sys_pwritev2(compat_ulong_t fd, - const struct compat_iovec __user *vec, - compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags); - -asmlinkage long compat_sys_quotactl32(unsigned int cmd, - const char __user *special, qid_t id, void __user *addr); - -#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 -asmlinkage long compat_sys_preadv64(unsigned long fd, - const struct compat_iovec __user *vec, - unsigned long vlen, loff_t pos); -#endif - -#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 -asmlinkage long compat_sys_pwritev64(unsigned long fd, - const struct compat_iovec __user *vec, - unsigned long vlen, loff_t pos); -#endif - -#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 -asmlinkage long compat_sys_readv64v2(unsigned long fd, - const struct compat_iovec __user *vec, - unsigned long vlen, loff_t pos, rwf_t flags); -#endif - -#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 -asmlinkage long compat_sys_pwritev64v2(unsigned long fd, - const struct compat_iovec __user *vec, - unsigned long vlen, loff_t pos, rwf_t flags); -#endif - -asmlinkage long compat_sys_lseek(unsigned int, compat_off_t, unsigned int); - -asmlinkage long 
compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv, - const compat_uptr_t __user *envp); -asmlinkage long compat_sys_execveat(int dfd, const char __user *filename, - const compat_uptr_t __user *argv, - const compat_uptr_t __user *envp, int flags); - -asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, - compat_ulong_t __user *outp, compat_ulong_t __user *exp, - struct compat_timeval __user *tvp); - -asmlinkage long compat_sys_old_select(struct compat_sel_arg_struct __user *arg); - -asmlinkage long compat_sys_wait4(compat_pid_t pid, - compat_uint_t __user *stat_addr, int options, - struct compat_rusage __user *ru); - #define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t)) #define BITS_TO_COMPAT_LONGS(bits) DIV_ROUND_UP(bits, BITS_PER_COMPAT_LONG) @@ -518,13 +430,6 @@ int copy_siginfo_from_user32(siginfo_t *to, const struct compat_siginfo __user * int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *from); int get_compat_sigevent(struct sigevent *event, const struct compat_sigevent __user *u_event); -long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, - struct compat_siginfo __user *uinfo); -#ifdef CONFIG_COMPAT_OLD_SIGACTION -asmlinkage long compat_sys_sigaction(int sig, - const struct compat_old_sigaction __user *act, - struct compat_old_sigaction __user *oact); -#endif static inline int compat_timeval_compare(struct compat_timeval *lhs, struct compat_timeval *rhs) @@ -546,13 +451,6 @@ static inline int compat_timespec_compare(struct compat_timespec *lhs, return lhs->tv_nsec - rhs->tv_nsec; } -asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, - struct timezone __user *tz); -asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, - struct timezone __user *tz); - -asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); - extern int get_compat_sigset(sigset_t *set, const compat_sigset_t __user *compat); /* @@ 
-578,110 +476,132 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, #endif } -asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, - compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes, - const compat_ulong_t __user *new_nodes); - extern int compat_ptrace_request(struct task_struct *child, compat_long_t request, compat_ulong_t addr, compat_ulong_t data); extern long compat_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t addr, compat_ulong_t data); -asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, - compat_long_t addr, compat_long_t data); -asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t); +struct epoll_event; /* fortunately, this one is fixed-layout */ + +extern ssize_t compat_rw_copy_check_uvector(int type, + const struct compat_iovec __user *uvector, + unsigned long nr_segs, + unsigned long fast_segs, struct iovec *fast_pointer, + struct iovec **ret_pointer); + +extern void __user *compat_alloc_user_space(unsigned long len); + +int compat_restore_altstack(const compat_stack_t __user *uss); +int __compat_save_altstack(compat_stack_t __user *, unsigned long); +#define compat_save_altstack_ex(uss, sp) do { \ + compat_stack_t __user *__uss = uss; \ + struct task_struct *t = current; \ + put_user_ex(ptr_to_compat((void __user *)t->sas_ss_sp), &__uss->ss_sp); \ + put_user_ex(t->sas_ss_flags, &__uss->ss_flags); \ + put_user_ex(t->sas_ss_size, &__uss->ss_size); \ + if (t->sas_ss_flags & SS_AUTODISARM) \ + sas_ss_reset(t); \ +} while (0); + /* - * epoll (fs/eventpoll.c) compat bits follow ... + * These syscall function prototypes are kept in the same order as + * include/uapi/asm-generic/unistd.h. Architecture specific entries go below, + * followed by deprecated or obsolete system calls. + * + * Please note that these prototypes here are only provided for information + * purposes, for static analysis, and for linking from the syscall table. 
+ * These functions should not be called elsewhere from kernel code. */ -struct epoll_event; /* fortunately, this one is fixed-layout */ +asmlinkage long compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p); +asmlinkage long compat_sys_io_submit(compat_aio_context_t ctx_id, int nr, + u32 __user *iocb); +asmlinkage long compat_sys_io_getevents(compat_aio_context_t ctx_id, + compat_long_t min_nr, + compat_long_t nr, + struct io_event __user *events, + struct compat_timespec __user *timeout); + +/* fs/cookies.c */ +asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t); + +/* fs/eventpoll.c */ asmlinkage long compat_sys_epoll_pwait(int epfd, struct epoll_event __user *events, int maxevents, int timeout, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); -asmlinkage long compat_sys_utime(const char __user *filename, - struct compat_utimbuf __user *t); -asmlinkage long compat_sys_utimensat(unsigned int dfd, - const char __user *filename, - struct compat_timespec __user *t, - int flags); +/* fs/fcntl.c */ +asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd, + compat_ulong_t arg); +asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, + compat_ulong_t arg); -asmlinkage long compat_sys_time(compat_time_t __user *tloc); -asmlinkage long compat_sys_stime(compat_time_t __user *tptr); -asmlinkage long compat_sys_signalfd(int ufd, - const compat_sigset_t __user *sigmask, - compat_size_t sigsetsize); -asmlinkage long compat_sys_timerfd_settime(int ufd, int flags, - const struct compat_itimerspec __user *utmr, - struct compat_itimerspec __user *otmr); -asmlinkage long compat_sys_timerfd_gettime(int ufd, - struct compat_itimerspec __user *otmr); +/* fs/ioctl.c */ +asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, + compat_ulong_t arg); -asmlinkage long compat_sys_move_pages(pid_t pid, compat_ulong_t nr_pages, - __u32 __user *pages, - const int __user *nodes, - int __user *status, - int 
flags); -asmlinkage long compat_sys_futimesat(unsigned int dfd, - const char __user *filename, - struct compat_timeval __user *t); -asmlinkage long compat_sys_utimes(const char __user *filename, - struct compat_timeval __user *t); -asmlinkage long compat_sys_newstat(const char __user *filename, - struct compat_stat __user *statbuf); -asmlinkage long compat_sys_newlstat(const char __user *filename, - struct compat_stat __user *statbuf); -asmlinkage long compat_sys_newfstatat(unsigned int dfd, - const char __user *filename, - struct compat_stat __user *statbuf, - int flag); -asmlinkage long compat_sys_newfstat(unsigned int fd, - struct compat_stat __user *statbuf); +/* fs/namespace.c */ +asmlinkage long compat_sys_mount(const char __user *dev_name, + const char __user *dir_name, + const char __user *type, compat_ulong_t flags, + const void __user *data); + +/* fs/open.c */ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf); -asmlinkage long compat_sys_fstatfs(unsigned int fd, - struct compat_statfs __user *buf); asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf); +asmlinkage long compat_sys_fstatfs(unsigned int fd, + struct compat_statfs __user *buf); asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf); -asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, - compat_ulong_t arg); -asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd, - compat_ulong_t arg); -asmlinkage long compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p); -asmlinkage long compat_sys_io_getevents(compat_aio_context_t ctx_id, - compat_long_t min_nr, - compat_long_t nr, - struct io_event __user *events, - struct compat_timespec __user *timeout); -asmlinkage long compat_sys_io_submit(compat_aio_context_t ctx_id, int nr, - u32 __user *iocb); -asmlinkage long compat_sys_mount(const char __user 
*dev_name, - const char __user *dir_name, - const char __user *type, compat_ulong_t flags, - const void __user *data); -asmlinkage long compat_sys_old_readdir(unsigned int fd, - struct compat_old_linux_dirent __user *, - unsigned int count); +asmlinkage long compat_sys_truncate(const char __user *, compat_off_t); +asmlinkage long compat_sys_ftruncate(unsigned int, compat_ulong_t); +/* No generic prototype for truncate64, ftruncate64, fallocate */ +asmlinkage long compat_sys_openat(int dfd, const char __user *filename, + int flags, umode_t mode); + +/* fs/readdir.c */ asmlinkage long compat_sys_getdents(unsigned int fd, struct compat_linux_dirent __user *dirent, unsigned int count); -asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *, - unsigned int nr_segs, unsigned int flags); -asmlinkage long compat_sys_open(const char __user *filename, int flags, - umode_t mode); -asmlinkage long compat_sys_openat(int dfd, const char __user *filename, - int flags, umode_t mode); -asmlinkage long compat_sys_open_by_handle_at(int mountdirfd, - struct file_handle __user *handle, - int flags); -asmlinkage long compat_sys_truncate(const char __user *, compat_off_t); -asmlinkage long compat_sys_ftruncate(unsigned int, compat_ulong_t); + +/* fs/read_write.c */ +asmlinkage long compat_sys_lseek(unsigned int, compat_off_t, unsigned int); +asmlinkage ssize_t compat_sys_readv(compat_ulong_t fd, + const struct compat_iovec __user *vec, compat_ulong_t vlen); +asmlinkage ssize_t compat_sys_writev(compat_ulong_t fd, + const struct compat_iovec __user *vec, compat_ulong_t vlen); +/* No generic prototype for pread64 and pwrite64 */ +asmlinkage ssize_t compat_sys_preadv(compat_ulong_t fd, + const struct compat_iovec __user *vec, + compat_ulong_t vlen, u32 pos_low, u32 pos_high); +asmlinkage ssize_t compat_sys_pwritev(compat_ulong_t fd, + const struct compat_iovec __user *vec, + compat_ulong_t vlen, u32 pos_low, u32 pos_high); +#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 
+asmlinkage long compat_sys_preadv64(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos); +#endif + +#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 +asmlinkage long compat_sys_pwritev64(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos); +#endif + +/* fs/sendfile.c */ +asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, + compat_off_t __user *offset, compat_size_t count); +asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd, + compat_loff_t __user *offset, compat_size_t count); + +/* fs/select.c */ asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, @@ -692,110 +612,149 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, struct compat_timespec __user *tsp, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); + +/* fs/signalfd.c */ asmlinkage long compat_sys_signalfd4(int ufd, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize, int flags); -asmlinkage long compat_sys_get_mempolicy(int __user *policy, - compat_ulong_t __user *nmask, - compat_ulong_t maxnode, - compat_ulong_t addr, - compat_ulong_t flags); -asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask, - compat_ulong_t maxnode); -asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, - compat_ulong_t mode, - compat_ulong_t __user *nmask, - compat_ulong_t maxnode, compat_ulong_t flags); -asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, - char __user *optval, unsigned int optlen); -asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, - unsigned flags); -asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, - unsigned vlen, unsigned int flags); -asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, - unsigned int flags); -asmlinkage long compat_sys_recv(int fd, void __user 
*buf, compat_size_t len, - unsigned flags); -asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, compat_size_t len, - unsigned flags, struct sockaddr __user *addr, - int __user *addrlen); -asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, - unsigned vlen, unsigned int flags, - struct compat_timespec __user *timeout); +/* fs/splice.c */ +asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *, + unsigned int nr_segs, unsigned int flags); + +/* fs/stat.c */ +asmlinkage long compat_sys_newfstatat(unsigned int dfd, + const char __user *filename, + struct compat_stat __user *statbuf, + int flag); +asmlinkage long compat_sys_newfstat(unsigned int fd, + struct compat_stat __user *statbuf); + +/* fs/sync.c: No generic prototype for sync_file_range and sync_file_range2 */ + +/* fs/timerfd.c */ +asmlinkage long compat_sys_timerfd_gettime(int ufd, + struct compat_itimerspec __user *otmr); +asmlinkage long compat_sys_timerfd_settime(int ufd, int flags, + const struct compat_itimerspec __user *utmr, + struct compat_itimerspec __user *otmr); + +/* fs/utimes.c */ +asmlinkage long compat_sys_utimensat(unsigned int dfd, + const char __user *filename, + struct compat_timespec __user *t, + int flags); + +/* kernel/exit.c */ +asmlinkage long compat_sys_waitid(int, compat_pid_t, + struct compat_siginfo __user *, int, + struct compat_rusage __user *); + + + +/* kernel/futex.c */ +asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, + struct compat_timespec __user *utime, u32 __user *uaddr2, + u32 val3); +asmlinkage long +compat_sys_set_robust_list(struct compat_robust_list_head __user *head, + compat_size_t len); +asmlinkage long +compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, + compat_size_t __user *len_ptr); + +/* kernel/hrtimer.c */ asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp, struct compat_timespec __user *rmtp); + +/* kernel/itimer.c */ asmlinkage long 
compat_sys_getitimer(int which, struct compat_itimerval __user *it); asmlinkage long compat_sys_setitimer(int which, struct compat_itimerval __user *in, struct compat_itimerval __user *out); -asmlinkage long compat_sys_times(struct compat_tms __user *tbuf); -asmlinkage long compat_sys_setrlimit(unsigned int resource, - struct compat_rlimit __user *rlim); -asmlinkage long compat_sys_getrlimit(unsigned int resource, - struct compat_rlimit __user *rlim); -asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru); -asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid, - unsigned int len, - compat_ulong_t __user *user_mask_ptr); -asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, - unsigned int len, - compat_ulong_t __user *user_mask_ptr); + +/* kernel/kexec.c */ +asmlinkage long compat_sys_kexec_load(compat_ulong_t entry, + compat_ulong_t nr_segments, + struct compat_kexec_segment __user *, + compat_ulong_t flags); + +/* kernel/posix-timers.c */ asmlinkage long compat_sys_timer_create(clockid_t which_clock, struct compat_sigevent __user *timer_event_spec, timer_t __user *created_timer_id); +asmlinkage long compat_sys_timer_gettime(timer_t timer_id, + struct compat_itimerspec __user *setting); asmlinkage long compat_sys_timer_settime(timer_t timer_id, int flags, struct compat_itimerspec __user *new, struct compat_itimerspec __user *old); -asmlinkage long compat_sys_timer_gettime(timer_t timer_id, - struct compat_itimerspec __user *setting); asmlinkage long compat_sys_clock_settime(clockid_t which_clock, struct compat_timespec __user *tp); asmlinkage long compat_sys_clock_gettime(clockid_t which_clock, struct compat_timespec __user *tp); -asmlinkage long compat_sys_clock_adjtime(clockid_t which_clock, - struct compat_timex __user *tp); asmlinkage long compat_sys_clock_getres(clockid_t which_clock, struct compat_timespec __user *tp); asmlinkage long compat_sys_clock_nanosleep(clockid_t which_clock, int flags, struct 
compat_timespec __user *rqtp, struct compat_timespec __user *rmtp); -asmlinkage long compat_sys_rt_sigtimedwait(compat_sigset_t __user *uthese, - struct compat_siginfo __user *uinfo, - struct compat_timespec __user *uts, compat_size_t sigsetsize); + +/* kernel/ptrace.c */ +asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, + compat_long_t addr, compat_long_t data); + +/* kernel/sched/core.c */ +asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid, + unsigned int len, + compat_ulong_t __user *user_mask_ptr); +asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, + unsigned int len, + compat_ulong_t __user *user_mask_ptr); +asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, + struct compat_timespec __user *interval); + +/* kernel/signal.c */ +asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr, + compat_stack_t __user *uoss_ptr); asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat_size_t sigsetsize); -asmlinkage long compat_sys_rt_sigprocmask(int how, compat_sigset_t __user *set, - compat_sigset_t __user *oset, - compat_size_t sigsetsize); -asmlinkage long compat_sys_rt_sigpending(compat_sigset_t __user *uset, - compat_size_t sigsetsize); #ifndef CONFIG_ODD_RT_SIGACTION asmlinkage long compat_sys_rt_sigaction(int, const struct compat_sigaction __user *, struct compat_sigaction __user *, compat_size_t); #endif +asmlinkage long compat_sys_rt_sigprocmask(int how, compat_sigset_t __user *set, + compat_sigset_t __user *oset, + compat_size_t sigsetsize); +asmlinkage long compat_sys_rt_sigpending(compat_sigset_t __user *uset, + compat_size_t sigsetsize); +asmlinkage long compat_sys_rt_sigtimedwait(compat_sigset_t __user *uthese, + struct compat_siginfo __user *uinfo, + struct compat_timespec __user *uts, compat_size_t sigsetsize); asmlinkage long compat_sys_rt_sigqueueinfo(compat_pid_t pid, int sig, struct compat_siginfo __user *uinfo); +/* No generic prototype 
for rt_sigreturn */ + +/* kernel/sys.c */ +asmlinkage long compat_sys_times(struct compat_tms __user *tbuf); +asmlinkage long compat_sys_getrlimit(unsigned int resource, + struct compat_rlimit __user *rlim); +asmlinkage long compat_sys_setrlimit(unsigned int resource, + struct compat_rlimit __user *rlim); +asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru); + +/* kernel/time.c */ +asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz); +asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, + struct timezone __user *tz); +asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); + +/* kernel/timer.c */ asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info); -asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, - compat_ulong_t arg); -asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, - struct compat_timespec __user *utime, u32 __user *uaddr2, - u32 val3); -asmlinkage long compat_sys_getsockopt(int fd, int level, int optname, - char __user *optval, int __user *optlen); -asmlinkage long compat_sys_kexec_load(compat_ulong_t entry, - compat_ulong_t nr_segments, - struct compat_kexec_segment __user *, - compat_ulong_t flags); -asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes, - const struct compat_mq_attr __user *u_mqstat, - struct compat_mq_attr __user *u_omqstat); -asmlinkage long compat_sys_mq_notify(mqd_t mqdes, - const struct compat_sigevent __user *u_notification); + +/* ipc/mqueue.c */ asmlinkage long compat_sys_mq_open(const char __user *u_name, int oflag, compat_mode_t mode, struct compat_mq_attr __user *u_attr); @@ -807,17 +766,92 @@ asmlinkage ssize_t compat_sys_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr, compat_size_t msg_len, unsigned int __user *u_msg_prio, const struct compat_timespec __user *u_abs_timeout); -asmlinkage long compat_sys_socketcall(int call, u32 __user *args); -asmlinkage 
long compat_sys_sysctl(struct compat_sysctl_args __user *args); +asmlinkage long compat_sys_mq_notify(mqd_t mqdes, + const struct compat_sigevent __user *u_notification); +asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes, + const struct compat_mq_attr __user *u_mqstat, + struct compat_mq_attr __user *u_omqstat); -extern ssize_t compat_rw_copy_check_uvector(int type, - const struct compat_iovec __user *uvector, - unsigned long nr_segs, - unsigned long fast_segs, struct iovec *fast_pointer, - struct iovec **ret_pointer); +/* ipc/msg.c */ +asmlinkage long compat_sys_msgctl(int first, int second, void __user *uptr); +asmlinkage long compat_sys_msgrcv(int msqid, compat_uptr_t msgp, + compat_ssize_t msgsz, compat_long_t msgtyp, int msgflg); +asmlinkage long compat_sys_msgsnd(int msqid, compat_uptr_t msgp, + compat_ssize_t msgsz, int msgflg); -extern void __user *compat_alloc_user_space(unsigned long len); +/* ipc/sem.c */ +asmlinkage long compat_sys_semctl(int semid, int semnum, int cmd, int arg); +asmlinkage long compat_sys_semtimedop(int semid, struct sembuf __user *tsems, + unsigned nsems, const struct compat_timespec __user *timeout); + +/* ipc/shm.c */ +asmlinkage long compat_sys_shmctl(int first, int second, void __user *uptr); +asmlinkage long compat_sys_shmat(int shmid, compat_uptr_t shmaddr, int shmflg); + +/* net/socket.c */ +asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, compat_size_t len, + unsigned flags, struct sockaddr __user *addr, + int __user *addrlen); +asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, + char __user *optval, unsigned int optlen); +asmlinkage long compat_sys_getsockopt(int fd, int level, int optname, + char __user *optval, int __user *optlen); +asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, + unsigned flags); +asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, + unsigned int flags); + +/* mm/filemap.c: No generic prototype for readahead */ +/* 
security/keys/keyctl.c */ +asmlinkage long compat_sys_keyctl(u32 option, + u32 arg2, u32 arg3, u32 arg4, u32 arg5); + +/* arch/example/kernel/sys_example.c */ +asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv, + const compat_uptr_t __user *envp); + +/* mm/fadvise.c: No generic prototype for fadvise64_64 */ + +/* mm/, CONFIG_MMU only */ +asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, + compat_ulong_t mode, + compat_ulong_t __user *nmask, + compat_ulong_t maxnode, compat_ulong_t flags); +asmlinkage long compat_sys_get_mempolicy(int __user *policy, + compat_ulong_t __user *nmask, + compat_ulong_t maxnode, + compat_ulong_t addr, + compat_ulong_t flags); +asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask, + compat_ulong_t maxnode); +asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, + compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes, + const compat_ulong_t __user *new_nodes); +asmlinkage long compat_sys_move_pages(pid_t pid, compat_ulong_t nr_pages, + __u32 __user *pages, + const int __user *nodes, + int __user *status, + int flags); + +asmlinkage long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, + compat_pid_t pid, int sig, + struct compat_siginfo __user *uinfo); +asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, + unsigned vlen, unsigned int flags, + struct compat_timespec __user *timeout); +asmlinkage long compat_sys_wait4(compat_pid_t pid, + compat_uint_t __user *stat_addr, int options, + struct compat_rusage __user *ru); +asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, + int, const char __user *); +asmlinkage long compat_sys_open_by_handle_at(int mountdirfd, + struct file_handle __user *handle, + int flags); +asmlinkage long compat_sys_clock_adjtime(clockid_t which_clock, + struct compat_timex __user *tp); +asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user 
*mmsg, + unsigned vlen, unsigned int flags); asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid, const struct compat_iovec __user *lvec, compat_ulong_t liovcnt, const struct compat_iovec __user *rvec, @@ -826,14 +860,89 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid, const struct compat_iovec __user *lvec, compat_ulong_t liovcnt, const struct compat_iovec __user *rvec, compat_ulong_t riovcnt, compat_ulong_t flags); +asmlinkage long compat_sys_execveat(int dfd, const char __user *filename, + const compat_uptr_t __user *argv, + const compat_uptr_t __user *envp, int flags); +asmlinkage ssize_t compat_sys_preadv2(compat_ulong_t fd, + const struct compat_iovec __user *vec, + compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags); +asmlinkage ssize_t compat_sys_pwritev2(compat_ulong_t fd, + const struct compat_iovec __user *vec, + compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags); +#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 +asmlinkage long compat_sys_readv64v2(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos, rwf_t flags); +#endif -asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, - compat_off_t __user *offset, compat_size_t count); -asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd, - compat_loff_t __user *offset, compat_size_t count); -asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr, - compat_stack_t __user *uoss_ptr); +#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 +asmlinkage long compat_sys_pwritev64v2(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos, rwf_t flags); +#endif + + +/* + * Architecture-specific system calls + */ + +/* fs/quota/compat.c -- x86 only */ +asmlinkage long compat_sys_quotactl32(unsigned int cmd, + const char __user *special, qid_t id, void __user *addr); + +/* arch_prctl -- x86 */ +asmlinkage long compat_sys_arch_prctl(int option, unsigned long arg2); + + +/* + * 
Deprecated system calls which are still defined in + * include/uapi/asm-generic/unistd.h and wanted by >= 1 arch + */ + +/* __ARCH_WANT_SYSCALL_NO_AT */ +asmlinkage long compat_sys_open(const char __user *filename, int flags, + umode_t mode); +asmlinkage long compat_sys_utimes(const char __user *filename, + struct compat_timeval __user *t); + +/* __ARCH_WANT_SYSCALL_NO_FLAGS */ +asmlinkage long compat_sys_signalfd(int ufd, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize); + +/* __ARCH_WANT_SYSCALL_OFF_T */ +asmlinkage long compat_sys_newstat(const char __user *filename, + struct compat_stat __user *statbuf); +asmlinkage long compat_sys_newlstat(const char __user *filename, + struct compat_stat __user *statbuf); + +/* __ARCH_WANT_SYSCALL_DEPRECATED */ +asmlinkage long compat_sys_time(compat_time_t __user *tloc); +asmlinkage long compat_sys_utime(const char __user *filename, + struct compat_utimbuf __user *t); +asmlinkage long compat_sys_futimesat(unsigned int dfd, + const char __user *filename, + struct compat_timeval __user *t); +asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, + compat_ulong_t __user *outp, compat_ulong_t __user *exp, + struct compat_timeval __user *tvp); +asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u32); +asmlinkage long compat_sys_recv(int fd, void __user *buf, compat_size_t len, + unsigned flags); +asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user *args); + +/* obsolete: fs/readdir.c */ +asmlinkage long compat_sys_old_readdir(unsigned int fd, + struct compat_old_linux_dirent __user *, + unsigned int count); + +/* obsolete: fs/select.c */ +asmlinkage long compat_sys_old_select(struct compat_sel_arg_struct __user *arg); + +/* obsolete: ipc */ +asmlinkage long compat_sys_ipc(u32, int, int, u32, compat_uptr_t, u32); +/* obsolete: kernel/signal.c */ #ifdef __ARCH_WANT_SYS_SIGPENDING asmlinkage long compat_sys_sigpending(compat_old_sigset_t __user *set); #endif @@ 
-842,26 +951,18 @@ asmlinkage long compat_sys_sigpending(compat_old_sigset_t __user *set); asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *nset, compat_old_sigset_t __user *oset); #endif +#ifdef CONFIG_COMPAT_OLD_SIGACTION +asmlinkage long compat_sys_sigaction(int sig, + const struct compat_old_sigaction __user *act, + struct compat_old_sigaction __user *oact); +#endif -int compat_restore_altstack(const compat_stack_t __user *uss); -int __compat_save_altstack(compat_stack_t __user *, unsigned long); -#define compat_save_altstack_ex(uss, sp) do { \ - compat_stack_t __user *__uss = uss; \ - struct task_struct *t = current; \ - put_user_ex(ptr_to_compat((void __user *)t->sas_ss_sp), &__uss->ss_sp); \ - put_user_ex(t->sas_ss_flags, &__uss->ss_flags); \ - put_user_ex(t->sas_ss_size, &__uss->ss_size); \ - if (t->sas_ss_flags & SS_AUTODISARM) \ - sas_ss_reset(t); \ -} while (0); - -asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, - struct compat_timespec __user *interval); +/* obsolete: kernel/time/time.c */ +asmlinkage long compat_sys_stime(compat_time_t __user *tptr); -asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, - int, const char __user *); +/* obsolete: net/socket.c */ +asmlinkage long compat_sys_socketcall(int call, u32 __user *args); -asmlinkage long compat_sys_arch_prctl(int option, unsigned long arg2); /* * For most but not all architectures, "am I in a compat syscall?" and -- cgit v1.2.3 From 3e2052e5dd4062ccc7a10e8860aa7d2e58627001 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 22 Mar 2018 14:09:17 +0100 Subject: syscalls/x86: auto-create compat_sys_*() prototypes compat_sys_*() functions are no longer called from within the kernel on x86 except from the system call table. Linking the system call does not require compat_sys_*() function prototypes at least on x86. 
Therefore, generate compat_sys_*() prototypes on-the-fly within the COMPAT_SYSCALL_DEFINEx() macro, and remove x86-specific prototypes from various header files. Suggested-by: Andy Lutomirski Cc: Arnd Bergmann Cc: David S. Miller Cc: netdev@vger.kernel.org Cc: Thomas Gleixner Cc: Andi Kleen Cc: Ingo Molnar Cc: Andrew Morton Cc: Al Viro Cc: x86@kernel.org Signed-off-by: Dominik Brodowski --- include/linux/compat.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index f881cce627f6..8cb8710db0ab 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -49,6 +49,7 @@ COMPAT_SYSCALL_DEFINEx(6, _##name, __VA_ARGS__) #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ + asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))\ __attribute__((alias(__stringify(compat_SyS##name)))); \ static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ @@ -507,8 +508,8 @@ int __compat_save_altstack(compat_stack_t __user *, unsigned long); /* * These syscall function prototypes are kept in the same order as - * include/uapi/asm-generic/unistd.h. Architecture specific entries go below, - * followed by deprecated or obsolete system calls. + * include/uapi/asm-generic/unistd.h. Deprecated or obsolete system calls + * go below. * * Please note that these prototypes here are only provided for information * purposes, for static analysis, and for linking from the syscall table. 
@@ -882,18 +883,6 @@ asmlinkage long compat_sys_pwritev64v2(unsigned long fd, #endif -/* - * Architecture-specific system calls - */ - -/* fs/quota/compat.c -- x86 only */ -asmlinkage long compat_sys_quotactl32(unsigned int cmd, - const char __user *special, qid_t id, void __user *addr); - -/* arch_prctl -- x86 */ -asmlinkage long compat_sys_arch_prctl(int option, unsigned long arg2); - - /* * Deprecated system calls which are still defined in * include/uapi/asm-generic/unistd.h and wanted by >= 1 arch -- cgit v1.2.3 From c9a211951c7c79cfb5de888d7d9550872868b086 Mon Sep 17 00:00:00 2001 From: Howard McLauchlan Date: Wed, 21 Mar 2018 18:59:08 -0700 Subject: bpf: whitelist all syscalls for error injection Error injection is a useful mechanism to fail arbitrary kernel functions. However, it is often hard to guarantee an error propagates appropriately to user space programs. By injecting into syscalls, we can return arbitrary values to user space directly; this increases flexibility and robustness in testing, allowing us to test user space error paths effectively. The following script, for example, fails calls to sys_open() from a given pid: from bcc import BPF from sys import argv pid = argv[1] prog = r""" int kprobe__SyS_open(struct pt_regs *ctx, const char *pathname, int flags) { u32 pid = bpf_get_current_pid_tgid(); if (pid == %s) bpf_override_return(ctx, -ENOMEM); return 0; } """ % pid b = BPF(text=prog) while 1: b.perf_buffer_poll() This patch whitelists all syscalls defined with SYSCALL_DEFINE and COMPAT_SYSCALL_DEFINE for error injection. These changes are not intended to be considered stable, and would normally be configured off. 
Signed-off-by: Howard McLauchlan Signed-off-by: Dominik Brodowski --- include/linux/compat.h | 3 +++ include/linux/syscalls.h | 3 +++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 8cb8710db0ab..9847c5a013c3 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -33,6 +33,8 @@ #endif #define COMPAT_SYSCALL_DEFINE0(name) \ + asmlinkage long compat_sys_##name(void); \ + ALLOW_ERROR_INJECTION(compat_sys_##name, ERRNO); \ asmlinkage long compat_sys_##name(void) #define COMPAT_SYSCALL_DEFINE1(name, ...) \ @@ -52,6 +54,7 @@ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))\ __attribute__((alias(__stringify(compat_SyS##name)))); \ + ALLOW_ERROR_INJECTION(compat_sys##name, ERRNO); \ static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));\ asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))\ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 1f223b7cf16d..b961184f597a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -191,6 +191,8 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) #define SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long sys_##sname(void); \ + ALLOW_ERROR_INJECTION(sys_##sname, ERRNO); \ asmlinkage long sys_##sname(void) #define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) @@ -210,6 +212,7 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) #define __SYSCALL_DEFINEx(x, name, ...) 
\ asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(SyS##name)))); \ + ALLOW_ERROR_INJECTION(sys##name, ERRNO); \ static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ -- cgit v1.2.3 From 8420f71943ae96dcd78da5bd4a5c2827419d340c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 2 Apr 2018 14:45:42 -0500 Subject: signal: Correct the offset of si_pkey and si_lower in struct siginfo on m68k The change moving addr_lsb into the _sigfault union failed to take into account that _sigfault._addr_bnd._lower being a pointer forced the entire union to have pointer alignment. The fix for _sigfault._addr_bnd._lower having pointer alignment failed to take into account that m68k has a pointer alignment less than the size of a pointer. So simply making the padding members pointers changed the location of later members in the structure. Fix this by directly computing the needed size of the padding members, and making the padding members char arrays of the needed size. AKA if __alignof__(void *) is 1 sizeof(short) otherwise __alignof__(void *). Which should be exactly the same rules the compiler would have used when computing the padding. I have tested this change by adding BUILD_BUG_ONs to m68k to verify the offset of every member of struct siginfo, and with those testing that the offsets of the fields in struct siginfo are the same before I changed the generic _sigfault member and after the correction to the _sigfault member. I have also verified that the x86 with its own BUILD_BUG_ONs to verify the offsets of the siginfo members also compiles cleanly. Cc: stable@vger.kernel.org Reported-by: Eugene Syromiatnikov Fixes: 859d880cf544 ("signal: Correct the offset of si_pkey in struct siginfo") Fixes: b68a68d3dcc1 ("signal: Move addr_lsb into the _sigfault union for clarity") Signed-off-by: "Eric W.
Biederman" --- include/linux/compat.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index e16d07eb08cf..d770e62632d7 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -221,6 +221,8 @@ typedef struct compat_siginfo { #ifdef __ARCH_SI_TRAPNO int _trapno; /* TRAP # which caused the signal */ #endif +#define __COMPAT_ADDR_BND_PKEY_PAD (__alignof__(compat_uptr_t) < sizeof(short) ? \ + sizeof(short) : __alignof__(compat_uptr_t)) union { /* * used when si_code=BUS_MCEERR_AR or @@ -229,13 +231,13 @@ typedef struct compat_siginfo { short int _addr_lsb; /* Valid LSB of the reported address. */ /* used when si_code=SEGV_BNDERR */ struct { - compat_uptr_t _dummy_bnd; + char _dummy_bnd[__COMPAT_ADDR_BND_PKEY_PAD]; compat_uptr_t _lower; compat_uptr_t _upper; } _addr_bnd; /* used when si_code=SEGV_PKUERR */ struct { - compat_uptr_t _dummy_pkey; + char _dummy_pkey[__COMPAT_ADDR_BND_PKEY_PAD]; u32 _pkey; } _addr_pkey; }; -- cgit v1.2.3 From 21035965f60b0502fc6537b232839389bb4ce664 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 2 Apr 2018 15:58:31 -0700 Subject: bitmap: fix memset optimization on big-endian systems Commit 2a98dc028f91 ("include/linux/bitmap.h: turn bitmap_set and bitmap_clear into memset when possible") introduced an optimization to bitmap_{set,clear}() which uses memset() when the start and length are constants aligned to a byte. This is wrong on big-endian systems; our bitmaps are arrays of unsigned long, so bit n is not at byte n / 8 in memory. This was caught by the Btrfs selftests, but the bitmap selftests also fail when run on a big-endian machine. We can still use memset if the start and length are aligned to an unsigned long, so do that on big-endian. The same problem applies to the memcmp in bitmap_equal(), so fix it there, too. 
Fixes: 2a98dc028f91 ("include/linux/bitmap.h: turn bitmap_set and bitmap_clear into memset when possible") Fixes: 2c6deb01525a ("bitmap: use memcmp optimisation in more situations") Cc: stable@kernel.org Reported-by: "Erhard F." Cc: Matthew Wilcox Cc: Rasmus Villemoes Cc: Andrew Morton Cc: Arnd Bergmann Signed-off-by: Omar Sandoval Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 5f11fbdc27f8..1ee46f492267 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -302,12 +302,20 @@ static inline void bitmap_complement(unsigned long *dst, const unsigned long *sr __bitmap_complement(dst, src, nbits); } +#ifdef __LITTLE_ENDIAN +#define BITMAP_MEM_ALIGNMENT 8 +#else +#define BITMAP_MEM_ALIGNMENT (8 * sizeof(unsigned long)) +#endif +#define BITMAP_MEM_MASK (BITMAP_MEM_ALIGNMENT - 1) + static inline int bitmap_equal(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); - if (__builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8)) + if (__builtin_constant_p(nbits & BITMAP_MEM_MASK) && + IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) return !memcmp(src1, src2, nbits / 8); return __bitmap_equal(src1, src2, nbits); } @@ -358,8 +366,10 @@ static __always_inline void bitmap_set(unsigned long *map, unsigned int start, { if (__builtin_constant_p(nbits) && nbits == 1) __set_bit(start, map); - else if (__builtin_constant_p(start & 7) && IS_ALIGNED(start, 8) && - __builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8)) + else if (__builtin_constant_p(start & BITMAP_MEM_MASK) && + IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) && + __builtin_constant_p(nbits & BITMAP_MEM_MASK) && + IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) memset((char *)map + start / 8, 0xff, nbits / 8); else __bitmap_set(map, 
start, nbits); @@ -370,8 +380,10 @@ static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, { if (__builtin_constant_p(nbits) && nbits == 1) __clear_bit(start, map); - else if (__builtin_constant_p(start & 7) && IS_ALIGNED(start, 8) && - __builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8)) + else if (__builtin_constant_p(start & BITMAP_MEM_MASK) && + IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) && + __builtin_constant_p(nbits & BITMAP_MEM_MASK) && + IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) memset((char *)map + start / 8, 0, nbits / 8); else __bitmap_clear(map, start, nbits); -- cgit v1.2.3 From bc4a48976f57bc88319bfa32690bcc4b6cef4a29 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 28 Mar 2018 13:50:07 +0200 Subject: PCI: endpoint: Simplify epc->ops->set_bar()/pci_epc_set_bar() Add barno and flags to struct epf_bar. That way we can simplify epc->ops->set_bar()/pci_epc_set_bar() by passing a struct *epf_bar instead of a whole lot of arguments. This is needed so that epc->ops->set_bar() implementations can modify BAR flags. Will be utilized in a succeeding patch. 
Signed-off-by: Niklas Cassel Signed-off-by: Lorenzo Pieralisi Reviewed-by: Gustavo Pimentel Acked-by: Kishon Vijay Abraham I --- include/linux/pci-epc.h | 6 ++---- include/linux/pci-epf.h | 2 ++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index a1a5e5df0f66..75bae8aabbf9 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -39,8 +39,7 @@ struct pci_epc_ops { int (*write_header)(struct pci_epc *epc, u8 func_no, struct pci_epf_header *hdr); int (*set_bar)(struct pci_epc *epc, u8 func_no, - enum pci_barno bar, - dma_addr_t bar_phys, size_t size, int flags); + struct pci_epf_bar *epf_bar); void (*clear_bar)(struct pci_epc *epc, u8 func_no, enum pci_barno bar); int (*map_addr)(struct pci_epc *epc, u8 func_no, @@ -127,8 +126,7 @@ void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf); int pci_epc_write_header(struct pci_epc *epc, u8 func_no, struct pci_epf_header *hdr); int pci_epc_set_bar(struct pci_epc *epc, u8 func_no, - enum pci_barno bar, - dma_addr_t bar_phys, size_t size, int flags); + struct pci_epf_bar *epf_bar); void pci_epc_clear_bar(struct pci_epc *epc, u8 func_no, int bar); int pci_epc_map_addr(struct pci_epc *epc, u8 func_no, phys_addr_t phys_addr, diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index e897bf076701..f7d6f4883f8b 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -97,6 +97,8 @@ struct pci_epf_driver { struct pci_epf_bar { dma_addr_t phys_addr; size_t size; + enum pci_barno barno; + int flags; }; /** -- cgit v1.2.3 From 77d08dbdae2e70a446c61f5db763deed5947acf3 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 28 Mar 2018 13:50:14 +0200 Subject: PCI: endpoint: Make epc->ops->clear_bar()/pci_epc_clear_bar() take struct *epf_bar Make epc->ops->clear_bar()/pci_epc_clear_bar() take struct *epf_bar. 
This is needed so that epc->ops->clear_bar() can clear the BAR pair, if the BAR is 64-bits wide. This also makes it possible for pci_epc_clear_bar() to sanity check the flags. Signed-off-by: Niklas Cassel Signed-off-by: Lorenzo Pieralisi Reviewed-by: Gustavo Pimentel --- include/linux/pci-epc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 75bae8aabbf9..af657ca58b70 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -41,7 +41,7 @@ struct pci_epc_ops { int (*set_bar)(struct pci_epc *epc, u8 func_no, struct pci_epf_bar *epf_bar); void (*clear_bar)(struct pci_epc *epc, u8 func_no, - enum pci_barno bar); + struct pci_epf_bar *epf_bar); int (*map_addr)(struct pci_epc *epc, u8 func_no, phys_addr_t addr, u64 pci_addr, size_t size); void (*unmap_addr)(struct pci_epc *epc, u8 func_no, @@ -127,7 +127,8 @@ int pci_epc_write_header(struct pci_epc *epc, u8 func_no, struct pci_epf_header *hdr); int pci_epc_set_bar(struct pci_epc *epc, u8 func_no, struct pci_epf_bar *epf_bar); -void pci_epc_clear_bar(struct pci_epc *epc, u8 func_no, int bar); +void pci_epc_clear_bar(struct pci_epc *epc, u8 func_no, + struct pci_epf_bar *epf_bar); int pci_epc_map_addr(struct pci_epc *epc, u8 func_no, phys_addr_t phys_addr, u64 pci_addr, size_t size); -- cgit v1.2.3 From 976431b02c2ef92ae3f8b6a7d699fc554025e118 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 29 Mar 2018 17:22:13 -0700 Subject: dax, dm: allow device-mapper to operate without dax support Change device-mapper's DAX dependency to require the presence of at least one DAX_DRIVER. This allows device-mapper to be built without bringing the DAX core along which is especially wasteful when there are no DAX drivers, like BLK_DEV_PMEM, configured. 
Cc: Alasdair Kergon Reported-by: Bart Van Assche Reported-by: kbuild test robot Reported-by: Arnd Bergmann Reviewed-by: Mike Snitzer Signed-off-by: Dan Williams --- include/linux/dax.h | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index ae27a7efe7ab..f9eb22ad341e 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -26,16 +26,39 @@ extern struct attribute_group dax_attribute_group; #if IS_ENABLED(CONFIG_DAX) struct dax_device *dax_get_by_host(const char *host); +struct dax_device *alloc_dax(void *private, const char *host, + const struct dax_operations *ops); void put_dax(struct dax_device *dax_dev); +void kill_dax(struct dax_device *dax_dev); +void dax_write_cache(struct dax_device *dax_dev, bool wc); +bool dax_write_cache_enabled(struct dax_device *dax_dev); #else static inline struct dax_device *dax_get_by_host(const char *host) { return NULL; } - +static inline struct dax_device *alloc_dax(void *private, const char *host, + const struct dax_operations *ops) +{ + /* + * Callers should check IS_ENABLED(CONFIG_DAX) to know if this + * NULL is an error or expected. 
+ */ + return NULL; +} static inline void put_dax(struct dax_device *dax_dev) { } +static inline void kill_dax(struct dax_device *dax_dev) +{ +} +static inline void dax_write_cache(struct dax_device *dax_dev, bool wc) +{ +} +static inline bool dax_write_cache_enabled(struct dax_device *dax_dev) +{ + return false; +} #endif struct writeback_control; @@ -89,18 +112,13 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping, int dax_read_lock(void); void dax_read_unlock(int id); -struct dax_device *alloc_dax(void *private, const char *host, - const struct dax_operations *ops); bool dax_alive(struct dax_device *dax_dev); -void kill_dax(struct dax_device *dax_dev); void *dax_get_private(struct dax_device *dax_dev); long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); -void dax_write_cache(struct dax_device *dax_dev, bool wc); -bool dax_write_cache_enabled(struct dax_device *dax_dev); ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops); -- cgit v1.2.3 From 6db79a88c67e4679d9c1e4a3f05c6385e21f6e9a Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Fri, 30 Mar 2018 08:37:44 -0500 Subject: PCI: Add pcie_bandwidth_available() to compute bandwidth available to device Add pcie_bandwidth_available() to compute the bandwidth available to a device. This may be limited by the device itself or by a slower upstream link leading to the device. The available bandwidth at each link along the path is computed as: link_width * link_speed * (1 - encoding_overhead) 2.5 and 5.0 GT/s links use 8b/10b encoding, which reduces the raw bandwidth available by 20%; 8.0 GT/s and faster links use 128b/130b encoding, which reduces it by about 1.5%. 
The result is in Mb/s, i.e., megabits/second, of raw bandwidth. Also return the device with the slowest link and the speed and width of that link. Signed-off-by: Tal Gilboa [bhelgaas: changelog, leave pcie_get_minimum_link() alone for now, return bw directly, use pci_upstream_bridge(), check "next_bw <= bw" to find uppermost limiting device, return speed/width of the limiting device] Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 8043a5937ad0..f2bf2b7a66c7 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1083,6 +1083,9 @@ int pcie_get_mps(struct pci_dev *dev); int pcie_set_mps(struct pci_dev *dev, int mps); int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed, enum pcie_link_width *width); +u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev, + enum pci_bus_speed *speed, + enum pcie_link_width *width); void pcie_flr(struct pci_dev *dev); int __pci_reset_function_locked(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); -- cgit v1.2.3 From 9e506a7b51474241f0c900e53e85512780275c05 Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Fri, 30 Mar 2018 08:56:47 -0500 Subject: PCI: Add pcie_print_link_status() to log link speed and whether it's limited Add pcie_print_link_status(). This logs the current settings of the link (speed, width, and total available bandwidth). If the device is capable of more bandwidth but is limited by a slower upstream link, we include information about the link that limits the device's performance. The user may be able to move the device to a different slot for better performance. This provides a unified method for all PCI devices to report status and issues, instead of each device reporting in a different way, using different code. 
Signed-off-by: Tal Gilboa [bhelgaas: changelog, reword log messages, print device capabilities when not limited, print bandwidth in Gb/s] Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index f2bf2b7a66c7..38f7957121ef 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1086,6 +1086,7 @@ int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed, u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev, enum pci_bus_speed *speed, enum pcie_link_width *width); +void pcie_print_link_status(struct pci_dev *dev); void pcie_flr(struct pci_dev *dev); int __pci_reset_function_locked(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); -- cgit v1.2.3 From 243f29fe449bbead69076ad861dbe8f51b42c4d7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 2 Apr 2018 13:14:25 -0700 Subject: libnvdimm: add an api to cast a 'struct nd_region' to its 'struct device' For debug, it is useful for bus providers to be able to retrieve the 'struct device' associated with an nd_region instance that it registered. We already have to_nd_region() to perform the reverse cast operation, in fact its duplicate declaration can be removed from the private drivers/nvdimm/nd.h header. 
Reviewed-by: Dave Jiang Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index ff855ed965fb..e0684a678a1b 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -164,6 +164,7 @@ void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); struct nvdimm_bus *to_nvdimm_bus(struct device *dev); struct nvdimm *to_nvdimm(struct device *dev); struct nd_region *to_nd_region(struct device *dev); +struct device *nd_region_dev(struct nd_region *nd_region); struct nd_blk_region *to_nd_blk_region(struct device *dev); struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus); -- cgit v1.2.3 From 1eb5fa849f2bf9186a618e85bea23f02e527540a Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 28 Feb 2018 15:59:59 -0500 Subject: dm: allow targets to return output from messages they are sent Could be useful for a target to return stats or other information. If a target does DMEMIT() anything to @result from its .message method then it must return 1 to the caller. 
Signed-off-By: Mike Snitzer --- include/linux/device-mapper.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index da83f64952e7..1e2426c18eb4 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -87,7 +87,8 @@ typedef void (*dm_resume_fn) (struct dm_target *ti); typedef void (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, unsigned status_flags, char *result, unsigned maxlen); -typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv); +typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv, + char *result, unsigned maxlen); typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **bdev, fmode_t *mode); -- cgit v1.2.3 From 00716545c894fc464e00612809d9cb836b180c99 Mon Sep 17 00:00:00 2001 From: Denis Semakin Date: Tue, 13 Mar 2018 13:23:45 +0400 Subject: dm: add support for secure erase forwarding Set QUEUE_FLAG_SECERASE in DM device's queue_flags if a DM table's data devices support secure erase. Also, add support for secure erase to both the linear and striped targets. Signed-off-by: Denis Semakin Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 1e2426c18eb4..019e2efc6c25 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -267,6 +267,12 @@ struct dm_target { */ unsigned num_discard_bios; + /* + * The number of secure erase bios that will be submitted to the target. + * The bio number can be accessed with dm_bio_get_target_bio_nr. + */ + unsigned num_secure_erase_bios; + /* * The number of WRITE SAME bios that will be submitted to the target. * The bio number can be accessed with dm_bio_get_target_bio_nr. 
-- cgit v1.2.3 From afa53df869121fd4f6f1265cbe794d64387890ae Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 15 Mar 2018 16:02:31 -0400 Subject: dm bufio: move dm-bufio.h to include/linux/ Move dm-bufio.h to include/linux/ so that external GPL'd DM target modules can use it. It is better to allow the use of dm-bufio than force external modules to implement the equivalent buffered IO mechanism in some new way. The hope is this will encourage the use of dm-bufio; which will then make it easier for a GPL'd external DM target module to be included upstream. A couple dm-bufio EXPORT_SYMBOL exports have also been updated to use EXPORT_SYMBOL_GPL. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- include/linux/dm-bufio.h | 148 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 include/linux/dm-bufio.h (limited to 'include/linux') diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h new file mode 100644 index 000000000000..3c8b7d274bd9 --- /dev/null +++ b/include/linux/dm-bufio.h @@ -0,0 +1,148 @@ +/* + * Copyright (C) 2009-2011 Red Hat, Inc. + * + * Author: Mikulas Patocka + * + * This file is released under the GPL. + */ + +#ifndef _LINUX_DM_BUFIO_H +#define _LINUX_DM_BUFIO_H + +#include +#include + +/*----------------------------------------------------------------*/ + +struct dm_bufio_client; +struct dm_buffer; + +/* + * Create a buffered IO cache on a given device + */ +struct dm_bufio_client * +dm_bufio_client_create(struct block_device *bdev, unsigned block_size, + unsigned reserved_buffers, unsigned aux_size, + void (*alloc_callback)(struct dm_buffer *), + void (*write_callback)(struct dm_buffer *)); + +/* + * Release a buffered IO cache. + */ +void dm_bufio_client_destroy(struct dm_bufio_client *c); + +/* + * Set the sector range. + * When this function is called, there must be no I/O in progress on the bufio + * client. 
+ */ +void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start); + +/* + * WARNING: to avoid deadlocks, these conditions are observed: + * + * - At most one thread can hold at most "reserved_buffers" simultaneously. + * - Each other threads can hold at most one buffer. + * - Threads which call only dm_bufio_get can hold unlimited number of + * buffers. + */ + +/* + * Read a given block from disk. Returns pointer to data. Returns a + * pointer to dm_buffer that can be used to release the buffer or to make + * it dirty. + */ +void *dm_bufio_read(struct dm_bufio_client *c, sector_t block, + struct dm_buffer **bp); + +/* + * Like dm_bufio_read, but return buffer from cache, don't read + * it. If the buffer is not in the cache, return NULL. + */ +void *dm_bufio_get(struct dm_bufio_client *c, sector_t block, + struct dm_buffer **bp); + +/* + * Like dm_bufio_read, but don't read anything from the disk. It is + * expected that the caller initializes the buffer and marks it dirty. + */ +void *dm_bufio_new(struct dm_bufio_client *c, sector_t block, + struct dm_buffer **bp); + +/* + * Prefetch the specified blocks to the cache. + * The function starts to read the blocks and returns without waiting for + * I/O to finish. + */ +void dm_bufio_prefetch(struct dm_bufio_client *c, + sector_t block, unsigned n_blocks); + +/* + * Release a reference obtained with dm_bufio_{read,get,new}. The data + * pointer and dm_buffer pointer is no longer valid after this call. + */ +void dm_bufio_release(struct dm_buffer *b); + +/* + * Mark a buffer dirty. It should be called after the buffer is modified. + * + * In case of memory pressure, the buffer may be written after + * dm_bufio_mark_buffer_dirty, but before dm_bufio_write_dirty_buffers. So + * dm_bufio_write_dirty_buffers guarantees that the buffer is on-disk but + * the actual writing may occur earlier. + */ +void dm_bufio_mark_buffer_dirty(struct dm_buffer *b); + +/* + * Mark a part of the buffer dirty. 
+ * + * The specified part of the buffer is scheduled to be written. dm-bufio may + * write the specified part of the buffer or it may write a larger superset. + */ +void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b, + unsigned start, unsigned end); + +/* + * Initiate writing of dirty buffers, without waiting for completion. + */ +void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c); + +/* + * Write all dirty buffers. Guarantees that all dirty buffers created prior + * to this call are on disk when this call exits. + */ +int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c); + +/* + * Send an empty write barrier to the device to flush hardware disk cache. + */ +int dm_bufio_issue_flush(struct dm_bufio_client *c); + +/* + * Like dm_bufio_release but also move the buffer to the new + * block. dm_bufio_write_dirty_buffers is needed to commit the new block. + */ +void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block); + +/* + * Free the given buffer. + * This is just a hint, if the buffer is in use or dirty, this function + * does nothing. + */ +void dm_bufio_forget(struct dm_bufio_client *c, sector_t block); + +/* + * Set the minimum number of buffers before cleanup happens. 
+ */ +void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n); + +unsigned dm_bufio_get_block_size(struct dm_bufio_client *c); +sector_t dm_bufio_get_device_size(struct dm_bufio_client *c); +sector_t dm_bufio_get_block_number(struct dm_buffer *b); +void *dm_bufio_get_block_data(struct dm_buffer *b); +void *dm_bufio_get_aux_data(struct dm_buffer *b); +struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b); + +/*----------------------------------------------------------------*/ + +#endif -- cgit v1.2.3 From 989f881ebf77d70e883dd0fbcfa04a058d97f771 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 27 Mar 2018 10:49:38 -0400 Subject: svc: Simplify ->xpo_secure_port Clean up: Instead of returning a value that is used to set or clear a bit, just make ->xpo_secure_port mangle that bit, and return void. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 1caf7bc83306..19475acb68ea 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -25,7 +25,7 @@ struct svc_xprt_ops { void (*xpo_release_rqst)(struct svc_rqst *); void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); - int (*xpo_secure_port)(struct svc_rqst *); + void (*xpo_secure_port)(struct svc_rqst *rqstp); void (*xpo_kill_temp_xprt)(struct svc_xprt *); }; -- cgit v1.2.3 From ece200ddd54b9ce840cfee554fb812560c545c7d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 27 Mar 2018 10:51:00 -0400 Subject: sunrpc: Save remote presentation address in svc_xprt for trace events TP_printk defines a format string that is passed to user space for converting raw trace event records to something human-readable. My user space's printf (Oracle Linux 7), however, does not have a %pI format specifier. 
The result is that what is supposed to be an IP address in the output of "trace-cmd report" is just a string that says the field couldn't be displayed. To fix this, adopt the same approach as the client: maintain a pre-formatted presentation address for occasions when %pI is not available. The location of the trace_svc_send trace point is adjusted so that rqst->rq_xprt is not NULL when the trace event is recorded. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 19475acb68ea..c3d72066d4b1 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -83,6 +83,7 @@ struct svc_xprt { size_t xpt_locallen; /* length of address */ struct sockaddr_storage xpt_remote; /* remote peer's address */ size_t xpt_remotelen; /* length of address */ + char xpt_remotebuf[INET6_ADDRSTRLEN + 10]; struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */ struct list_head xpt_users; /* callbacks on free */ @@ -152,7 +153,10 @@ static inline void svc_xprt_set_remote(struct svc_xprt *xprt, { memcpy(&xprt->xpt_remote, sa, salen); xprt->xpt_remotelen = salen; + snprintf(xprt->xpt_remotebuf, sizeof(xprt->xpt_remotebuf) - 1, + "%pISpc", sa); } + static inline unsigned short svc_addr_port(const struct sockaddr *sa) { const struct sockaddr_in *sin = (const struct sockaddr_in *)sa; -- cgit v1.2.3 From aaba72cd4e793fbf1c04e06dee3d2c3710339678 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 27 Mar 2018 10:51:39 -0400 Subject: sunrpc: Report per-RPC execution stats Introduce a mechanism to report the server-side execution latency of each RPC. The goal is to enable user space to filter the trace record for latency outliers, build histograms, etc. Signed-off-by: Chuck Lever Signed-off-by: J.
Bruce Fields --- include/linux/sunrpc/svc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 786ae2255f05..3bd7504066e1 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -283,6 +283,7 @@ struct svc_rqst { int rq_reserved; /* space on socket outq * reserved for this request */ + ktime_t rq_stime; /* start time */ struct cache_req rq_chandle; /* handle passed to caches for * request delaying -- cgit v1.2.3 From 55f5088c22cc83dbc64394abfbf76cd1ff5e7cd0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 27 Mar 2018 10:52:27 -0400 Subject: svc: Report xprt dequeue latency Record the time between when a rqstp is enqueued on a transport and when it is dequeued. This includes how long the rqstp waits on the queue and how long it takes the kernel scheduler to wake a nfsd thread to service it. The svc_xprt_dequeue trace point is altered to include the number of microseconds between xprt_enqueue and xprt_dequeue. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 3bd7504066e1..dc4c009deec1 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -272,6 +272,7 @@ struct svc_rqst { #define RQ_BUSY (6) /* request is busy */ #define RQ_DATA (7) /* request has data */ unsigned long rq_flags; /* flags field */ + ktime_t rq_qtime; /* enqueue time */ void * rq_argp; /* decoded arguments */ void * rq_resp; /* xdr'd results */ -- cgit v1.2.3 From 8154ef2776aa512a3eaa0e7db030dc4803354d61 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 27 Mar 2018 10:54:07 -0400 Subject: NFSD: Clean up legacy NFS WRITE argument XDR decoders Move common code in NFSD's legacy NFS WRITE decoders into a helper. 
The immediate benefit is reduction of code duplication and some nice micro-optimizations (see below). In the long term, this helper can perform a per-transport call-out to fill the rq_vec (say, using RDMA Reads). The legacy WRITE decoders and procs are changed to work like NFSv4, which constructs the rq_vec just before it is about to call vfs_writev. Why? Calling a transport call-out from the proc instead of the XDR decoder means that the incoming FH can be resolved to a particular filesystem and file. This would allow pages from the backing file to be presented to the transport to be filled, rather than presenting anonymous pages and copying or flipping them into the file's page cache later. I also prefer using the pages in rq_arg.pages, instead of pulling the data pages directly out of the rqstp::rq_pages array. This is currently the way the NFSv3 write decoder works, but the other two do not seem to take this approach. Fixing this removes the only reference to rq_pages found in NFSD, eliminating an NFSD assumption about how transports use the pages in rq_pages. Lastly, avoid setting up the first element of rq_vec as a zero-length buffer. This happens with an RDMA transport when a normal Read chunk is present because the data payload is in rq_arg's page list (none of it is in the head buffer). Signed-off-by: Chuck Lever Signed-off-by: J.
Bruce Fields --- include/linux/sunrpc/svc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index dc4c009deec1..fb3fcacc1e98 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -495,6 +495,8 @@ void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu); char * svc_print_addr(struct svc_rqst *, char *, size_t); +unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, + struct kvec *first, size_t total); #define RPC_MAX_ADDRBUFLEN (63U) -- cgit v1.2.3 From 38a70315599dedacd9ff3bd1016f9048c9d0ad12 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 27 Mar 2018 10:54:21 -0400 Subject: NFSD: Clean up legacy NFS SYMLINK argument XDR decoders Move common code in NFSD's legacy SYMLINK decoders into a helper. The immediate benefits include: - one fewer data copy on transports that support DDP - consistent error checking across all versions - reduction of code duplication - support for both legal forms of SYMLINK requests on RDMA transports for all versions of NFS (in particular, NFSv2, for completeness) In the long term, this helper is an appropriate spot to perform a per-transport call-out to fill the pathname argument using, say, RDMA Reads. Filling the pathname in the proc function also means that eventually the incoming filehandle can be interpreted so that filesystem-specific memory can be allocated as a sink for the pathname argument, rather than using anonymous pages. Signed-off-by: Chuck Lever Signed-off-by: J.
Bruce Fields --- include/linux/sunrpc/svc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index fb3fcacc1e98..574368e8a16f 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -497,6 +497,8 @@ struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu); char * svc_print_addr(struct svc_rqst *, char *, size_t); unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first, size_t total); +char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, + struct kvec *first, size_t total); #define RPC_MAX_ADDRBUFLEN (63U) -- cgit v1.2.3 From b27ddd46245311850f850024df54d0537506f3c1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Apr 2018 13:41:26 +0100 Subject: fscache: Pass the correct cancelled indications to fscache_op_complete() The last parameter to fscache_op_complete() is a bool indicating whether or not the operation was cancelled. A lot of the time the inverse value is given or no differentiation is made. Fix this. Signed-off-by: David Howells --- include/linux/fscache-cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 3b03e29e2f1a..b19fa8592fc2 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -185,7 +185,7 @@ static inline void fscache_retrieval_complete(struct fscache_retrieval *op, { atomic_sub(n_pages, &op->n_pages); if (atomic_read(&op->n_pages) <= 0) - fscache_op_complete(&op->op, true); + fscache_op_complete(&op->op, false); } /** -- cgit v1.2.3 From a18feb55769b705a44c4107786c4045eae2e87b6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Apr 2018 13:41:27 +0100 Subject: fscache: Add tracepoints Add some tracepoints to fscache: (*) fscache_cookie - Tracks a cookie's usage count. 
(*) fscache_netfs - Logs registration of a network filesystem, including the pointer to the cookie allocated. (*) fscache_acquire - Logs cookie acquisition. (*) fscache_relinquish - Logs cookie relinquishment. (*) fscache_enable - Logs enablement of a cookie. (*) fscache_disable - Logs disablement of a cookie. (*) fscache_osm - Tracks execution of states in the object state machine. and cachefiles: (*) cachefiles_ref - Tracks a cachefiles object's usage count. (*) cachefiles_lookup - Logs result of lookup_one_len(). (*) cachefiles_mkdir - Logs result of vfs_mkdir(). (*) cachefiles_create - Logs result of vfs_create(). (*) cachefiles_unlink - Logs calls to vfs_unlink(). (*) cachefiles_rename - Logs calls to vfs_rename(). (*) cachefiles_mark_active - Logs an object becoming active. (*) cachefiles_wait_active - Logs a wait for an old object to be destroyed. (*) cachefiles_mark_inactive - Logs an object becoming inactive. (*) cachefiles_mark_buried - Logs the burial of an object. Signed-off-by: David Howells --- include/linux/fscache-cache.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index b19fa8592fc2..fbe102f37074 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -29,6 +29,18 @@ struct fscache_cache_ops; struct fscache_object; struct fscache_operation; +enum fscache_obj_ref_trace { + fscache_obj_get_add_to_deps, + fscache_obj_get_queue, + fscache_obj_put_alloc_fail, + fscache_obj_put_attach_fail, + fscache_obj_put_drop_obj, + fscache_obj_put_enq_dep, + fscache_obj_put_queue, + fscache_obj_put_work, + fscache_obj_ref__nr_traces +}; + /* * cache tag definition */ @@ -231,7 +243,8 @@ struct fscache_cache_ops { void (*lookup_complete)(struct fscache_object *object); /* increment the usage count on this object (may fail if unmounting) */ - struct fscache_object *(*grab_object)(struct fscache_object *object); 
+ struct fscache_object *(*grab_object)(struct fscache_object *object, + enum fscache_obj_ref_trace why); /* pin an object in the cache */ int (*pin_object)(struct fscache_object *object); @@ -254,7 +267,8 @@ struct fscache_cache_ops { void (*drop_object)(struct fscache_object *object); /* dispose of a reference to an object */ - void (*put_object)(struct fscache_object *object); + void (*put_object)(struct fscache_object *object, + enum fscache_obj_ref_trace why); /* sync a cache */ void (*sync_cache)(struct fscache_cache *cache); -- cgit v1.2.3 From 08c2e3d087840cd1e7141b62d92f3dc897147984 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Apr 2018 13:41:27 +0100 Subject: fscache: Add more tracepoints Add more tracepoints to fscache, including: (*) fscache_page - Tracks netfs pages known to fscache. (*) fscache_check_page - Tracks the netfs querying whether a page is pending storage. (*) fscache_wake_cookie - Tracks cookies being woken up after a page completes/aborts storage in the cache. (*) fscache_op - Tracks operations being initialised. (*) fscache_wrote_page - Tracks return of the backend write_page op. (*) fscache_gang_lookup - Tracks lookup of pages to be stored in the write operation. 
Signed-off-by: David Howells --- include/linux/fscache-cache.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index fbe102f37074..3e764fd38d9f 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -135,7 +135,8 @@ extern void fscache_op_work_func(struct work_struct *work); extern void fscache_enqueue_operation(struct fscache_operation *); extern void fscache_op_complete(struct fscache_operation *, bool); extern void fscache_put_operation(struct fscache_operation *); -extern void fscache_operation_init(struct fscache_operation *, +extern void fscache_operation_init(struct fscache_cookie *, + struct fscache_operation *, fscache_operation_processor_t, fscache_operation_cancel_t, fscache_operation_release_t); -- cgit v1.2.3 From 402cb8dda949d9b8c0df20ad2527d139faad7ca1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Apr 2018 13:41:28 +0100 Subject: fscache: Attach the index key and aux data to the cookie Attach copies of the index key and auxiliary data to the fscache cookie so that: (1) The callbacks to the netfs for this stuff can be eliminated. This can simplify things in the cache as the information is still available, even after the cache has relinquished the cookie. (2) Simplifies the locking requirements of accessing the information as we don't have to worry about the netfs object going away on us. (3) The cache can do lazy updating of the coherency information on disk. As long as the cache is flushed before reboot/poweroff, there's no need to update the coherency info on disk every time it changes. (4) Cookies can be hashed or put in a tree as the index key is easily available. This allows: (a) Checks for duplicate cookies can be made at the top fscache layer rather than down in the bowels of the cache backend. 
(b) Caching can be added to a netfs object that has a cookie if the cache is brought online after the netfs object is allocated. A certain amount of space is made in the cookie for inline copies of the data, but if it won't fit there, extra memory will be allocated for it. The downside of this is that live cache operation requires more memory. Signed-off-by: David Howells Acked-by: Anna Schumaker Tested-by: Steve Dickson --- include/linux/fscache.h | 110 ++++++++++++++++++++++++++++-------------------- 1 file changed, 65 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index fe0c349684fa..a2d3a2116248 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -83,17 +83,6 @@ struct fscache_cookie_def { const void *parent_netfs_data, const void *cookie_netfs_data); - /* get an index key - * - should store the key data in the buffer - * - should return the amount of data stored - * - not permitted to return an error - * - the netfs data from the cookie being used as the source is - * presented - */ - uint16_t (*get_key)(const void *cookie_netfs_data, - void *buffer, - uint16_t bufmax); - /* get certain file attributes from the netfs data * - this function can be absent for an index * - not permitted to return an error @@ -102,18 +91,6 @@ struct fscache_cookie_def { */ void (*get_attr)(const void *cookie_netfs_data, uint64_t *size); - /* get the auxiliary data from netfs data - * - this function can be absent if the index carries no state data - * - should store the auxiliary data in the buffer - * - should return the amount of amount stored - * - not permitted to return an error - * - the netfs data from the cookie being used as the source is - * presented - */ - uint16_t (*get_aux)(const void *cookie_netfs_data, - void *buffer, - uint16_t bufmax); - /* consult the netfs about the state of an object * - this function can be absent if the index carries no state data * - the netfs data 
from the cookie being used as the target is @@ -186,6 +163,19 @@ struct fscache_cookie { #define FSCACHE_COOKIE_RELINQUISHED 4 /* T if cookie has been relinquished */ #define FSCACHE_COOKIE_ENABLED 5 /* T if cookie is enabled */ #define FSCACHE_COOKIE_ENABLEMENT_LOCK 6 /* T if cookie is being en/disabled */ +#define FSCACHE_COOKIE_AUX_UPDATED 7 /* T if the auxiliary data was updated */ + + u8 type; /* Type of object */ + u8 key_len; /* Length of index key */ + u8 aux_len; /* Length of auxiliary data */ + union { + void *key; /* Index key */ + u8 inline_key[16]; /* - If the key is short enough */ + }; + union { + void *aux; /* Auxiliary data */ + u8 inline_aux[8]; /* - If the aux data is short enough */ + }; }; static inline bool fscache_cookie_enabled(struct fscache_cookie *cookie) @@ -208,10 +198,12 @@ extern void __fscache_release_cache_tag(struct fscache_cache_tag *); extern struct fscache_cookie *__fscache_acquire_cookie( struct fscache_cookie *, const struct fscache_cookie_def *, + const void *, size_t, + const void *, size_t, void *, bool); -extern void __fscache_relinquish_cookie(struct fscache_cookie *, bool); -extern int __fscache_check_consistency(struct fscache_cookie *); -extern void __fscache_update_cookie(struct fscache_cookie *); +extern void __fscache_relinquish_cookie(struct fscache_cookie *, const void *, bool); +extern int __fscache_check_consistency(struct fscache_cookie *, const void *); +extern void __fscache_update_cookie(struct fscache_cookie *, const void *); extern int __fscache_attr_changed(struct fscache_cookie *); extern void __fscache_invalidate(struct fscache_cookie *); extern void __fscache_wait_on_invalidate(struct fscache_cookie *); @@ -238,8 +230,8 @@ extern void __fscache_uncache_all_inode_pages(struct fscache_cookie *, struct inode *); extern void __fscache_readpages_cancel(struct fscache_cookie *cookie, struct list_head *pages); -extern void __fscache_disable_cookie(struct fscache_cookie *, bool); -extern void 
__fscache_enable_cookie(struct fscache_cookie *, +extern void __fscache_disable_cookie(struct fscache_cookie *, const void *, bool); +extern void __fscache_enable_cookie(struct fscache_cookie *, const void *, bool (*)(void *), void *); /** @@ -317,6 +309,10 @@ void fscache_release_cache_tag(struct fscache_cache_tag *tag) * fscache_acquire_cookie - Acquire a cookie to represent a cache object * @parent: The cookie that's to be the parent of this one * @def: A description of the cache object, including callback operations + * @index_key: The index key for this cookie + * @index_key_len: Size of the index key + * @aux_data: The auxiliary data for the cookie (may be NULL) + * @aux_data_len: Size of the auxiliary data buffer * @netfs_data: An arbitrary piece of data to be kept in the cookie to * represent the cache object to the netfs * @enable: Whether or not to enable a data cookie immediately @@ -332,12 +328,18 @@ static inline struct fscache_cookie *fscache_acquire_cookie( struct fscache_cookie *parent, const struct fscache_cookie_def *def, + const void *index_key, + size_t index_key_len, + const void *aux_data, + size_t aux_data_len, void *netfs_data, bool enable) { if (fscache_cookie_valid(parent) && fscache_cookie_enabled(parent)) - return __fscache_acquire_cookie(parent, def, netfs_data, - enable); + return __fscache_acquire_cookie(parent, def, + index_key, index_key_len, + aux_data, aux_data_len, + netfs_data, enable); else return NULL; } @@ -346,36 +348,44 @@ struct fscache_cookie *fscache_acquire_cookie( * fscache_relinquish_cookie - Return the cookie to the cache, maybe discarding * it * @cookie: The cookie being returned + * @aux_data: The updated auxiliary data for the cookie (may be NULL) * @retire: True if the cache object the cookie represents is to be discarded * * This function returns a cookie to the cache, forcibly discarding the - * associated cache object if retire is set to true. + * associated cache object if retire is set to true. 
The opportunity is + * provided to update the auxiliary data in the cache before the object is + * disconnected. * * See Documentation/filesystems/caching/netfs-api.txt for a complete * description. */ static inline -void fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire) +void fscache_relinquish_cookie(struct fscache_cookie *cookie, + const void *aux_data, + bool retire) { if (fscache_cookie_valid(cookie)) - __fscache_relinquish_cookie(cookie, retire); + __fscache_relinquish_cookie(cookie, aux_data, retire); } /** - * fscache_check_consistency - Request that if the cache is updated + * fscache_check_consistency - Request validation of a cache's auxiliary data * @cookie: The cookie representing the cache object + * @aux_data: The updated auxiliary data for the cookie (may be NULL) * - * Request an consistency check from fscache, which passes the request - * to the backing cache. + * Request an consistency check from fscache, which passes the request to the + * backing cache. The auxiliary data on the cookie will be updated first if + * @aux_data is set. * * Returns 0 if consistent and -ESTALE if inconsistent. May also * return -ENOMEM and -ERESTARTSYS. */ static inline -int fscache_check_consistency(struct fscache_cookie *cookie) +int fscache_check_consistency(struct fscache_cookie *cookie, + const void *aux_data) { if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_check_consistency(cookie); + return __fscache_check_consistency(cookie, aux_data); else return 0; } @@ -383,18 +393,20 @@ int fscache_check_consistency(struct fscache_cookie *cookie) /** * fscache_update_cookie - Request that a cache object be updated * @cookie: The cookie representing the cache object + * @aux_data: The updated auxiliary data for the cookie (may be NULL) * * Request an update of the index data for the cache object associated with the - * cookie. + * cookie. 
The auxiliary data on the cookie will be updated first if @aux_data + * is set. * * See Documentation/filesystems/caching/netfs-api.txt for a complete * description. */ static inline -void fscache_update_cookie(struct fscache_cookie *cookie) +void fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data) { if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - __fscache_update_cookie(cookie); + __fscache_update_cookie(cookie, aux_data); } /** @@ -780,6 +792,7 @@ void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, /** * fscache_disable_cookie - Disable a cookie * @cookie: The cookie representing the cache object + * @aux_data: The updated auxiliary data for the cookie (may be NULL) * @invalidate: Invalidate the backing object * * Disable a cookie from accepting further alloc, read, write, invalidate, @@ -790,34 +803,41 @@ void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, * * If @invalidate is set, then the backing object will be invalidated and * detached, otherwise it will just be detached. + * + * If @aux_data is set, then auxiliary data will be updated from that. */ static inline -void fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate) +void fscache_disable_cookie(struct fscache_cookie *cookie, + const void *aux_data, + bool invalidate) { if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - __fscache_disable_cookie(cookie, invalidate); + __fscache_disable_cookie(cookie, aux_data, invalidate); } /** * fscache_enable_cookie - Reenable a cookie * @cookie: The cookie representing the cache object + * @aux_data: The updated auxiliary data for the cookie (may be NULL) * @can_enable: A function to permit enablement once lock is held * @data: Data for can_enable() * * Reenable a previously disabled cookie, allowing it to accept further alloc, * read, write, invalidate, update or acquire operations. 
An attempt will be - * made to immediately reattach the cookie to a backing object. + * made to immediately reattach the cookie to a backing object. If @aux_data + * is set, the auxiliary data attached to the cookie will be updated. * * The can_enable() function is called (if not NULL) once the enablement lock * is held to rule on whether enablement is still permitted to go ahead. */ static inline void fscache_enable_cookie(struct fscache_cookie *cookie, + const void *aux_data, bool (*can_enable)(void *data), void *data) { if (fscache_cookie_valid(cookie) && !fscache_cookie_enabled(cookie)) - __fscache_enable_cookie(cookie, can_enable, data); + __fscache_enable_cookie(cookie, aux_data, can_enable, data); } #endif /* _LINUX_FSCACHE_H */ -- cgit v1.2.3 From fcfaab30933bd151bd8cb4dd07b3f11d885bb611 Mon Sep 17 00:00:00 2001 From: Gabriele Paoloni Date: Thu, 15 Mar 2018 02:15:52 +0800 Subject: PCI: Add fwnode handler as input param of pci_register_io_range() In preparation for having the PCI MMIO helpers use the new generic I/O space management (logical PIO) we need to add the fwnode handler as an extra input parameter. Changes the signature of pci_register_io_range() and its callers as needed. 
Tested-by: dann frazier Signed-off-by: Gabriele Paoloni Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Acked-by: Rob Herring --- include/linux/pci.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 024a1beda008..be686fd87abb 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1226,7 +1226,8 @@ int __must_check pci_bus_alloc_resource(struct pci_bus *bus, void *alignf_data); -int pci_register_io_range(phys_addr_t addr, resource_size_t size); +int pci_register_io_range(struct fwnode_handle *fwnode, phys_addr_t addr, + resource_size_t size); unsigned long pci_address_to_pio(phys_addr_t addr); phys_addr_t pci_pio_to_address(unsigned long pio); int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr); -- cgit v1.2.3 From 5bd5e8d891c1fd2d966a7e2c26f0452d22410683 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 3 Apr 2018 16:54:10 -0400 Subject: dm: remove fmode_t argument from .prepare_ioctl hook Use the fmode_t that is passed to dm_blk_ioctl() rather than inconsistently (varies across targets) drop it on the floor by overriding it with the fmode_t stored in 'struct dm_dev'. All the persistent reservation functions weren't using the fmode_t they got back from .prepare_ioctl so remove them. 
Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 019e2efc6c25..ed038fbecd55 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -90,8 +90,7 @@ typedef void (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv, char *result, unsigned maxlen); -typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, - struct block_device **bdev, fmode_t *mode); +typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **bdev); /* * These iteration functions are typically used to check (and combine) -- cgit v1.2.3 From 363c5a570d4a386fa1bf8d3833de817d7c4fcda2 Mon Sep 17 00:00:00 2001 From: Aviad Yehezkel Date: Wed, 28 Mar 2018 09:27:52 +0300 Subject: {net,IB}/mlx5: Add ipsec helper Simple wrapper to understand if we are dealing with IPsec flow. 
Signed-off-by: Aviad Yehezkel Signed-off-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/fs_helpers.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs_helpers.h b/include/linux/mlx5/fs_helpers.h index 7b476bbae731..9db21cd0e92c 100644 --- a/include/linux/mlx5/fs_helpers.h +++ b/include/linux/mlx5/fs_helpers.h @@ -38,6 +38,14 @@ #define MLX5_FS_IPV4_VERSION 4 #define MLX5_FS_IPV6_VERSION 6 +static inline bool mlx5_fs_is_ipsec_flow(const u32 *match_c) +{ + void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, + misc_parameters); + + return MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi); +} + static inline bool _mlx5_fs_is_outer_ipproto_flow(const u32 *match_c, const u32 *match_v, u8 match) { -- cgit v1.2.3 From 317d359df95dd0cb7653d09b7fc513770590cf85 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 5 Apr 2018 10:05:21 +0200 Subject: sched/core: Force proper alignment of 'struct util_est' For some as yet not understood reason, Tony gets unaligned access traps on IA64 because of: struct util_est ue = READ_ONCE(p->se.avg.util_est); and: WRITE_ONCE(p->se.avg.util_est, ue); introduced by commit: d519329f72a6 ("sched/fair: Update util_est only on util_avg updates") Normally those two fields should end up on an 8-byte aligned location, but UP and RANDSTRUCT can mess that up so enforce the alignment explicitly. Also make the alignment on sched_avg unconditional, as it is really about data locality, not false-sharing. With or without this patch the layout for sched_avg on a ia64-defconfig build looks like: $ pahole -EC sched_avg ia64-defconfig/kernel/sched/core.o die__process_function: tag not supported (INVALID)! 
struct sched_avg { /* typedef u64 */ long long unsigned int last_update_time; /* 0 8 */ /* typedef u64 */ long long unsigned int load_sum; /* 8 8 */ /* typedef u64 */ long long unsigned int runnable_load_sum; /* 16 8 */ /* typedef u32 */ unsigned int util_sum; /* 24 4 */ /* typedef u32 */ unsigned int period_contrib; /* 28 4 */ long unsigned int load_avg; /* 32 8 */ long unsigned int runnable_load_avg; /* 40 8 */ long unsigned int util_avg; /* 48 8 */ struct util_est { unsigned int enqueued; /* 56 4 */ unsigned int ewma; /* 60 4 */ } util_est; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ /* size: 64, cachelines: 1, members: 9 */ }; Reported-and-Tested-by: Tony Luck Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Mel Gorman Cc: Norbert Manthey Cc: Patrick Bellasi Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Cc: Vincent Guittot Fixes: d519329f72a6 ("sched/fair: Update util_est only on util_avg updates") Link: http://lkml.kernel.org/r/20180405080521.GG4129@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f228c6033832..b3d697f3b573 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -300,7 +300,7 @@ struct util_est { unsigned int enqueued; unsigned int ewma; #define UTIL_EST_WEIGHT_SHIFT 2 -}; +} __attribute__((__aligned__(sizeof(u64)))); /* * The load_avg/util_avg accumulates an infinite geometric series @@ -364,7 +364,7 @@ struct sched_avg { unsigned long runnable_load_avg; unsigned long util_avg; struct util_est util_est; -}; +} ____cacheline_aligned; struct sched_statistics { #ifdef CONFIG_SCHEDSTATS @@ -435,7 +435,7 @@ struct sched_entity { * Put into separate cache line so it does not * collide with read-mostly values above. 
*/ - struct sched_avg avg ____cacheline_aligned_in_smp; + struct sched_avg avg; #endif }; -- cgit v1.2.3 From 1bd21c6c21e848996339508d3ffb106d505256a8 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 5 Apr 2018 11:53:01 +0200 Subject: syscalls/core: Introduce CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y It may be useful for an architecture to override the definitions of the SYSCALL_DEFINE0() and __SYSCALL_DEFINEx() macros in <linux/syscalls.h>, in particular to use a different calling convention for syscalls. This patch provides a mechanism to do so: It introduces CONFIG_ARCH_HAS_SYSCALL_WRAPPER. If it is enabled, <asm/syscall_wrapper.h> is included in <linux/syscalls.h> and may be used to define the macros mentioned above. Moreover, as the syscall calling convention may be different if CONFIG_ARCH_HAS_SYSCALL_WRAPPER is set, the syscall function prototypes in <linux/syscalls.h> are #ifndef'd out in that case. Signed-off-by: Dominik Brodowski Acked-by: Linus Torvalds Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180405095307.3730-3-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b961184f597a..503ab245d4ce 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -81,6 +81,17 @@ union bpf_attr; #include #include +#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER +/* + * It may be useful for an architecture to override the definitions of the + * SYSCALL_DEFINE0() and __SYSCALL_DEFINEx() macros, in particular to use a + * different calling convention for syscalls. To allow for that, the prototypes + * for the sys_*() functions below will *not* be included if + * CONFIG_ARCH_HAS_SYSCALL_WRAPPER is enabled.
+ */ +#include <asm/syscall_wrapper.h> +#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */ + /* * __MAP - apply a macro to syscall arguments * __MAP(n, m, t1, a1, t2, a2, ..., tn, an) will expand to @@ -189,11 +200,13 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) } #endif +#ifndef SYSCALL_DEFINE0 #define SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ asmlinkage long sys_##sname(void); \ ALLOW_ERROR_INJECTION(sys_##sname, ERRNO); \ asmlinkage long sys_##sname(void) +#endif /* SYSCALL_DEFINE0 */ #define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) #define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__) @@ -209,6 +222,8 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) #define __PROTECT(...) asmlinkage_protect(__VA_ARGS__) + +#ifndef __SYSCALL_DEFINEx #define __SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(SyS##name)))); \ @@ -223,6 +238,7 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) return ret; \ } \ static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)) +#endif /* __SYSCALL_DEFINEx */ /* * Called before coming back to user-mode. Returning to user-mode with an @@ -252,7 +268,12 @@ static inline void addr_limit_user_check(void) * Please note that these prototypes here are only provided for information * purposes, for static analysis, and for linking from the syscall table. * These functions should not be called elsewhere from kernel code. + * + * As the syscall calling convention may be different from the default + * for architectures overriding the syscall calling convention, do not + * include the prototypes if CONFIG_ARCH_HAS_SYSCALL_WRAPPER is enabled.
*/ +#ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER asmlinkage long sys_io_setup(unsigned nr_reqs, aio_context_t __user *ctx); asmlinkage long sys_io_destroy(aio_context_t ctx); asmlinkage long sys_io_submit(aio_context_t, long, @@ -1076,6 +1097,8 @@ asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg); */ asmlinkage long sys_ni_syscall(void); +#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */ + /* * Kernel code should not call syscalls (i.e., sys_xyzyyz()) directly. -- cgit v1.2.3 From fa697140f9a20119a9ec8fd7460cc4314fbdaff3 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 5 Apr 2018 11:53:02 +0200 Subject: syscalls/x86: Use 'struct pt_regs' based syscall calling convention for 64-bit syscalls Let's make use of ARCH_HAS_SYSCALL_WRAPPER=y on pure 64-bit x86-64 systems: Each syscall defines a stub which takes struct pt_regs as its only argument. It decodes just those parameters it needs, e.g: asmlinkage long sys_xyzzy(const struct pt_regs *regs) { return SyS_xyzzy(regs->di, regs->si, regs->dx); } This approach avoids leaking random user-provided register content down the call chain. For example, for sys_recv() which is a 4-parameter syscall, the assembly now is (in slightly reordered fashion): <sys_recv>: callq <__fentry__> /* decode regs->di, ->si, ->dx and ->r10 */ mov 0x70(%rdi),%rdi mov 0x68(%rdi),%rsi mov 0x60(%rdi),%rdx mov 0x38(%rdi),%rcx [ SyS_recv() is automatically inlined by the compiler, as it is not [yet] used anywhere else ] /* clear %r9 and %r8, the 5th and 6th args */ xor %r9d,%r9d xor %r8d,%r8d /* do the actual work */ callq __sys_recvfrom /* cleanup and return */ cltq retq The only valid place in an x86-64 kernel which rightfully calls a syscall function on its own -- vsyscall -- needs to be modified to pass struct pt_regs onwards as well. To keep the syscall table generation working independent of SYSCALL_PTREGS being enabled, the stubs are named the same as the "original" syscall stubs, i.e. sys_*().
This patch is based on an original proof-of-concept | From: Linus Torvalds | Signed-off-by: Linus Torvalds and was split up and heavily modified by me, in particular to base it on ARCH_HAS_SYSCALL_WRAPPER, to limit it to 64-bit-only for the time being, and to update the vsyscall to the new calling convention. Signed-off-by: Dominik Brodowski Acked-by: Linus Torvalds Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180405095307.3730-4-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 503ab245d4ce..d7168b3a4b4c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -102,7 +102,7 @@ union bpf_attr; * for SYSCALL_DEFINE/COMPAT_SYSCALL_DEFINE */ #define __MAP0(m,...) -#define __MAP1(m,t,a) m(t,a) +#define __MAP1(m,t,a,...) m(t,a) #define __MAP2(m,t,a,...) m(t,a), __MAP1(m,__VA_ARGS__) #define __MAP3(m,t,a,...) m(t,a), __MAP2(m,__VA_ARGS__) #define __MAP4(m,t,a,...) m(t,a), __MAP3(m,__VA_ARGS__) -- cgit v1.2.3 From 7303e30ec1d8fb5ca1f07c92d069241c32b2ee1b Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 5 Apr 2018 11:53:03 +0200 Subject: syscalls/core: Prepare CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y for compat syscalls It may be useful for an architecture to override the definitions of the COMPAT_SYSCALL_DEFINE0() and __COMPAT_SYSCALL_DEFINEx() macros in <linux/compat.h>, in particular to use a different calling convention for syscalls. This patch provides a mechanism to do so, based on the previously introduced CONFIG_ARCH_HAS_SYSCALL_WRAPPER. If it is enabled, <asm/syscall_wrapper.h> is included in <linux/compat.h> and may be used to define the macros mentioned above.
Moreover, as the syscall calling convention may be different if CONFIG_ARCH_HAS_SYSCALL_WRAPPER is set, the compat syscall function prototypes in <linux/compat.h> are #ifndef'd out in that case. As some of the syscalls and/or compat syscalls may not be present, the COND_SYSCALL() and COND_SYSCALL_COMPAT() macros in kernel/sys_ni.c as well as the SYS_NI() and COMPAT_SYS_NI() macros in kernel/time/posix-stubs.c can be re-defined in <asm/syscall_wrapper.h> iff CONFIG_ARCH_HAS_SYSCALL_WRAPPER is enabled. Signed-off-by: Dominik Brodowski Acked-by: Linus Torvalds Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180405095307.3730-5-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- include/linux/compat.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 9847c5a013c3..2d85ec5cfda2 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -24,6 +24,17 @@ #include #include +#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER +/* + * It may be useful for an architecture to override the definitions of the + * COMPAT_SYSCALL_DEFINE0 and COMPAT_SYSCALL_DEFINEx() macros, in particular + * to use a different calling convention for syscalls. To allow for that, + + the prototypes for the compat_sys_*() functions below will *not* be included + * if CONFIG_ARCH_HAS_SYSCALL_WRAPPER is enabled.
+ */ +#include <asm/syscall_wrapper.h> +#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */ + #ifndef COMPAT_USE_64BIT_TIME #define COMPAT_USE_64BIT_TIME 0 #endif @@ -32,10 +43,12 @@ #define __SC_DELOUSE(t,v) ((__force t)(unsigned long)(v)) #endif +#ifndef COMPAT_SYSCALL_DEFINE0 #define COMPAT_SYSCALL_DEFINE0(name) \ asmlinkage long compat_sys_##name(void); \ ALLOW_ERROR_INJECTION(compat_sys_##name, ERRNO); \ asmlinkage long compat_sys_##name(void) +#endif /* COMPAT_SYSCALL_DEFINE0 */ #define COMPAT_SYSCALL_DEFINE1(name, ...) \ COMPAT_SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) @@ -50,6 +63,7 @@ #define COMPAT_SYSCALL_DEFINE6(name, ...) \ COMPAT_SYSCALL_DEFINEx(6, _##name, __VA_ARGS__) +#ifndef COMPAT_SYSCALL_DEFINEx #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))\ @@ -62,6 +76,7 @@ return C_SYSC##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \ } \ static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)) +#endif /* COMPAT_SYSCALL_DEFINEx */ #ifndef compat_user_stack_pointer #define compat_user_stack_pointer() current_user_stack_pointer() @@ -517,7 +532,12 @@ int __compat_save_altstack(compat_stack_t __user *, unsigned long); * Please note that these prototypes here are only provided for information * purposes, for static analysis, and for linking from the syscall table. * These functions should not be called elsewhere from kernel code. + * + * As the syscall calling convention may be different from the default + * for architectures overriding the syscall calling convention, do not + * include the prototypes if CONFIG_ARCH_HAS_SYSCALL_WRAPPER is enabled.
*/ +#ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER asmlinkage long compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p); asmlinkage long compat_sys_io_submit(compat_aio_context_t ctx_id, int nr, u32 __user *iocb); @@ -955,6 +975,8 @@ asmlinkage long compat_sys_stime(compat_time_t __user *tptr); /* obsolete: net/socket.c */ asmlinkage long compat_sys_socketcall(int call, u32 __user *args); +#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */ + /* * For most but not all architectures, "am I in a compat syscall?" and -- cgit v1.2.3 From 0e7767687fdabfc58d5046e7488632bf2ecd4d0c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Apr 2018 18:58:27 +0200 Subject: time: tick-sched: Reorganize idle tick management code Prepare the scheduler tick code for reworking the idle loop to avoid stopping the tick in some cases. The idea is to split the nohz idle entry call to decouple the idle time stats accounting and preparatory work from the actual tick stop code, in order to later be able to delay the tick stop once we reach more power-knowledgeable callers. Move away the tick_nohz_start_idle() invocation from __tick_nohz_idle_enter(), rename the latter to __tick_nohz_idle_stop_tick() and define tick_nohz_idle_stop_tick() as a wrapper around it for calling it from the outside. Make tick_nohz_idle_enter() only call tick_nohz_start_idle() instead of calling the entire __tick_nohz_idle_enter(), add another wrapper disabling and enabling interrupts around tick_nohz_idle_stop_tick() and make the current callers of tick_nohz_idle_enter() call it too to retain their current functionality. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) --- include/linux/tick.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 7f8c9a127f5a..1d253df9ea3c 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -115,6 +115,7 @@ enum tick_dep_bits { extern bool tick_nohz_enabled; extern bool tick_nohz_tick_stopped(void); extern bool tick_nohz_tick_stopped_cpu(int cpu); +extern void tick_nohz_idle_stop_tick(void); extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); @@ -123,10 +124,19 @@ extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); + +static inline void tick_nohz_idle_stop_tick_protected(void) +{ + local_irq_disable(); + tick_nohz_idle_stop_tick(); + local_irq_enable(); +} + #else /* !CONFIG_NO_HZ_COMMON */ #define tick_nohz_enabled (0) static inline int tick_nohz_tick_stopped(void) { return 0; } static inline int tick_nohz_tick_stopped_cpu(int cpu) { return 0; } +static inline void tick_nohz_idle_stop_tick(void) { } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } @@ -136,6 +146,8 @@ static inline ktime_t tick_nohz_get_sleep_length(void) } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } + +static inline void tick_nohz_idle_stop_tick_protected(void) { } #endif /* !CONFIG_NO_HZ_COMMON */ #ifdef CONFIG_NO_HZ_FULL -- cgit v1.2.3 From 2aaf709a518d26563b80fd7a42379d7aa7ffed4a Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 15 Mar 2018 23:05:50 +0100 Subject: sched: idle: Do not stop the tick upfront in the idle loop Push the decision whether or not to stop the tick somewhat deeper into the idle loop. Stopping the tick upfront leads to unpleasant outcomes in case the idle governor doesn't agree with the nohz code on the duration of the upcoming idle period. Specifically, if the tick has been stopped and the idle governor predicts short idle, the situation is bad regardless of whether or not the prediction is accurate. If it is accurate, the tick has been stopped unnecessarily which means excessive overhead. If it is not accurate, the CPU is likely to spend too much time in the (shallow, because short idle has been predicted) idle state selected by the governor [1]. As the first step towards addressing this problem, change the code to make the tick stopping decision inside of the loop in do_idle(). In particular, do not stop the tick in the cpu_idle_poll() code path. Also don't do that in tick_nohz_irq_exit() which doesn't really have enough information on whether or not to stop the tick. Link: https://marc.info/?l=linux-pm&m=150116085925208&w=2 # [1] Link: https://tu-dresden.de/zih/forschung/ressourcen/dateien/projekte/haec/powernightmares.pdf Suggested-by: Frederic Weisbecker Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) --- include/linux/tick.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 1d253df9ea3c..fccebfba167e 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -116,6 +116,7 @@ extern bool tick_nohz_enabled; extern bool tick_nohz_tick_stopped(void); extern bool tick_nohz_tick_stopped_cpu(int cpu); extern void tick_nohz_idle_stop_tick(void); +extern void tick_nohz_idle_restart_tick(void); extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); @@ -137,6 +138,7 @@ static inline void tick_nohz_idle_stop_tick_protected(void) static inline int tick_nohz_tick_stopped(void) { return 0; } static inline int tick_nohz_tick_stopped_cpu(int cpu) { return 0; } static inline void tick_nohz_idle_stop_tick(void) { } +static inline void tick_nohz_idle_restart_tick(void) { } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } -- cgit v1.2.3 From e72bd817aee2bd867a90aac68aca07d99addcb55 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Thu, 5 Apr 2018 18:53:26 +0300 Subject: net/mlx5: Query device memory capabilities This patch adds querying of device memory capabilities by the mlx5_core driver during initialization. Device memory capabilities is a new capability type and structure which contains the necessary data that is needed for future device memory allocation. The presence of this new capabilities struct is indicated in the general capabilities struct which is queried first by the driver. If the presence bit is set, the driver will also query the new capabilities struct and save it in the device context. 
Signed-off-by: Ariel Levkovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/device.h | 9 +++++++++ include/linux/mlx5/mlx5_ifc.h | 20 +++++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 413df3c11a46..2651691c05fb 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1013,6 +1013,9 @@ enum mlx5_cap_type { MLX5_CAP_RESERVED, MLX5_CAP_VECTOR_CALC, MLX5_CAP_QOS, + MLX5_CAP_DEBUG, + MLX5_CAP_RESERVED_14, + MLX5_CAP_DEV_MEM, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1161,6 +1164,12 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP64_FPGA(mdev, cap) \ MLX5_GET64(fpga_cap, (mdev)->caps.fpga, cap) +#define MLX5_CAP_DEV_MEM(mdev, cap)\ + MLX5_GET(device_mem_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_MEM], cap) + +#define MLX5_CAP64_DEV_MEM(mdev, cap)\ + MLX5_GET64(device_mem_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_MEM], cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 64963fd2cd9b..13c3bf25753b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -658,6 +658,24 @@ struct mlx5_ifc_roce_cap_bits { u8 reserved_at_100[0x700]; }; +struct mlx5_ifc_device_mem_cap_bits { + u8 memic[0x1]; + u8 reserved_at_1[0x1f]; + + u8 reserved_at_20[0xb]; + u8 log_min_memic_alloc_size[0x5]; + u8 reserved_at_30[0x8]; + u8 log_max_memic_addr_alignment[0x8]; + + u8 memic_bar_start_addr[0x40]; + + u8 memic_bar_size[0x20]; + + u8 max_memic_size[0x20]; + + u8 reserved_at_c0[0x740]; +}; + enum { MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_1_BYTE = 0x0, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_2_BYTES = 0x2, @@ -872,7 +890,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 ets[0x1]; u8 nic_flow_table[0x1]; u8 eswitch_flow_table[0x1]; - u8 early_vf_enable[0x1]; + u8 device_memory[0x1]; u8 mcam_reg[0x1]; u8 pcam_reg[0x1]; u8 
local_ca_ack_delay[0x5]; -- cgit v1.2.3 From 24da00164f7a9c247d2224a54494d0e955199630 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Thu, 5 Apr 2018 18:53:27 +0300 Subject: IB/mlx5: Device memory support in mlx5_ib This patch adds the mlx5_ib driver implementation for the device memory allocation API. It implements the ib_device callbacks for allocation and deallocation operations as well as a new mmap command support which allows mapping an allocated device memory to a VMA. The change also adds reporting of device memory maximum size and alignment parameters reported in device capabilities. The allocation/deallocation operations are using new firmware commands to allocate MEMIC memory on the device. Signed-off-by: Ariel Levkovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/mlx5_ifc.h | 55 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 13c3bf25753b..a64e59b65a33 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -92,6 +92,8 @@ enum { MLX5_CMD_OP_DESTROY_MKEY = 0x202, MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS = 0x203, MLX5_CMD_OP_PAGE_FAULT_RESUME = 0x204, + MLX5_CMD_OP_ALLOC_MEMIC = 0x205, + MLX5_CMD_OP_DEALLOC_MEMIC = 0x206, MLX5_CMD_OP_CREATE_EQ = 0x301, MLX5_CMD_OP_DESTROY_EQ = 0x302, MLX5_CMD_OP_QUERY_EQ = 0x303, @@ -8886,4 +8888,57 @@ struct mlx5_ifc_destroy_vport_lag_in_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_alloc_memic_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_30[0x20]; + + u8 reserved_at_40[0x18]; + u8 log_memic_addr_alignment[0x8]; + + u8 range_start_addr[0x40]; + + u8 range_size[0x20]; + + u8 memic_size[0x20]; +}; + +struct mlx5_ifc_alloc_memic_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 memic_start_addr[0x40]; +}; 
+ +struct mlx5_ifc_dealloc_memic_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; + + u8 memic_start_addr[0x40]; + + u8 memic_size[0x20]; + + u8 reserved_at_e0[0x20]; +}; + +struct mlx5_ifc_dealloc_memic_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + #endif /* MLX5_IFC_H */ -- cgit v1.2.3 From cdbd0d2bae14566cf875595180b91527b4431df8 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Thu, 5 Apr 2018 18:53:28 +0300 Subject: net/mlx5: Mkey creation command adjustments This change updates the mlx5 interface to create mkey on the device. The updates in the command mailbox include increasing the access mode type field to 5 bits in order to support additional types such as MLX5_MKC_ACCESS_MODE_MEMIC which represents device memory access type and will be used when registering MR on allocated device memory. All the places that use the old access mode format are adjusted as well. 
Signed-off-by: Ariel Levkovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/mlx5_ifc.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a64e59b65a33..fa6f134c85d7 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2720,12 +2720,17 @@ enum { MLX5_MKC_ACCESS_MODE_MTT = 0x1, MLX5_MKC_ACCESS_MODE_KLMS = 0x2, MLX5_MKC_ACCESS_MODE_KSM = 0x3, + MLX5_MKC_ACCESS_MODE_MEMIC = 0x5, }; struct mlx5_ifc_mkc_bits { u8 reserved_at_0[0x1]; u8 free[0x1]; - u8 reserved_at_2[0xd]; + u8 reserved_at_2[0x1]; + u8 access_mode_4_2[0x3]; + u8 reserved_at_6[0x7]; + u8 relaxed_ordering_write[0x1]; + u8 reserved_at_e[0x1]; u8 small_fence_on_rdma_read_response[0x1]; u8 umr_en[0x1]; u8 a[0x1]; @@ -2733,7 +2738,7 @@ struct mlx5_ifc_mkc_bits { u8 rr[0x1]; u8 lw[0x1]; u8 lr[0x1]; - u8 access_mode[0x2]; + u8 access_mode_1_0[0x2]; u8 reserved_at_18[0x8]; u8 qpn[0x18]; -- cgit v1.2.3 From 3c8ba0d61d04ced9f8d9ff93977995a9e4e96e91 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 30 Mar 2018 18:52:36 -0700 Subject: kernel.h: Retain constant expression output for max()/min() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the effort to remove all VLAs from the kernel[1], it is desirable to build with -Wvla. However, this warning is overly pessimistic, in that it is only happy with stack array sizes that are declared as constant expressions, and not constant values. One case of this is the evaluation of the max() macro which, due to its construction, ends up converting constant expression arguments into a constant value result. All attempts to rewrite this macro with __builtin_constant_p() failed with older compilers (e.g. gcc 4.4)[2]. However, Martin Uecker, constructed[3] a mind-shattering solution that works everywhere. Cthulhu fhtagn! 
This patch updates the min()/max() macros to evaluate to a constant expression when called on constant expression arguments. This removes several false-positive stack VLA warnings from an x86 allmodconfig build when -Wvla is added: $ diff -u before.txt after.txt | grep ^- -drivers/input/touchscreen/cyttsp4_core.c:871:2: warning: ISO C90 forbids variable length array ‘ids’ [-Wvla] -fs/btrfs/tree-checker.c:344:4: warning: ISO C90 forbids variable length array ‘namebuf’ [-Wvla] -lib/vsprintf.c:747:2: warning: ISO C90 forbids variable length array ‘sym’ [-Wvla] -net/ipv4/proc.c:403:2: warning: ISO C90 forbids variable length array ‘buff’ [-Wvla] -net/ipv6/proc.c:198:2: warning: ISO C90 forbids variable length array ‘buff’ [-Wvla] -net/ipv6/proc.c:218:2: warning: ISO C90 forbids variable length array ‘buff64’ [-Wvla] This also updates two cases where different enums were being compared and explicitly casts them to int (which matches the old side-effect of the single-evaluation code): one in tpm/tpm_tis_core.h, and one in drm/drm_color_mgmt.c. [1] https://lkml.org/lkml/2018/3/7/621 [2] https://lkml.org/lkml/2018/3/10/170 [3] https://lkml.org/lkml/2018/3/20/845 Co-Developed-by: Linus Torvalds Co-Developed-by: Martin Uecker Signed-off-by: Kees Cook Acked-by: Ingo Molnar Acked-by: Miguel Ojeda Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 71 +++++++++++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3cbf3cfff4f0..4ae1dfd9bf05 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -792,41 +792,58 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #endif /* CONFIG_TRACING */ /* - * min()/max()/clamp() macros that also do - * strict type-checking.. See the - * "unnecessary" pointer comparison. 
+ * min()/max()/clamp() macros must accomplish three things: + * + * - avoid multiple evaluations of the arguments (so side-effects like + * "x++" happen only once) when non-constant. + * - perform strict type-checking (to generate warnings instead of + * nasty runtime surprises). See the "unnecessary" pointer comparison + * in __typecheck(). + * - retain result as a constant expressions when called with only + * constant expressions (to avoid tripping VLA warnings in stack + * allocation usage). + */ +#define __typecheck(x, y) \ + (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) + +/* + * This returns a constant expression while determining if an argument is + * a constant expression, most importantly without evaluating the argument. + * Glory to Martin Uecker */ -#define __min(t1, t2, min1, min2, x, y) ({ \ - t1 min1 = (x); \ - t2 min2 = (y); \ - (void) (&min1 == &min2); \ - min1 < min2 ? min1 : min2; }) +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) + +#define __no_side_effects(x, y) \ + (__is_constexpr(x) && __is_constexpr(y)) + +#define __safe_cmp(x, y) \ + (__typecheck(x, y) && __no_side_effects(x, y)) + +#define __cmp(x, y, op) ((x) op (y) ? (x) : (y)) + +#define __cmp_once(x, y, op) ({ \ + typeof(x) __x = (x); \ + typeof(y) __y = (y); \ + __cmp(__x, __y, op); }) + +#define __careful_cmp(x, y, op) \ + __builtin_choose_expr(__safe_cmp(x, y), \ + __cmp(x, y, op), __cmp_once(x, y, op)) /** * min - return minimum of two values of the same or compatible types * @x: first value * @y: second value */ -#define min(x, y) \ - __min(typeof(x), typeof(y), \ - __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ - x, y) - -#define __max(t1, t2, max1, max2, x, y) ({ \ - t1 max1 = (x); \ - t2 max2 = (y); \ - (void) (&max1 == &max2); \ - max1 > max2 ? 
max1 : max2; }) +#define min(x, y) __careful_cmp(x, y, <) /** * max - return maximum of two values of the same or compatible types * @x: first value * @y: second value */ -#define max(x, y) \ - __max(typeof(x), typeof(y), \ - __UNIQUE_ID(max1_), __UNIQUE_ID(max2_), \ - x, y) +#define max(x, y) __careful_cmp(x, y, >) /** * min3 - return minimum of three values @@ -878,10 +895,7 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * @x: first value * @y: second value */ -#define min_t(type, x, y) \ - __min(type, type, \ - __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ - x, y) +#define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <) /** * max_t - return maximum of two values, using the specified type @@ -889,10 +903,7 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * @x: first value * @y: second value */ -#define max_t(type, x, y) \ - __max(type, type, \ - __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ - x, y) +#define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >) /** * clamp_t - return a value clamped to a given range using a given type -- cgit v1.2.3 From 36071a279b4100afe9fbee18727ad78daa307591 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 5 Apr 2018 16:20:22 -0700 Subject: slab: make kmalloc_index() return "unsigned int" kmalloc_index() returns an index into an array of kmalloc kmem caches, and therefore should be unsigned. Space savings with SLUB on trimmed down .config: add/remove: 0/1 grow/shrink: 6/56 up/down: 85/-557 (-472) Function old new delta calculate_sizes 924 983 +59 on_freelist 589 604 +15 init_cache_random_seq 122 127 +5 ext4_mb_init 1206 1210 +4 slab_pad_check.part 270 271 +1 cpu_partial_store 112 113 +1 usersize_show 28 27 -1 ... new_slab 1871 1837 -34 slab_order 204 - -204 This patch starts a series of converting SLUB (mostly) to "unsigned int". 1) Most integers in the code are in fact unsigned entities: array indexes, lengths, buffer sizes, allocation orders.
It is therefore better to use unsigned variables 2) Some integers in the code are either "size_t" or "unsigned long" for no reason. size_t usually comes from people trying to maintain type correctness and figuring out that "sizeof" operator returns size_t or memset/memcpy takes size_t so should everything passed to it. However the number of 4GB+ objects in the kernel is very small. Most, if not all, dynamically allocated objects with kmalloc() or kmem_cache_create() aren't actually big. Maintaining wide types doesn't do anything. 64-bit ops are bigger than 32-bit on our beloved x86_64, so try to not use 64-bit where it isn't necessary (read: everywhere where integers are integers not pointers) 3) in case of SLAB allocators, there are additional limitations *) page->inuse, page->objects are only 16-/15-bit, *) cache size was always 32-bit *) slab orders are small, order 20 is needed to go 64-bit on x86_64 (PAGE_SIZE << order) Basically everything is 32-bit except kmalloc(1ULL<<32) which gets shortcut through page allocator. Christoph said: : : That changes with large base page size on power and ARM64 f.e. but then : we do not want to encourage larger allocations through slab anyways. Link: http://lkml.kernel.org/r/20180305200730.15812-2-adobriyan@gmail.com Signed-off-by: Alexey Dobriyan Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 231abc8976c5..296f33a512eb 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -308,7 +308,7 @@ extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; * 2 = 129 .. 192 bytes * n = 2^(n-1)+1 .. 
2^n */ -static __always_inline int kmalloc_index(size_t size) +static __always_inline unsigned int kmalloc_index(size_t size) { if (!size) return 0; @@ -504,7 +504,7 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) return kmalloc_large(size, flags); #ifndef CONFIG_SLOB if (!(flags & GFP_DMA)) { - int index = kmalloc_index(size); + unsigned int index = kmalloc_index(size); if (!index) return ZERO_SIZE_PTR; @@ -542,7 +542,7 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) #ifndef CONFIG_SLOB if (__builtin_constant_p(size) && size <= KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) { - int i = kmalloc_index(size); + unsigned int i = kmalloc_index(size); if (!i) return ZERO_SIZE_PTR; -- cgit v1.2.3 From 0be70327ec8cf6dd6847cbd8b75ca51be864a6ea Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 5 Apr 2018 16:20:26 -0700 Subject: slab: make kmalloc_size() return "unsigned int" kmalloc_size() derives size of kmalloc cache from internal index, which can't be negative. Propagate unsignedness a bit. 
Link: http://lkml.kernel.org/r/20180305200730.15812-3-adobriyan@gmail.com Signed-off-by: Alexey Dobriyan Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 296f33a512eb..ad157fbf3886 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -522,11 +522,11 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) * return size or 0 if a kmalloc cache for that * size does not exist */ -static __always_inline int kmalloc_size(int n) +static __always_inline unsigned int kmalloc_size(unsigned int n) { #ifndef CONFIG_SLOB if (n > 2) - return 1 << n; + return 1U << n; if (n == 1 && KMALLOC_MIN_SIZE <= 32) return 96; -- cgit v1.2.3 From f4957d5bd09165b165df851fbf8c658f7fcd9922 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 5 Apr 2018 16:20:37 -0700 Subject: slab: make kmem_cache_create() work with 32-bit sizes struct kmem_cache::size and ::align were always 32-bit. Out of curiosity I created 4GB kmem_cache, it oopsed with division by 0. kmem_cache_create(1UL<<32+1) created 1-byte cache as expected. size_t doesn't work and never did. 
Link: http://lkml.kernel.org/r/20180305200730.15812-6-adobriyan@gmail.com Signed-off-by: Alexey Dobriyan Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index ad157fbf3886..d36e8f03730e 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -137,11 +137,12 @@ bool slab_is_available(void); extern bool usercopy_fallback; -struct kmem_cache *kmem_cache_create(const char *name, size_t size, - size_t align, slab_flags_t flags, +struct kmem_cache *kmem_cache_create(const char *name, unsigned int size, + unsigned int align, slab_flags_t flags, void (*ctor)(void *)); struct kmem_cache *kmem_cache_create_usercopy(const char *name, - size_t size, size_t align, slab_flags_t flags, + unsigned int size, unsigned int align, + slab_flags_t flags, size_t useroffset, size_t usersize, void (*ctor)(void *)); void kmem_cache_destroy(struct kmem_cache *); -- cgit v1.2.3 From eb7235eb842043ca302e992286ca6af63a8127fe Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 5 Apr 2018 16:20:48 -0700 Subject: slub: make ->remote_node_defrag_ratio unsigned int ->remote_node_defrag_ratio is in range 0..1000. This also adds a check and modifies the behavior to return an error code. Before this patch invalid values were ignored. 
Link: http://lkml.kernel.org/r/20180305200730.15812-9-adobriyan@gmail.com Signed-off-by: Alexey Dobriyan Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 8ad99c47b19c..f6548083fe0f 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -124,7 +124,7 @@ struct kmem_cache { /* * Defragmentation by allocating from a remote node. */ - int remote_node_defrag_ratio; + unsigned int remote_node_defrag_ratio; #endif #ifdef CONFIG_SLAB_FREELIST_RANDOM -- cgit v1.2.3 From 56d8ceebd39b4db3248291e6d1e3e696fc73b077 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 5 Apr 2018 16:20:51 -0700 Subject: slub: make ->max_attr_size unsigned int ->max_attr_size is maximum length of every SLAB memcg attribute ever written. VFS limits those to INT_MAX. 
Link: http://lkml.kernel.org/r/20180305200730.15812-10-adobriyan@gmail.com Signed-off-by: Alexey Dobriyan Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index f6548083fe0f..9bb761324a9c 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -110,7 +110,8 @@ struct kmem_cache { #endif #ifdef CONFIG_MEMCG struct memcg_cache_params memcg_params; - int max_attr_size; /* for propagation, maximum size of a stored attr */ + /* for propagation, maximum size of a stored attr */ + unsigned int max_attr_size; #ifdef CONFIG_SYSFS struct kset *memcg_kset; #endif -- cgit v1.2.3 From 2ca6d39b31022bb9e1dda77109e292517f701261 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 5 Apr 2018 16:20:55 -0700 Subject: slub: make ->red_left_pad unsigned int Padding length can't be negative. Link: http://lkml.kernel.org/r/20180305200730.15812-11-adobriyan@gmail.com Signed-off-by: Alexey Dobriyan Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 9bb761324a9c..9f59fc16444b 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -101,7 +101,7 @@ struct kmem_cache { int inuse; /* Offset to metadata */ int align; /* Alignment */ int reserved; /* Reserved bytes at the end of slabs */ - int red_left_pad; /* Left redzone padding size */ + unsigned int red_left_pad; /* Left redzone padding size */ const char *name; /* Name (only for display!) 
*/ struct list_head list; /* List of slab caches */ #ifdef CONFIG_SYSFS -- cgit v1.2.3 From d66e52d1e82b1adfab541f1aad09526ebf67842d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 5 Apr 2018 16:20:58 -0700 Subject: slub: make ->reserved unsigned int ->reserved is either 0 or sizeof(struct rcu_head), can't be negative. Link: http://lkml.kernel.org/r/20180305200730.15812-12-adobriyan@gmail.com Signed-off-by: Alexey Dobriyan Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 9f59fc16444b..2b4417aa15d8 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -100,7 +100,7 @@ struct kmem_cache { void (*ctor)(void *); int inuse; /* Offset to metadata */ int align; /* Alignment */ - int reserved; /* Reserved bytes at the end of slabs */ + unsigned int reserved; /* Reserved bytes at the end of slabs */ unsigned int red_left_pad; /* Left redzone padding size */ const char *name; /* Name (only for display!) */ struct list_head list; /* List of slab caches */ -- cgit v1.2.3 From 3a3791ec2ecd5db8d903b66faa340b0dfa72e64b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 5 Apr 2018 16:21:02 -0700 Subject: slub: make ->align unsigned int Kmem cache alignment can't be negative. 
Link: http://lkml.kernel.org/r/20180305200730.15812-13-adobriyan@gmail.com Signed-off-by: Alexey Dobriyan Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 2b4417aa15d8..2a0eabeff78f 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -99,7 +99,7 @@ struct kmem_cache { int refcount; /* Refcount for slab cache destroy */ void (*ctor)(void *); int inuse; /* Offset to metadata */ - int align; /* Alignment */ + unsigned int align; /* Alignment */ unsigned int reserved; /* Reserved bytes at the end of slabs */ unsigned int red_left_pad; /* Left redzone padding size */ const char *name; /* Name (only for display!) */ -- cgit v1.2.3 From 52ee6d74aa23a3c5d4472edf167f2bb47776a733 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 5 Apr 2018 16:21:06 -0700 Subject: slub: make ->inuse unsigned int ->inuse is "the number of bytes in actual use by the object", can't be negative. 
Link: http://lkml.kernel.org/r/20180305200730.15812-14-adobriyan@gmail.com Signed-off-by: Alexey Dobriyan Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 2a0eabeff78f..2287b800474f 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -98,7 +98,7 @@ struct kmem_cache { gfp_t allocflags; /* gfp flags to use on each alloc */ int refcount; /* Refcount for slab cache destroy */ void (*ctor)(void *); - int inuse; /* Offset to metadata */ + unsigned int inuse; /* Offset to metadata */ unsigned int align; /* Alignment */ unsigned int reserved; /* Reserved bytes at the end of slabs */ unsigned int red_left_pad; /* Left redzone padding size */ -- cgit v1.2.3 From e5d9998f3e09359b372a037a6ac55ba235d95d57 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 5 Apr 2018 16:21:10 -0700 Subject: slub: make ->cpu_partial unsigned int /* * cpu_partial determined the maximum number of objects * kept in the per cpu partial lists of a processor. */ Can't be negative. Link: http://lkml.kernel.org/r/20180305200730.15812-15-adobriyan@gmail.com Signed-off-by: Alexey Dobriyan Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 2287b800474f..d2cc1391f17a 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -88,7 +88,8 @@ struct kmem_cache { int object_size; /* The size of an object without meta data */ int offset; /* Free pointer offset. 
*/ #ifdef CONFIG_SLUB_CPU_PARTIAL - int cpu_partial; /* Number of per cpu partial objects to keep around */ + /* Number of per cpu partial objects to keep around */ + unsigned int cpu_partial; #endif struct kmem_cache_order_objects oo; -- cgit v1.2.3 From a5035de2c4472d6c58c60a7f8eaad8ed0084b8b2 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 5 Apr 2018 16:21:13 -0700 Subject: slub: make ->offset unsigned int ->offset is free pointer offset from the start of the object, can't be negative. Link: http://lkml.kernel.org/r/20180305200730.15812-16-adobriyan@gmail.com Signed-off-by: Alexey Dobriyan Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index d2cc1391f17a..db00dbd7e89f 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -86,7 +86,7 @@ struct kmem_cache { unsigned long min_partial; int size; /* The size of an object including meta data */ int object_size; /* The size of an object without meta data */ - int offset; /* Free pointer offset. */ + unsigned int offset; /* Free pointer offset. */ #ifdef CONFIG_SLUB_CPU_PARTIAL /* Number of per cpu partial objects to keep around */ unsigned int cpu_partial; -- cgit v1.2.3 From 1b473f29d5dd766903ac2372ac04b07600f233d0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 5 Apr 2018 16:21:17 -0700 Subject: slub: make ->object_size unsigned int Linux doesn't support negative length objects. 
Link: http://lkml.kernel.org/r/20180305200730.15812-17-adobriyan@gmail.com Signed-off-by: Alexey Dobriyan Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index db00dbd7e89f..7d74f121ef4e 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -85,7 +85,7 @@ struct kmem_cache { slab_flags_t flags; unsigned long min_partial; int size; /* The size of an object including meta data */ - int object_size; /* The size of an object without meta data */ + unsigned int object_size;/* The size of an object without meta data */ unsigned int offset; /* Free pointer offset. */ #ifdef CONFIG_SLUB_CPU_PARTIAL /* Number of per cpu partial objects to keep around */ -- cgit v1.2.3 From 44065b2e2975ff5987164b98d29cc78e207f9a5a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 5 Apr 2018 16:21:20 -0700 Subject: slub: make ->size unsigned int Linux doesn't support negative length objects (including meta data). 
Link: http://lkml.kernel.org/r/20180305200730.15812-18-adobriyan@gmail.com Signed-off-by: Alexey Dobriyan Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 7d74f121ef4e..bc02fd3a8ccf 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -84,7 +84,7 @@ struct kmem_cache { /* Used for retriving partial slabs etc */ slab_flags_t flags; unsigned long min_partial; - int size; /* The size of an object including meta data */ + unsigned int size; /* The size of an object including meta data */ unsigned int object_size;/* The size of an object without meta data */ unsigned int offset; /* Free pointer offset. */ #ifdef CONFIG_SLUB_CPU_PARTIAL -- cgit v1.2.3 From be4a7988b35db9e6f95dca818d5e94785840fb58 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 5 Apr 2018 16:21:28 -0700 Subject: kasan: make kasan_cache_create() work with 32-bit slab cache sizes If SLAB doesn't support 4GB+ kmem caches (it never did), KASAN should not do it as well. 
Link: http://lkml.kernel.org/r/20180305200730.15812-20-adobriyan@gmail.com Signed-off-by: Alexey Dobriyan Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d6459bd1376d..de784fd11d12 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -43,7 +43,7 @@ void kasan_unpoison_stack_above_sp_to(const void *watermark); void kasan_alloc_pages(struct page *page, unsigned int order); void kasan_free_pages(struct page *page, unsigned int order); -void kasan_cache_create(struct kmem_cache *cache, size_t *size, +void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, slab_flags_t *flags); void kasan_cache_shrink(struct kmem_cache *cache); void kasan_cache_shutdown(struct kmem_cache *cache); @@ -92,7 +92,7 @@ static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {} static inline void kasan_cache_create(struct kmem_cache *cache, - size_t *size, + unsigned int *size, slab_flags_t *flags) {} static inline void kasan_cache_shrink(struct kmem_cache *cache) {} static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} -- cgit v1.2.3 From 7bbdb81ee3de73f2381ceec1bbee831f4c913b5c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 5 Apr 2018 16:21:31 -0700 Subject: slab: make usercopy region 32-bit If kmem cache sizes are 32-bit, then the usercopy region should be too.
Link: http://lkml.kernel.org/r/20180305200730.15812-21-adobriyan@gmail.com Signed-off-by: Alexey Dobriyan Cc: David Miller Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 +- include/linux/slab_def.h | 4 ++-- include/linux/slub_def.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index d36e8f03730e..04402c637171 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -143,7 +143,7 @@ struct kmem_cache *kmem_cache_create(const char *name, unsigned int size, struct kmem_cache *kmem_cache_create_usercopy(const char *name, unsigned int size, unsigned int align, slab_flags_t flags, - size_t useroffset, size_t usersize, + unsigned int useroffset, unsigned int usersize, void (*ctor)(void *)); void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 7385547c04b1..d9228e4d0320 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -85,8 +85,8 @@ struct kmem_cache { unsigned int *random_seq; #endif - size_t useroffset; /* Usercopy region offset */ - size_t usersize; /* Usercopy region size */ + unsigned int useroffset; /* Usercopy region offset */ + unsigned int usersize; /* Usercopy region size */ struct kmem_cache_node *node[MAX_NUMNODES]; }; diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index bc02fd3a8ccf..623d6ba92036 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -137,8 +137,8 @@ struct kmem_cache { struct kasan_cache kasan_info; #endif - size_t useroffset; /* Usercopy region offset */ - size_t usersize; /* Usercopy region size */ + unsigned int useroffset; /* Usercopy region offset */ + unsigned int usersize; /* Usercopy region size */ struct kmem_cache_node *node[MAX_NUMNODES]; }; -- cgit 
v1.2.3 From 19af27aff901e401a5b79e5c974e881e4701162c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 5 Apr 2018 16:21:39 -0700 Subject: slub: make struct kmem_cache_order_objects::x unsigned int struct kmem_cache_order_objects is for mixing order and number of objects, and orders aren't big enough to warrant 64-bit width. Propagate unsignedness down so that everything fits. !!! Patch assumes that "PAGE_SIZE << order" doesn't overflow. !!! Link: http://lkml.kernel.org/r/20180305200730.15812-23-adobriyan@gmail.com Signed-off-by: Alexey Dobriyan Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 623d6ba92036..3773e26c08c1 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -73,7 +73,7 @@ struct kmem_cache_cpu { * given order would contain. */ struct kmem_cache_order_objects { - unsigned long x; + unsigned int x; }; /* -- cgit v1.2.3 From 310253514bbf179c5f82e20a7a4bbf07abc7f5ad Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 5 Apr 2018 16:22:08 -0700 Subject: mm/migrate: rename migration reason MR_CMA to MR_CONTIG_RANGE alloc_contig_range() initiates compaction and eventual migration for the purpose of either CMA or HugeTLB allocations. At present, the reason code remains the same MR_CMA for either of these cases. Let's make it MR_CONTIG_RANGE which will appropriately reflect the reason code in both these cases. 
Link: http://lkml.kernel.org/r/20180202091518.18798-1-khandual@linux.vnet.ibm.com Signed-off-by: Anshuman Khandual Acked-by: Michal Hocko Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index a2246cf670ba..ab45f8a0d288 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -25,7 +25,7 @@ enum migrate_reason { MR_SYSCALL, /* also applies to cpusets */ MR_MEMPOLICY_MBIND, MR_NUMA_MISPLACED, - MR_CMA, + MR_CONTIG_RANGE, MR_TYPES }; -- cgit v1.2.3 From 3a2d7fa8a3d5ae740bd0c21d933acc6220857ed0 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 5 Apr 2018 16:22:27 -0700 Subject: mm: disable interrupts while initializing deferred pages Vlastimil Babka reported a window during which allocations may fail: while deferred pages are being initialized and after the current version of on-demand initialization has finished. While this is a highly unlikely scenario, since this kind of allocation request must be large and must come from an interrupt handler, we still want to cover it. We solve this by initializing deferred pages with interrupts disabled, and holding the node_size_lock spin lock while pages in the node are being initialized. The on-demand deferred page initialization that comes later will use the same lock, and thus synchronize with deferred_init_memmap(). It is unlikely for threads that initialize deferred pages to be interrupted. They run soon after smp_init(), but before modules are initialized, and long before user space programs. This is why there is no adverse effect of having these threads running with interrupts disabled. 
[pasha.tatashin@oracle.com: v6] Link: http://lkml.kernel.org/r/20180313182355.17669-2-pasha.tatashin@oracle.com Link: http://lkml.kernel.org/r/20180309220807.24961-2-pasha.tatashin@oracle.com Signed-off-by: Pavel Tatashin Reviewed-by: Andrew Morton Cc: Steven Sistare Cc: Daniel Jordan Cc: Masayoshi Mizuma Cc: Michal Hocko Cc: Catalin Marinas Cc: AKASHI Takahiro Cc: Gioh Kim Cc: Heiko Carstens Cc: Yaowei Bai Cc: Wei Yang Cc: Paul Burton Cc: Miles Chen Cc: Vlastimil Babka Cc: Mel Gorman Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 53 ++++++++++++++++++++++-------------------- include/linux/mmzone.h | 5 ++-- 2 files changed, 31 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index aba5f86eb038..2b0265265c28 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -51,24 +51,6 @@ enum { MMOP_ONLINE_MOVABLE, }; -/* - * pgdat resizing functions - */ -static inline -void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags) -{ - spin_lock_irqsave(&pgdat->node_size_lock, *flags); -} -static inline -void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags) -{ - spin_unlock_irqrestore(&pgdat->node_size_lock, *flags); -} -static inline -void pgdat_resize_init(struct pglist_data *pgdat) -{ - spin_lock_init(&pgdat->node_size_lock); -} /* * Zone resizing functions * @@ -246,13 +228,6 @@ extern void clear_zone_contiguous(struct zone *zone); ___page; \ }) -/* - * Stub functions for when hotplug is off - */ -static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {} -static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {} -static inline void pgdat_resize_init(struct pglist_data *pgdat) {} - static inline unsigned zone_span_seqbegin(struct zone *zone) { return 0; @@ -293,6 +268,34 @@ static inline bool 
movable_node_is_enabled(void) } #endif /* ! CONFIG_MEMORY_HOTPLUG */ +#if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT) +/* + * pgdat resizing functions + */ +static inline +void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags) +{ + spin_lock_irqsave(&pgdat->node_size_lock, *flags); +} +static inline +void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags) +{ + spin_unlock_irqrestore(&pgdat->node_size_lock, *flags); +} +static inline +void pgdat_resize_init(struct pglist_data *pgdat) +{ + spin_lock_init(&pgdat->node_size_lock); +} +#else /* !(CONFIG_MEMORY_HOTPLUG || CONFIG_DEFERRED_STRUCT_PAGE_INIT) */ +/* + * Stub functions for when hotplug is off + */ +static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {} +static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {} +static inline void pgdat_resize_init(struct pglist_data *pgdat) {} +#endif /* !(CONFIG_MEMORY_HOTPLUG || CONFIG_DEFERRED_STRUCT_PAGE_INIT) */ + #ifdef CONFIG_MEMORY_HOTREMOVE extern bool is_mem_section_removable(unsigned long pfn, unsigned long nr_pages); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index a2db4576e499..5d935411d3c4 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -633,14 +633,15 @@ typedef struct pglist_data { #ifndef CONFIG_NO_BOOTMEM struct bootmem_data *bdata; #endif -#ifdef CONFIG_MEMORY_HOTPLUG +#if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT) /* * Must be held any time you expect node_start_pfn, node_present_pages * or node_spanned_pages stay constant. Holding this will also * guarantee that any pfn_valid() stays that way. * * pgdat_resize_lock() and pgdat_resize_unlock() are provided to - * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG. + * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG + * or CONFIG_DEFERRED_STRUCT_PAGE_INIT. 
* * Nests above zone->lock and zone->span_seqlock */ -- cgit v1.2.3 From c9e97a1997fbf3a1d18d4065c2ca381f0704d7e5 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 5 Apr 2018 16:22:31 -0700 Subject: mm: initialize pages on demand during boot Deferred page initialization allows the boot cpu to initialize a small subset of the system's pages early in boot, with other cpus doing the rest later on. It is, however, problematic to know how many pages the kernel needs during boot. Different modules and kernel parameters may change the requirement, so the boot cpu either initializes too many pages or runs out of memory. To fix that, initialize early pages on demand. This ensures the kernel does the minimum amount of work to initialize pages during boot and leaves the rest to be divided in the multithreaded initialization path (deferred_init_memmap). The on-demand code is permanently disabled using static branching once deferred pages are initialized. After the static branch is changed to false, the overhead is up to two branch-always instructions if the zone watermark check fails or if rmqueue fails. Sergey Senozhatsky noticed that while deferred pages currently make sense only on NUMA machines (we start one thread per latency node), CONFIG_NUMA is not a requirement for CONFIG_DEFERRED_STRUCT_PAGE_INIT, so that also must be addressed in the patch. 
[akpm@linux-foundation.org: fix typo in comment, make deferred_pages static] [pasha.tatashin@oracle.com: fix min() type mismatch warning] Link: http://lkml.kernel.org/r/20180212164543.26592-1-pasha.tatashin@oracle.com [pasha.tatashin@oracle.com: use zone_to_nid() in deferred_grow_zone()] Link: http://lkml.kernel.org/r/20180214163343.21234-2-pasha.tatashin@oracle.com [pasha.tatashin@oracle.com: might_sleep warning] Link: http://lkml.kernel.org/r/20180306192022.28289-1-pasha.tatashin@oracle.com [akpm@linux-foundation.org: s/spin_lock/spin_lock_irq/ in page_alloc_init_late()] [pasha.tatashin@oracle.com: v5] Link: http://lkml.kernel.org/r/20180309220807.24961-3-pasha.tatashin@oracle.com [akpm@linux-foundation.org: tweak comments] [pasha.tatashin@oracle.com: v6] Link: http://lkml.kernel.org/r/20180313182355.17669-3-pasha.tatashin@oracle.com [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20180209192216.20509-2-pasha.tatashin@oracle.com Signed-off-by: Pavel Tatashin Reviewed-by: Daniel Jordan Reviewed-by: Steven Sistare Reviewed-by: Andrew Morton Tested-by: Masayoshi Mizuma Acked-by: Mel Gorman Cc: Michal Hocko Cc: Catalin Marinas Cc: AKASHI Takahiro Cc: Gioh Kim Cc: Heiko Carstens Cc: Yaowei Bai Cc: Wei Yang Cc: Paul Burton Cc: Miles Chen Cc: Vlastimil Babka Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index f92ea7783652..0257aee7ab4b 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -416,21 +416,11 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end) { } #endif - -extern unsigned long memblock_reserved_memory_within(phys_addr_t start_addr, - phys_addr_t end_addr); #else static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) { return 0; } - -static inline unsigned long 
memblock_reserved_memory_within(phys_addr_t start_addr, - phys_addr_t end_addr) -{ - return 0; -} - #endif /* CONFIG_HAVE_MEMBLOCK */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From f165b378bbdf6c8afd950060fc3cbc935bb890c6 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 5 Apr 2018 16:22:47 -0700 Subject: mm: uninitialized struct page poisoning sanity checking During boot we poison struct page memory in order to ensure that no one is accessing this memory until the struct pages are initialized in __init_single_page(). This patch adds more scrutiny to this checking by making sure that flags do not equal the poison pattern when they are accessed. The pattern is all ones. Since node id is also stored in struct page, and may be accessed quite early, we add this enforcement into page_to_nid() function as well. Note, this is applicable only when NODE_NOT_IN_PAGE_FLAGS=n [pasha.tatashin@oracle.com: v4] Link: http://lkml.kernel.org/r/20180215165920.8570-4-pasha.tatashin@oracle.com Link: http://lkml.kernel.org/r/20180213193159.14606-4-pasha.tatashin@oracle.com Signed-off-by: Pavel Tatashin Reviewed-by: Ingo Molnar Acked-by: Michal Hocko Cc: Baoquan He Cc: Bharata B Rao Cc: Daniel Jordan Cc: Dan Williams Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Kirill A. 
Shutemov Cc: Mel Gorman Cc: Steven Sistare Cc: Thomas Gleixner Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 +++- include/linux/page-flags.h | 22 +++++++++++++++++----- 2 files changed, 20 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index f945dff34925..2e40a44a1fae 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -903,7 +903,9 @@ extern int page_to_nid(const struct page *page); #else static inline int page_to_nid(const struct page *page) { - return (page->flags >> NODES_PGSHIFT) & NODES_MASK; + struct page *p = (struct page *)page; + + return (PF_POISONED_CHECK(p)->flags >> NODES_PGSHIFT) & NODES_MASK; } #endif diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 50c2b8786831..e34a27727b9a 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -156,9 +156,18 @@ static __always_inline int PageCompound(struct page *page) return test_bit(PG_head, &page->flags) || PageTail(page); } +#define PAGE_POISON_PATTERN -1l +static inline int PagePoisoned(const struct page *page) +{ + return page->flags == PAGE_POISON_PATTERN; +} + /* * Page flags policies wrt compound pages * + * PF_POISONED_CHECK + * check if this struct page poisoned/uninitialized + * * PF_ANY: * the page flag is relevant for small, head and tail pages. * @@ -176,17 +185,20 @@ static __always_inline int PageCompound(struct page *page) * PF_NO_COMPOUND: * the page flag is not relevant for compound pages. 
*/ -#define PF_ANY(page, enforce) page -#define PF_HEAD(page, enforce) compound_head(page) +#define PF_POISONED_CHECK(page) ({ \ + VM_BUG_ON_PGFLAGS(PagePoisoned(page), page); \ + page; }) +#define PF_ANY(page, enforce) PF_POISONED_CHECK(page) +#define PF_HEAD(page, enforce) PF_POISONED_CHECK(compound_head(page)) #define PF_ONLY_HEAD(page, enforce) ({ \ VM_BUG_ON_PGFLAGS(PageTail(page), page); \ - page;}) + PF_POISONED_CHECK(page); }) #define PF_NO_TAIL(page, enforce) ({ \ VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \ - compound_head(page);}) + PF_POISONED_CHECK(compound_head(page)); }) #define PF_NO_COMPOUND(page, enforce) ({ \ VM_BUG_ON_PGFLAGS(enforce && PageCompound(page), page); \ - page;}) + PF_POISONED_CHECK(page); }) /* * Macros to create function definitions for page flags -- cgit v1.2.3 From fc44f7f9231a73821fc858f5bc48883a9e78f6de Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 5 Apr 2018 16:22:56 -0700 Subject: mm/memory_hotplug: don't read nid from struct page during hotplug During memory hotplugging the probe routine will leave struct pages uninitialized, the same as it is currently done during boot. Therefore, we do not want to access the inside of struct pages before __init_single_page() is called during onlining. Because during hotplug we know that pages in one memory block belong to the same numa node, we can skip the checking. We should keep checking for the boot case. [pasha.tatashin@oracle.com: s/register_new_memory()/hotplug_memory_register()] Link: http://lkml.kernel.org/r/20180228030308.1116-6-pasha.tatashin@oracle.com Link: http://lkml.kernel.org/r/20180215165920.8570-6-pasha.tatashin@oracle.com Signed-off-by: Pavel Tatashin Acked-by: Michal Hocko Reviewed-by: Ingo Molnar Cc: Baoquan He Cc: Bharata B Rao Cc: Daniel Jordan Cc: Dan Williams Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Kirill A. 
Shutemov Cc: Mel Gorman Cc: Steven Sistare Cc: Thomas Gleixner Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory.h | 2 +- include/linux/node.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory.h b/include/linux/memory.h index f71e732c77b2..9f8cd856ca1e 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -109,7 +109,7 @@ extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); extern int register_memory_isolate_notifier(struct notifier_block *nb); extern void unregister_memory_isolate_notifier(struct notifier_block *nb); -extern int register_new_memory(int, struct mem_section *); +int hotplug_memory_register(int nid, struct mem_section *section); #ifdef CONFIG_MEMORY_HOTREMOVE extern int unregister_memory_section(struct mem_section *); #endif diff --git a/include/linux/node.h b/include/linux/node.h index 4ece0fee0ffc..41f171861dcc 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -67,7 +67,7 @@ extern void unregister_one_node(int nid); extern int register_cpu_under_node(unsigned int cpu, unsigned int nid); extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); extern int register_mem_sect_under_node(struct memory_block *mem_blk, - int nid); + int nid, bool check_nid); extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, unsigned long phys_index); @@ -97,7 +97,7 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) return 0; } static inline int register_mem_sect_under_node(struct memory_block *mem_blk, - int nid) + int nid, bool check_nid) { return 0; } -- cgit v1.2.3 From d0dc12e86b3197a14a908d4fe7cb35b73dda82b5 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 5 Apr 2018 16:23:00 -0700 Subject: mm/memory_hotplug: optimize memory hotplug During memory hotplugging we 
traverse struct pages three times: 1. memset(0) in sparse_add_one_section() 2. loop in __add_section() to do: set_page_node(page, nid); and SetPageReserved(page); 3. loop in memmap_init_zone() to call __init_single_pfn() This patch removes the first two loops, and leaves only loop 3. All struct pages are initialized in one place, the same as it is done during boot. The benefits: - We improve memory hotplug performance because we are not evicting the cache several times and also reduce loop branching overhead. - Remove the condition from the hot path in __init_single_pfn() that was added in order to fix the problem that was reported by Bharata in the above email thread, thus also improving performance during normal boot. - Make memory hotplug more similar to the boot memory initialization path because we zero and initialize struct pages only in one function. - Simplify the memory hotplug struct page initialization code, and thus enable future improvements, such as multi-threading the initialization of struct pages in order to improve hotplug performance even further on larger machines. [pasha.tatashin@oracle.com: v5] Link: http://lkml.kernel.org/r/20180228030308.1116-7-pasha.tatashin@oracle.com Link: http://lkml.kernel.org/r/20180215165920.8570-7-pasha.tatashin@oracle.com Signed-off-by: Pavel Tatashin Reviewed-by: Ingo Molnar Cc: Michal Hocko Cc: Baoquan He Cc: Bharata B Rao Cc: Daniel Jordan Cc: Dan Williams Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Kirill A. 
Shutemov Cc: Mel Gorman Cc: Steven Sistare Cc: Thomas Gleixner Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/memory.h b/include/linux/memory.h index 9f8cd856ca1e..31ca3e28b0eb 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -33,6 +33,7 @@ struct memory_block { void *hw; /* optional pointer to fw/hw data */ int (*phys_callback)(struct memory_block *); struct device dev; + int nid; /* NID for this memory block */ }; int arch_get_memory_phys_device(unsigned long start_pfn); -- cgit v1.2.3 From 31286a8484a85e8b4e91ddb0f5415aee8a416827 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 5 Apr 2018 16:23:05 -0700 Subject: mm: hwpoison: disable memory error handling on 1GB hugepage Recently the following BUG was reported: Injecting memory failure for pfn 0x3c0000 at process virtual address 0x7fe300000000 Memory failure: 0x3c0000: recovery action for huge page: Recovered BUG: unable to handle kernel paging request at ffff8dfcc0003000 IP: gup_pgd_range+0x1f0/0xc20 PGD 17ae72067 P4D 17ae72067 PUD 0 Oops: 0000 [#1] SMP PTI ... CPU: 3 PID: 5467 Comm: hugetlb_1gb Not tainted 4.15.0-rc8-mm1-abc+ #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-1.fc25 04/01/2014 You can easily reproduce this by calling madvise(MADV_HWPOISON) twice on a 1GB hugepage. This happens because get_user_pages_fast() is not aware of a migration entry on pud that was created in the 1st madvise() event. I think that conversion to pud-aligned migration entry is working, but other MM code walking over page table isn't prepared for it. We need some time and effort to make all this work properly, so this patch avoids the reported bug by just disabling error handling for 1GB hugepage. 
[n-horiguchi@ah.jp.nec.com: v2] Link: http://lkml.kernel.org/r/1517284444-18149-1-git-send-email-n-horiguchi@ah.jp.nec.com Link: http://lkml.kernel.org/r/1517207283-15769-1-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Acked-by: Michal Hocko Reviewed-by: Andrew Morton Reviewed-by: Mike Kravetz Acked-by: Punit Agrawal Tested-by: Michael Ellerman Cc: Anshuman Khandual Cc: "Aneesh Kumar K.V" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2e40a44a1fae..2e2be527642a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2613,6 +2613,7 @@ enum mf_action_page_type { MF_MSG_POISONED_HUGE, MF_MSG_HUGE, MF_MSG_FREE_HUGE, + MF_MSG_NON_PMD_HUGE, MF_MSG_UNMAP_FAILED, MF_MSG_DIRTY_SWAPCACHE, MF_MSG_CLEAN_SWAPCACHE, -- cgit v1.2.3 From 03f5d58fa42fb337b921e57f8e2c2d4df7df890d Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 5 Apr 2018 16:23:24 -0700 Subject: mm/page_ref: use atomic_set_release in page_ref_unfreeze page_ref_unfreeze() has exactly that semantic. No functional changes: just minus one barrier and proper handling of PPro errata. Link: http://lkml.kernel.org/r/151844393004.210639.4672319312617954272.stgit@buzz Signed-off-by: Konstantin Khlebnikov Acked-by: Kirill A. 
Shutemov Cc: Michal Hocko Cc: Nicholas Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_ref.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 760d74a0e9a9..14d14beb1f7f 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -175,8 +175,7 @@ static inline void page_ref_unfreeze(struct page *page, int count) VM_BUG_ON_PAGE(page_count(page) != 0, page); VM_BUG_ON(count == 0); - smp_mb(); - atomic_set(&page->_refcount, count); + atomic_set_release(&page->_refcount, count); if (page_ref_tracepoint_active(__tracepoint_page_ref_unfreeze)) __page_ref_unfreeze(page, count); } -- cgit v1.2.3 From eaf649ebc3acfbb235ce31cebd06e4876d05758e Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 5 Apr 2018 16:23:39 -0700 Subject: mm: swap: clean up swap readahead When I see recent change of swap readahead, I am very unhappy about current code structure which diverges two swap readahead algorithm in do_swap_page. This patch is to clean it up. Main motivation is that fault handler doesn't need to be aware of readahead algorithms but just should call swapin_readahead. As first step, this patch cleans up a little bit but not perfect (I just separate for review easier) so next patch will make the goal complete. 
[minchan@kernel.org: do not check readahead flag with THP anon] Link: http://lkml.kernel.org/r/874lm83zho.fsf@yhuang-dev.intel.com Link: http://lkml.kernel.org/r/20180227232611.169883-1-minchan@kernel.org Link: http://lkml.kernel.org/r/1509520520-32367-2-git-send-email-minchan@kernel.org Link: http://lkml.kernel.org/r/20180220085249.151400-2-minchan@kernel.org Signed-off-by: Minchan Kim Reviewed-by: Andrew Morton Cc: Hugh Dickins Cc: Huang Ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index a1a3f4ed94ce..fa92177d863e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -424,12 +424,8 @@ extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t, bool *new_page_allocated); extern struct page *swapin_readahead(swp_entry_t, gfp_t, struct vm_area_struct *vma, unsigned long addr); - -extern struct page *swap_readahead_detect(struct vm_fault *vmf, - struct vma_swap_readahead *swap_ra); extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, - struct vm_fault *vmf, - struct vma_swap_readahead *swap_ra); + struct vm_fault *vmf); /* linux/mm/swapfile.c */ extern atomic_long_t nr_swap_pages; @@ -548,15 +544,8 @@ static inline bool swap_use_vma_readahead(void) return false; } -static inline struct page *swap_readahead_detect( - struct vm_fault *vmf, struct vma_swap_readahead *swap_ra) -{ - return NULL; -} - -static inline struct page *do_swap_page_readahead( - swp_entry_t fentry, gfp_t gfp_mask, - struct vm_fault *vmf, struct vma_swap_readahead *swap_ra) +static inline struct page *do_swap_page_readahead(swp_entry_t fentry, + gfp_t gfp_mask, struct vm_fault *vmf) { return NULL; } -- cgit v1.2.3 From e9e9b7ecee4a139a6fbe2e15ef224ca6b6c47d57 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 5 Apr 2018 16:23:42 -0700 Subject: mm: swap: 
unify cluster-based and vma-based swap readahead This patch makes do_swap_page() not need to be aware of two different swap readahead algorithms. Just unify cluster-based and vma-based readahead function call. Link: http://lkml.kernel.org/r/1509520520-32367-3-git-send-email-minchan@kernel.org Link: http://lkml.kernel.org/r/20180220085249.151400-3-minchan@kernel.org Signed-off-by: Minchan Kim Reviewed-by: Andrew Morton Cc: Hugh Dickins Cc: Huang Ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index fa92177d863e..2417d288e016 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -400,7 +400,6 @@ int generic_swapfile_activate(struct swap_info_struct *, struct file *, #define SWAP_ADDRESS_SPACE_SHIFT 14 #define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT) extern struct address_space *swapper_spaces[]; -extern bool swap_vma_readahead; #define swap_address_space(entry) \ (&swapper_spaces[swp_type(entry)][swp_offset(entry) \ >> SWAP_ADDRESS_SPACE_SHIFT]) @@ -422,10 +421,10 @@ extern struct page *read_swap_cache_async(swp_entry_t, gfp_t, extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t, struct vm_area_struct *vma, unsigned long addr, bool *new_page_allocated); -extern struct page *swapin_readahead(swp_entry_t, gfp_t, - struct vm_area_struct *vma, unsigned long addr); -extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, - struct vm_fault *vmf); +extern struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t flag, + struct vm_fault *vmf); +extern struct page *swapin_readahead(swp_entry_t entry, gfp_t flag, + struct vm_fault *vmf); /* linux/mm/swapfile.c */ extern atomic_long_t nr_swap_pages; @@ -433,11 +432,6 @@ extern long total_swap_pages; extern atomic_t nr_rotate_swap; extern bool 
has_usable_swap(void); -static inline bool swap_use_vma_readahead(void) -{ - return READ_ONCE(swap_vma_readahead) && !atomic_read(&nr_rotate_swap); -} - /* Swap 50% full? Release swapcache more aggressively.. */ static inline bool vm_swap_full(void) { @@ -533,19 +527,14 @@ static inline void put_swap_page(struct page *page, swp_entry_t swp) { } -static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, - struct vm_area_struct *vma, unsigned long addr) +static inline struct page *swap_cluster_readahead(swp_entry_t entry, + gfp_t gfp_mask, struct vm_fault *vmf) { return NULL; } -static inline bool swap_use_vma_readahead(void) -{ - return false; -} - -static inline struct page *do_swap_page_readahead(swp_entry_t fentry, - gfp_t gfp_mask, struct vm_fault *vmf) +static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, + struct vm_fault *vmf) { return NULL; } -- cgit v1.2.3 From 4f6923fbb352d126659cabe34806cff75c7b5ea0 Mon Sep 17 00:00:00 2001 From: Howard McLauchlan Date: Thu, 5 Apr 2018 16:23:57 -0700 Subject: mm: make should_failslab always available for fault injection should_failslab() is a convenient function to hook into for directed error injection into kmalloc(). However, it is only available if a config flag is set. The following BCC script, for example, fails kmalloc() calls after a btrfs umount: from bcc import BPF prog = r""" BPF_HASH(flag); #include int kprobe__btrfs_close_devices(void *ctx) { u64 key = 1; flag.update(&key, &key); return 0; } int kprobe__should_failslab(struct pt_regs *ctx) { u64 key = 1; u64 *res; res = flag.lookup(&key); if (res != 0) { bpf_override_return(ctx, -ENOMEM); } return 0; } """ b = BPF(text=prog) while 1: b.kprobe_poll() This patch refactors the should_failslab implementation so that the function is always available for error injection, independent of flags. This change would be similar in nature to commit f5490d3ec921 ("block: Add should_fail_bio() for bpf error injection"). 
Link: http://lkml.kernel.org/r/20180222020320.6944-1-hmclauchlan@fb.com Signed-off-by: Howard McLauchlan Reviewed-by: Andrew Morton Cc: Akinobu Mita Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Josef Bacik Cc: Johannes Weiner Cc: Alexei Starovoitov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fault-inject.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index c3c95d18bf43..7e6c77740413 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -64,10 +64,11 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name, struct kmem_cache; +int should_failslab(struct kmem_cache *s, gfp_t gfpflags); #ifdef CONFIG_FAILSLAB -extern bool should_failslab(struct kmem_cache *s, gfp_t gfpflags); +extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags); #else -static inline bool should_failslab(struct kmem_cache *s, gfp_t gfpflags) +static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) { return false; } -- cgit v1.2.3 From 05ea88608d4e135695571727f5d7f22967d2a3bf Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 5 Apr 2018 16:24:25 -0700 Subject: mm, hugetlbfs: introduce ->pagesize() to vm_operations_struct When device-dax is operating in huge-page mode we want it to behave like hugetlbfs and report the MMU page mapping size that is being enforced by the vma. Similar to commit 31383c6865a5 "mm, hugetlbfs: introduce ->split() to vm_operations_struct" it would be messy to teach vma_mmu_pagesize() about device-dax page mapping sizes in the same (hstate) way that hugetlbfs communicates this attribute. Instead, these patches introduce a new ->pagesize() vm operation. 
Link: http://lkml.kernel.org/r/151996254734.27922.15813097401404359642.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Reported-by: Jane Chu Reviewed-by: Andrew Morton Cc: Benjamin Herrenschmidt Cc: Michael Ellerman Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2e2be527642a..7c06581edaa2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -390,6 +390,7 @@ struct vm_operations_struct { int (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size); void (*map_pages)(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); + unsigned long (*pagesize)(struct vm_area_struct * area); /* notification that a previously read-only page is about to become * writable, if an error is returned it will cause a SIGBUS */ -- cgit v1.2.3 From 5844a486daf2705dcdbfabe869a698bdfe629f54 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 5 Apr 2018 16:24:32 -0700 Subject: include/linux/mm.h: provide consistent declaration for num_poisoned_pages clang reports the following compile warning. In file included from mm/vmscan.c:56: ./include/linux/swapops.h:327:22: warning: section attribute is specified on redeclared variable [-Wsection] extern atomic_long_t num_poisoned_pages __read_mostly; ^ ./include/linux/mm.h:2585:22: note: previous declaration is here extern atomic_long_t num_poisoned_pages; ^ Let's use __read_mostly everywhere. 
Link: http://lkml.kernel.org/r/1519686565-8224-1-git-send-email-linux@roeck-us.net Signed-off-by: Guenter Roeck Reviewed-by: Andrew Morton Cc: Matthias Kaehlcke Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7c06581edaa2..40fca1b2b6a1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2592,7 +2592,7 @@ extern int get_hwpoison_page(struct page *page); extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; extern void shake_page(struct page *p, int access); -extern atomic_long_t num_poisoned_pages; +extern atomic_long_t num_poisoned_pages __read_mostly; extern int soft_offline_page(struct page *page, int flags); -- cgit v1.2.3 From cb9f753a3731f7fe16447bea45cb6f8e8bb432fb Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 5 Apr 2018 16:24:39 -0700 Subject: mm: fix races between swapoff and flush dcache Thanks to commit 4b3ef9daa4fc ("mm/swap: split swap cache into 64MB trunks"), after swapoff the address_space associated with the swap device will be freed. So page_mapping() users which may touch the address_space need some kind of mechanism to prevent the address_space from being freed during accessing. The dcache flushing functions (flush_dcache_page(), etc) in architecture specific code may access the address_space of swap device for anonymous pages in swap cache via page_mapping() function. But in some cases there are no mechanisms to prevent the swap device from being swapoff, for example, CPU1 CPU2 __get_user_pages() swapoff() flush_dcache_page() mapping = page_mapping() ... exit_swap_address_space() ... kvfree(spaces) mapping_mapped(mapping) The address space may be accessed after being freed. 
But from cachetlb.txt and Russell King, flush_dcache_page() only cares about file cache pages; for anonymous pages, flush_anon_page() should be used. The implementation of flush_dcache_page() in all architectures follows this too. They will check whether page_mapping() is NULL and whether mapping_mapped() is true to determine whether to flush the dcache immediately. And they will use interval tree (mapping->i_mmap) to find all user space mappings. While mapping_mapped() and mapping->i_mmap aren't used by anonymous pages in swap cache at all. So, to fix the race between swapoff and flush dcache, page_mapping_file() is added to return the address_space for file cache pages and NULL otherwise. All page_mapping() invocations in flush dcache functions are replaced with page_mapping_file(). [akpm@linux-foundation.org: simplify page_mapping_file(), per Mike] Link: http://lkml.kernel.org/r/20180305083634.15174-1-ying.huang@intel.com Signed-off-by: "Huang, Ying" Reviewed-by: Andrew Morton Cc: Minchan Kim Cc: Michal Hocko Cc: Johannes Weiner Cc: Mel Gorman Cc: Dave Hansen Cc: Chen Liqin Cc: Russell King Cc: Yoshinori Sato Cc: "James E.J. Bottomley" Cc: Guan Xuetao Cc: "David S. 
Miller" Cc: Chris Zankel Cc: Vineet Gupta Cc: Ley Foon Tan Cc: Ralf Baechle Cc: Andi Kleen Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 40fca1b2b6a1..88d82ba29d72 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1155,6 +1155,7 @@ static inline pgoff_t page_index(struct page *page) bool page_mapped(struct page *page); struct address_space *page_mapping(struct page *page); +struct address_space *page_mapping_file(struct page *page); /* * Return true only if the page has been allocated with -- cgit v1.2.3 From 010b495e2fa32353d0ef6aa70a8169e5ef617a15 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 5 Apr 2018 16:24:43 -0700 Subject: zsmalloc: introduce zs_huge_class_size() Patch series "zsmalloc/zram: drop zram's max_zpage_size", v3. ZRAM's max_zpage_size is a bad thing. It forces zsmalloc to store normal objects as huge ones, which results in bigger zsmalloc memory usage. Drop it and use actual zsmalloc huge-class value when deciding if the object is huge or not. This patch (of 2): Not every object can share its zspage with other objects, e.g. when the object is as big as zspage or nearly as big as a zspage. For such objects zsmalloc has a so-called huge class - every object which belongs to huge class consumes the entire zspage (which consists of a physical page). On x86_64, PAGE_SHIFT 12 box, the first non-huge class size is 3264, so starting down from size 3264, objects can share page(-s) and thus minimize memory wastage. ZRAM, however, has its own statically defined watermark for huge objects, namely "3 * PAGE_SIZE / 4 = 3072", and forcibly stores every object larger than this watermark (3072) as a PAGE_SIZE object, in other words, to a huge class, while zsmalloc can keep some of those objects in non-huge classes. This results in increased memory consumption. 
zsmalloc knows better if the object is huge or not. Introduce zs_huge_class_size() function which tells if the given object can be stored in one of non-huge classes or not. This will let us drop ZRAM's huge object watermark and fully rely on zsmalloc when we decide if the object is huge. [sergey.senozhatsky.work@gmail.com: add pool param to zs_huge_class_size()] Link: http://lkml.kernel.org/r/20180314081833.1096-2-sergey.senozhatsky@gmail.com Link: http://lkml.kernel.org/r/20180306070639.7389-2-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zsmalloc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 57a8e98f2708..2219cce81ca4 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -47,6 +47,8 @@ void zs_destroy_pool(struct zs_pool *pool); unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags); void zs_free(struct zs_pool *pool, unsigned long obj); +size_t zs_huge_class_size(struct zs_pool *pool); + void *zs_map_object(struct zs_pool *pool, unsigned long handle, enum zs_mapmode mm); void zs_unmap_object(struct zs_pool *pool, unsigned long handle); -- cgit v1.2.3 From 0c7c1bed7e13dbb545375c231e6ba1dca5e8d725 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 5 Apr 2018 16:25:08 -0700 Subject: mm: make counting of list_lru_one::nr_items lockless During the reclaiming slab of a memcg, shrink_slab iterates over all registered shrinkers in the system, and tries to count and consume objects related to the cgroup. In case of memory pressure, this behaves badly: I observe high system time and time spent in list_lru_count_one() for many processes on RHEL7 kernel. This patch makes list_lru_node::memcg_lrus RCU-protected, which allows skipping the spinlock in list_lru_count_one(). 
Shakeel Butt with the patch observes significant perf graph change. He says: ======================================================================== Setup: running a fork-bomb in a memcg of 200MiB on a 8GiB and 4 vcpu VM and recording the trace with 'perf record -g -a'. The trace without the patch: + 34.19% fb.sh [kernel.kallsyms] [k] queued_spin_lock_slowpath + 30.77% fb.sh [kernel.kallsyms] [k] _raw_spin_lock + 3.53% fb.sh [kernel.kallsyms] [k] list_lru_count_one + 2.26% fb.sh [kernel.kallsyms] [k] super_cache_count + 1.68% fb.sh [kernel.kallsyms] [k] shrink_slab + 0.59% fb.sh [kernel.kallsyms] [k] down_read_trylock + 0.48% fb.sh [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore + 0.38% fb.sh [kernel.kallsyms] [k] shrink_node_memcg + 0.32% fb.sh [kernel.kallsyms] [k] queue_work_on + 0.26% fb.sh [kernel.kallsyms] [k] count_shadow_nodes With the patch: + 0.16% swapper [kernel.kallsyms] [k] default_idle + 0.13% oom_reaper [kernel.kallsyms] [k] mutex_spin_on_owner + 0.05% perf [kernel.kallsyms] [k] copy_user_generic_string + 0.05% init.real [kernel.kallsyms] [k] wait_consider_task + 0.05% kworker/0:0 [kernel.kallsyms] [k] finish_task_switch + 0.04% kworker/2:1 [kernel.kallsyms] [k] finish_task_switch + 0.04% kworker/3:1 [kernel.kallsyms] [k] finish_task_switch + 0.04% kworker/1:0 [kernel.kallsyms] [k] finish_task_switch + 0.03% binary [kernel.kallsyms] [k] copy_page ======================================================================== Thanks Shakeel for the testing. 
[ktkhai@virtuozzo.com: v2] Link: http://lkml.kernel.org/r/151203869520.3915.2587549826865799173.stgit@localhost.localdomain Link: http://lkml.kernel.org/r/150583358557.26700.8490036563698102569.stgit@localhost.localdomain Signed-off-by: Kirill Tkhai Tested-by: Shakeel Butt Acked-by: Vladimir Davydov Cc: Andrey Ryabinin Cc: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list_lru.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index bb8129a3474d..96def9d15b1b 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -32,6 +32,7 @@ struct list_lru_one { }; struct list_lru_memcg { + struct rcu_head rcu; /* array of per cgroup lists, indexed by memcg_cache_id */ struct list_lru_one *lru[0]; }; @@ -43,7 +44,7 @@ struct list_lru_node { struct list_lru_one lru; #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB) /* for cgroup aware lrus points to per cgroup lists, otherwise NULL */ - struct list_lru_memcg *memcg_lrus; + struct list_lru_memcg __rcu *memcg_lrus; #endif long nr_items; } ____cacheline_aligned_in_smp; -- cgit v1.2.3 From 5ecd9d403ad081ed2de7b118c1e96124d4e0ba6c Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 5 Apr 2018 16:25:16 -0700 Subject: mm, page_alloc: wakeup kcompactd even if kswapd cannot free more memory Kswapd will not wakeup if per-zone watermarks are not failing or if too many previous attempts at background reclaim have failed. This can be true if there is a lot of free memory available. For high- order allocations, kswapd is responsible for waking up kcompactd for background compaction. If the zone is not below its watermarks or reclaim has recently failed (lots of free memory, nothing left to reclaim), kcompactd does not get woken up. When __GFP_DIRECT_RECLAIM is not allowed, allow kcompactd to still be woken up even if kswapd will not reclaim. 
This allows high-order allocations, such as thp, to still trigger background compaction even when the zone has an abundance of free memory. Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1803111659420.209721@chino.kir.corp.google.com Signed-off-by: David Rientjes Acked-by: Vlastimil Babka Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5d935411d3c4..f11ae29005f1 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -776,7 +776,8 @@ static inline bool is_dev_zone(const struct zone *zone) #include void build_all_zonelists(pg_data_t *pgdat); -void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx); +void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order, + enum zone_type classzone_idx); bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int classzone_idx, unsigned int alloc_flags, long free_pages); -- cgit v1.2.3 From 1c8f422059ae5da07db7406ab916203f9417e396 Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Thu, 5 Apr 2018 16:25:23 -0700 Subject: mm: change return type to vm_fault_t The plan for these patches is to introduce the typedef, initially just as documentation ("These functions should return a VM_FAULT_ status"). We'll trickle the patches to individual drivers/filesystems in through the maintainers, as far as possible. Then we'll change the typedef to an unsigned int and break the compilation of any unconverted drivers/filesystems. vmf_insert_page(), vmf_insert_mixed() and vmf_insert_pfn() are three newly added functions. The various drivers/filesystems where return value of fault(), huge_fault(), page_mkwrite() and pfn_mkwrite() get converted, will need them. These functions will return correct VM_FAULT_ code based on err value. We've had bugs before where drivers returned -EFOO. 
And we have this silly inefficiency where vm_insert_xxx() returns an errno which (afaict) every driver then converts into a VM_FAULT code. In many cases drivers failed to return correct VM_FAULT code value despite vm_insert_xxx() failing. We have identified and cleaned up all those existing bugs and silly inefficiencies in drivers/filesystems by adding these three new inline wrappers. As mentioned above, we will trickle those patches to individual drivers/filesystems in through maintainers after these three wrapper functions are merged. Eventually we can convert vm_insert_xxx() into vmf_insert_xxx() and remove these inline wrappers, but these are a good intermediate step. Link: http://lkml.kernel.org/r/20180310162351.GA7422@jordon-HP-15-Notebook-PC Signed-off-by: Souptick Joarder Acked-by: Michal Hocko Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 47 +++++++++++++++++++++++++++++++++++++++++++---- include/linux/mm_types.h | 2 ++ 2 files changed, 45 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 88d82ba29d72..3ad632366973 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -386,18 +386,19 @@ struct vm_operations_struct { void (*close)(struct vm_area_struct * area); int (*split)(struct vm_area_struct * area, unsigned long addr); int (*mremap)(struct vm_area_struct * area); - int (*fault)(struct vm_fault *vmf); - int (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size); + vm_fault_t (*fault)(struct vm_fault *vmf); + vm_fault_t (*huge_fault)(struct vm_fault *vmf, + enum page_entry_size pe_size); void (*map_pages)(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); unsigned long (*pagesize)(struct vm_area_struct * area); /* notification that a previously read-only page is about to become * writable, if an error is returned it will cause a SIGBUS */ - int (*page_mkwrite)(struct vm_fault *vmf); + vm_fault_t 
(*page_mkwrite)(struct vm_fault *vmf); /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */ - int (*pfn_mkwrite)(struct vm_fault *vmf); + vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf); /* called by access_process_vm when get_user_pages() fails, typically * for use by special VMAs that can switch between memory and hardware @@ -2424,6 +2425,44 @@ int vm_insert_mixed_mkwrite(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn); int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); +static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma, + unsigned long addr, struct page *page) +{ + int err = vm_insert_page(vma, addr, page); + + if (err == -ENOMEM) + return VM_FAULT_OOM; + if (err < 0 && err != -EBUSY) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; +} + +static inline vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, + unsigned long addr, pfn_t pfn) +{ + int err = vm_insert_mixed(vma, addr, pfn); + + if (err == -ENOMEM) + return VM_FAULT_OOM; + if (err < 0 && err != -EBUSY) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; +} + +static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn) +{ + int err = vm_insert_pfn(vma, addr, pfn); + + if (err == -ENOMEM) + return VM_FAULT_OOM; + if (err < 0 && err != -EBUSY) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; +} struct page *follow_page_mask(struct vm_area_struct *vma, unsigned long address, unsigned int foll_flags, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index fd1af6b9591d..21612347d311 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -22,6 +22,8 @@ #endif #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1)) +typedef int vm_fault_t; + struct address_space; struct mem_cgroup; struct hmm; -- cgit v1.2.3 From 91241681c62a5a690c88eb2aca027f094125eaac Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 5 Apr 2018 16:25:30 
-0700 Subject: include/linux/mmdebug.h: make VM_WARN* non-rvals At present the construct if (VM_WARN(...)) will compile OK with CONFIG_DEBUG_VM=y and will fail with CONFIG_DEBUG_VM=n. The reason is that VM_{WARN,BUG}* have always been special wrt. {WARN/BUG}* and never generate any code when DEBUG_VM is disabled. So we cannot really use it in conditionals. We considered changing things so that this construct works in both cases but that might cause unwanted code generation with CONFIG_DEBUG_VM=n. It is safer and simpler to make the build fail in both cases. [akpm@linux-foundation.org: changelog] Signed-off-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmdebug.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 57b0030d3800..2ad72d2c8cc5 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -37,10 +37,10 @@ void dump_mm(const struct mm_struct *mm); BUG(); \ } \ } while (0) -#define VM_WARN_ON(cond) WARN_ON(cond) -#define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond) -#define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format) -#define VM_WARN(cond, format...) WARN(cond, format) +#define VM_WARN_ON(cond) (void)WARN_ON(cond) +#define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) +#define VM_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format) +#define VM_WARN(cond, format...) (void)WARN(cond, format) #else #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond) -- cgit v1.2.3 From 514c60324960137e74457fdc233a339b985fa8a8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 5 Apr 2018 16:25:34 -0700 Subject: headers: untangle kmemleak.h from mm.h Currently <linux/slab.h> #includes <linux/kmemleak.h> for no obvious reason. 
It looks like it's only a convenience, so remove kmemleak.h from slab.h and add <linux/kmemleak.h> to any users of kmemleak_* that don't already #include it. Also remove <linux/kmemleak.h> from source files that do not use it. This is tested on i386 allmodconfig and x86_64 allmodconfig. It would be good to run it through the 0day bot for other $ARCHes. I have neither the horsepower nor the storage space for the other $ARCHes. Update: This patch has been extensively build-tested by both the 0day bot & kisskb/ozlabs build farms. Both of them reported 2 build failures for which patches are included here (in v2). [ slab.h is the second most used header file after module.h; kernel.h is right there with slab.h. There could be some minor error in the counting due to some #includes having comments after them and I didn't combine all of those. ] [akpm@linux-foundation.org: security/keys/big_key.c needs vmalloc.h, per sfr] Link: http://lkml.kernel.org/r/e4309f98-3749-93e1-4bb7-d9501a39d015@infradead.org Link: http://kisskb.ellerman.id.au/kisskb/head/13396/ Signed-off-by: Randy Dunlap Reviewed-by: Ingo Molnar Reported-by: Michael Ellerman [2 build failures] Reported-by: Fengguang Wu [2 build failures] Reviewed-by: Andrew Morton Cc: Wei Yongjun Cc: Luis R. Rodriguez Cc: Greg Kroah-Hartman Cc: Mimi Zohar Cc: John Johansen Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 04402c637171..81ebd71f8c03 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -125,7 +125,6 @@ #define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \ (unsigned long)ZERO_SIZE_PTR) -#include <linux/kmemleak.h> #include <linux/kasan.h> struct mem_cgroup; -- cgit v1.2.3 From efefc97736e6f3261879bc9dddcb161224a455f5 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Tue, 20 Mar 2018 10:11:28 +0100 Subject: jiffies: Introduce USER_TICK_USEC and redefine TICK_USEC Since the subsequent changes will need a TICK_USEC definition analogous to TICK_NSEC, rename the existing TICK_USEC as USER_TICK_USEC, update its users and redefine TICK_USEC accordingly. Suggested-by: Peter Zijlstra Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Reviewed-by: Frederic Weisbecker --- include/linux/jiffies.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 9385aa57497b..a27cf6652327 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -62,8 +62,11 @@ extern int register_refined_jiffies(long clock_tick_rate); /* TICK_NSEC is the time between ticks in nsec assuming SHIFTED_HZ */ #define TICK_NSEC ((NSEC_PER_SEC+HZ/2)/HZ) -/* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */ -#define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ) +/* TICK_USEC is the time between ticks in usec assuming SHIFTED_HZ */ +#define TICK_USEC ((USEC_PER_SEC + HZ/2) / HZ) + +/* USER_TICK_USEC is the time between ticks in usec assuming fake USER_HZ */ +#define USER_TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ) #ifndef __jiffy_arch_data #define __jiffy_arch_data -- cgit v1.2.3 From 45f1ff59e27ca59d33cc1a317e669d90022ccf7d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 22 Mar 2018 17:50:49 +0100 Subject: cpuidle: Return nohz hint from cpuidle_select() Add a new pointer argument to cpuidle_select() and to the ->select cpuidle governor callback to allow a boolean value indicating whether or not the tick should be stopped before entering the selected state to be returned from there. 
Make the ladder governor ignore that pointer (to preserve its current behavior) and make the menu governor return 'false' through it if: (1) the idle exit latency is constrained at 0, or (2) the selected state is a polling one, or (3) the expected idle period duration is within the tick period range. In addition to that, the correction factor computations in the menu governor need to take the possibility that the tick may not be stopped into account to avoid artificially small correction factor values. To that end, add a mechanism to record tick wakeups, as suggested by Peter Zijlstra, and use it to modify the menu_update() behavior when tick wakeup occurs. Namely, if the CPU is woken up by the tick and the return value of tick_nohz_get_sleep_length() is not within the tick boundary, the predicted idle duration is likely too short, so make menu_update() try to compensate for that by updating the governor statistics as though the CPU was idle for a long time. Since the value returned through the new argument pointer of cpuidle_select() is not used by its caller yet, this change by itself is not expected to alter the functionality of the code. Signed-off-by: Rafael J. 
Wysocki Acked-by: Peter Zijlstra (Intel) --- include/linux/cpuidle.h | 8 +++++--- include/linux/tick.h | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index a806e94c482f..1eefabf1621f 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -135,7 +135,8 @@ extern bool cpuidle_not_available(struct cpuidle_driver *drv, struct cpuidle_device *dev); extern int cpuidle_select(struct cpuidle_driver *drv, - struct cpuidle_device *dev); + struct cpuidle_device *dev, + bool *stop_tick); extern int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index); extern void cpuidle_reflect(struct cpuidle_device *dev, int index); @@ -167,7 +168,7 @@ static inline bool cpuidle_not_available(struct cpuidle_driver *drv, struct cpuidle_device *dev) {return true; } static inline int cpuidle_select(struct cpuidle_driver *drv, - struct cpuidle_device *dev) + struct cpuidle_device *dev, bool *stop_tick) {return -ENODEV; } static inline int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) @@ -250,7 +251,8 @@ struct cpuidle_governor { struct cpuidle_device *dev); int (*select) (struct cpuidle_driver *drv, - struct cpuidle_device *dev); + struct cpuidle_device *dev, + bool *stop_tick); void (*reflect) (struct cpuidle_device *dev, int index); }; diff --git a/include/linux/tick.h b/include/linux/tick.h index fccebfba167e..ef0717e5e526 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -120,6 +120,7 @@ extern void tick_nohz_idle_restart_tick(void); extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); +extern bool tick_nohz_idle_got_tick(void); extern ktime_t tick_nohz_get_sleep_length(void); extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); @@ -141,6 +142,7 @@ static inline void 
tick_nohz_idle_stop_tick(void) { } static inline void tick_nohz_idle_restart_tick(void) { } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } +static inline bool tick_nohz_idle_got_tick(void) { return false; } static inline ktime_t tick_nohz_get_sleep_length(void) { -- cgit v1.2.3 From ee1235a9a06813429c201bf186397a6feeea07bf Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Apr 2018 13:41:28 +0100 Subject: fscache: Pass object size in rather than calling back for it Pass the object size in to fscache_acquire_cookie() and fscache_write_page() rather than the netfs providing a callback by which it can be received. This makes it easier to update the size of the object when a new page is written that extends the object. The current object size is also passed by fscache to the check_aux function, obviating the need to store it in the aux data. Signed-off-by: David Howells Acked-by: Anna Schumaker Tested-by: Steve Dickson --- include/linux/fscache-cache.h | 3 ++- include/linux/fscache.h | 32 ++++++++++++++++---------------- 2 files changed, 18 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 3e764fd38d9f..34cf0fdd7dc7 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -553,7 +553,8 @@ extern bool fscache_object_sleep_till_congested(signed long *timeoutp); extern enum fscache_checkaux fscache_check_aux(struct fscache_object *object, const void *data, - uint16_t datalen); + uint16_t datalen, + loff_t object_size); extern void fscache_object_retrying_stale(struct fscache_object *object); diff --git a/include/linux/fscache.h b/include/linux/fscache.h index a2d3a2116248..eb38f39cf832 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -83,22 +83,15 @@ struct fscache_cookie_def { const void *parent_netfs_data, const void *cookie_netfs_data); - /* get certain file attributes from 
the netfs data - * - this function can be absent for an index - * - not permitted to return an error - * - the netfs data from the cookie being used as the source is - * presented - */ - void (*get_attr)(const void *cookie_netfs_data, uint64_t *size); - /* consult the netfs about the state of an object * - this function can be absent if the index carries no state data * - the netfs data from the cookie being used as the target is - * presented, as is the auxiliary data + * presented, as is the auxiliary data and the object size */ enum fscache_checkaux (*check_aux)(void *cookie_netfs_data, const void *data, - uint16_t datalen); + uint16_t datalen, + loff_t object_size); /* get an extra reference on a read context * - this function can be absent if the completion function doesn't @@ -200,7 +193,7 @@ extern struct fscache_cookie *__fscache_acquire_cookie( const struct fscache_cookie_def *, const void *, size_t, const void *, size_t, - void *, bool); + void *, loff_t, bool); extern void __fscache_relinquish_cookie(struct fscache_cookie *, const void *, bool); extern int __fscache_check_consistency(struct fscache_cookie *, const void *); extern void __fscache_update_cookie(struct fscache_cookie *, const void *); @@ -220,7 +213,7 @@ extern int __fscache_read_or_alloc_pages(struct fscache_cookie *, void *, gfp_t); extern int __fscache_alloc_page(struct fscache_cookie *, struct page *, gfp_t); -extern int __fscache_write_page(struct fscache_cookie *, struct page *, gfp_t); +extern int __fscache_write_page(struct fscache_cookie *, struct page *, loff_t, gfp_t); extern void __fscache_uncache_page(struct fscache_cookie *, struct page *); extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *); extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *); @@ -231,7 +224,7 @@ extern void __fscache_uncache_all_inode_pages(struct fscache_cookie *, extern void __fscache_readpages_cancel(struct fscache_cookie *cookie, struct list_head 
*pages); extern void __fscache_disable_cookie(struct fscache_cookie *, const void *, bool); -extern void __fscache_enable_cookie(struct fscache_cookie *, const void *, +extern void __fscache_enable_cookie(struct fscache_cookie *, const void *, loff_t, bool (*)(void *), void *); /** @@ -315,6 +308,7 @@ void fscache_release_cache_tag(struct fscache_cache_tag *tag) * @aux_data_len: Size of the auxiliary data buffer * @netfs_data: An arbitrary piece of data to be kept in the cookie to * represent the cache object to the netfs + * @object_size: The initial size of object * @enable: Whether or not to enable a data cookie immediately * * This function is used to inform FS-Cache about part of an index hierarchy @@ -333,13 +327,14 @@ struct fscache_cookie *fscache_acquire_cookie( const void *aux_data, size_t aux_data_len, void *netfs_data, + loff_t object_size, bool enable) { if (fscache_cookie_valid(parent) && fscache_cookie_enabled(parent)) return __fscache_acquire_cookie(parent, def, index_key, index_key_len, aux_data, aux_data_len, - netfs_data, enable); + netfs_data, object_size, enable); else return NULL; } @@ -660,6 +655,7 @@ void fscache_readpages_cancel(struct fscache_cookie *cookie, * fscache_write_page - Request storage of a page in the cache * @cookie: The cookie representing the cache object * @page: The netfs page to store + * @object_size: Updated size of object * @gfp: The conditions under which memory allocation should be made * * Request the contents of the netfs page be written into the cache. 
This @@ -677,10 +673,11 @@ void fscache_readpages_cancel(struct fscache_cookie *cookie, static inline int fscache_write_page(struct fscache_cookie *cookie, struct page *page, + loff_t object_size, gfp_t gfp) { if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_write_page(cookie, page, gfp); + return __fscache_write_page(cookie, page, object_size, gfp); else return -ENOBUFS; } @@ -819,6 +816,7 @@ void fscache_disable_cookie(struct fscache_cookie *cookie, * fscache_enable_cookie - Reenable a cookie * @cookie: The cookie representing the cache object * @aux_data: The updated auxiliary data for the cookie (may be NULL) + * @object_size: Current size of object * @can_enable: A function to permit enablement once lock is held * @data: Data for can_enable() * @@ -833,11 +831,13 @@ void fscache_disable_cookie(struct fscache_cookie *cookie, static inline void fscache_enable_cookie(struct fscache_cookie *cookie, const void *aux_data, + loff_t object_size, bool (*can_enable)(void *data), void *data) { if (fscache_cookie_valid(cookie) && !fscache_cookie_enabled(cookie)) - __fscache_enable_cookie(cookie, aux_data, can_enable, data); + __fscache_enable_cookie(cookie, aux_data, object_size, + can_enable, data); } #endif /* _LINUX_FSCACHE_H */ -- cgit v1.2.3 From ec0328e46d6e5d0f17372eb90ab8e333c2ac7ca9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Apr 2018 13:41:28 +0100 Subject: fscache: Maintain a catalogue of allocated cookies Maintain a catalogue of allocated cookies so that cookie collisions can be handled properly. For the moment, this just involves printing a warning and returning a NULL cookie to the caller of fscache_acquire_cookie(), but in future it might make sense to wait for the old cookie to finish being cleaned up. This requires the cookie key to be stored attached to the cookie so that we still have the key available if the netfs relinquishes the cookie. This is done by an earlier patch. 
The catalogue also renders redundant fscache_netfs_list (used for checking for duplicates), so that can be removed. Signed-off-by: David Howells Acked-by: Anna Schumaker Tested-by: Steve Dickson --- include/linux/fscache.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index eb38f39cf832..84b90a79d75a 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -22,6 +22,7 @@ #include #include #include +#include #if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) #define fscache_available() (1) @@ -124,7 +125,6 @@ struct fscache_netfs { uint32_t version; /* indexing version */ const char *name; /* filesystem name */ struct fscache_cookie *primary_index; - struct list_head link; /* internal link */ }; /* @@ -143,6 +143,7 @@ struct fscache_cookie { struct hlist_head backing_objects; /* object(s) backing this file/index */ const struct fscache_cookie_def *def; /* definition */ struct fscache_cookie *parent; /* parent of this entry */ + struct hlist_bl_node hash_link; /* Link in hash table */ void *netfs_data; /* back pointer to netfs */ struct radix_tree_root stores; /* pages to be stored on this cookie */ #define FSCACHE_COOKIE_PENDING_TAG 0 /* pages tag: pending write to cache */ @@ -156,11 +157,14 @@ struct fscache_cookie { #define FSCACHE_COOKIE_RELINQUISHED 4 /* T if cookie has been relinquished */ #define FSCACHE_COOKIE_ENABLED 5 /* T if cookie is enabled */ #define FSCACHE_COOKIE_ENABLEMENT_LOCK 6 /* T if cookie is being en/disabled */ -#define FSCACHE_COOKIE_AUX_UPDATED 7 /* T if the auxiliary data was updated */ +#define FSCACHE_COOKIE_AUX_UPDATED 8 /* T if the auxiliary data was updated */ +#define FSCACHE_COOKIE_ACQUIRED 9 /* T if cookie is in use */ +#define FSCACHE_COOKIE_RELINQUISHING 10 /* T if cookie is being relinquished */ u8 type; /* Type of object */ u8 key_len; /* Length of index key */ u8 aux_len; /* Length of auxiliary 
data */ + u32 key_hash; /* Hash of parent, type, key, len */ union { void *key; /* Index key */ u8 inline_key[16]; /* - If the key is short enough */ -- cgit v1.2.3 From 1ff19f487a7e55bf3cebc96ea2a9a38d66fb7db7 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 6 Apr 2018 15:21:13 +1000 Subject: libnvdimm: Add of_node to region and bus descriptors We want to be able to cross reference the region and bus devices with the device tree node that they were spawned from. libNVDIMM handles creating the actual devices for these internally, so we need to pass in a pointer to the relevant node in the descriptor. Signed-off-by: Oliver O'Halloran Acked-by: Dan Williams Acked-by: Balbir Singh Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index e0684a678a1b..097072c5a852 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -76,12 +76,14 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc); +struct device_node; struct nvdimm_bus_descriptor { const struct attribute_group **attr_groups; unsigned long bus_dsm_mask; unsigned long cmd_mask; struct module *module; char *provider_name; + struct device_node *of_node; ndctl_fn ndctl; int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); int (*clear_to_send)(struct nvdimm_bus_descriptor *nd_desc, @@ -123,6 +125,7 @@ struct nd_region_desc { int num_lanes; int numa_node; unsigned long flags; + struct device_node *of_node; }; struct device; -- cgit v1.2.3 From a59855cd8c613ba4bb95147f6176360d95f75e60 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Tue, 3 Apr 2018 23:17:00 +0200 Subject: time: hrtimer: Introduce hrtimer_next_event_without() The next set of changes will need to compute the time to the next hrtimer event over all hrtimers except for the scheduler tick one. To that end introduce a new helper function, hrtimer_next_event_without(), for computing the time until the next hrtimer event over all timers except for one and modify the underlying code in __hrtimer_next_event_base() to prepare it for being called by that new function. No intentional changes in functionality. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Reviewed-by: Frederic Weisbecker --- include/linux/hrtimer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index c7902ca7c9f4..3892e9c8b2de 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -426,6 +426,7 @@ static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer) } extern u64 hrtimer_get_next_event(void); +extern u64 hrtimer_next_event_without(const struct hrtimer *exclude); extern bool hrtimer_active(const struct hrtimer *timer); -- cgit v1.2.3 From 554c8aa8ecade210d58a252173bb8f2106552a44 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 3 Apr 2018 23:17:11 +0200 Subject: sched: idle: Select idle state before stopping the tick In order to address the issue with short idle duration predictions by the idle governor after the scheduler tick has been stopped, reorder the code in cpuidle_idle_call() so that the governor idle state selection runs before tick_nohz_idle_go_idle() and use the "nohz" hint returned by cpuidle_select() to decide whether or not to stop the tick. This isn't straightforward, because menu_select() invokes tick_nohz_get_sleep_length() to get the time to the next timer event and the number returned by the latter comes from __tick_nohz_idle_stop_tick(). 
Fortunately, however, it is possible to compute that number without actually stopping the tick and with the help of the existing code. Namely, tick_nohz_get_sleep_length() can be made to call tick_nohz_next_event(), introduced earlier, to get the time to the next non-highres timer event. If that happens, tick_nohz_next_event() need not be called by __tick_nohz_idle_stop_tick() again. If it turns out that the scheduler tick cannot be stopped going forward or the next timer event is too close for the tick to be stopped, tick_nohz_get_sleep_length() can simply return the time to the next event currently programmed into the corresponding clock event device. In addition to knowing the return value of tick_nohz_next_event(), however, tick_nohz_get_sleep_length() needs to know the time to the next highres timer event, but with the scheduler tick timer excluded, which can be computed with the help of hrtimer_get_next_event(). The minimum of that number and the tick_nohz_next_event() return value is the total time to the next timer event with the assumption that the tick will be stopped. It can be returned to the idle governor which can use it for predicting idle duration (under the assumption that the tick will be stopped) and deciding whether or not it makes sense to stop the tick before putting the CPU into the selected idle state. With the above, the sleep_length field in struct tick_sched is not necessary any more, so drop it. Link: https://bugzilla.kernel.org/show_bug.cgi?id=199227 Reported-by: Doug Smythies Reported-by: Thomas Ilsche Signed-off-by: Rafael J. 
Wysocki Acked-by: Peter Zijlstra (Intel) Reviewed-by: Frederic Weisbecker --- include/linux/tick.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index ef0717e5e526..e8e7ff16b929 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -116,6 +116,7 @@ extern bool tick_nohz_enabled; extern bool tick_nohz_tick_stopped(void); extern bool tick_nohz_tick_stopped_cpu(int cpu); extern void tick_nohz_idle_stop_tick(void); +extern void tick_nohz_idle_retain_tick(void); extern void tick_nohz_idle_restart_tick(void); extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); @@ -139,6 +140,7 @@ static inline void tick_nohz_idle_stop_tick_protected(void) static inline int tick_nohz_tick_stopped(void) { return 0; } static inline int tick_nohz_tick_stopped_cpu(int cpu) { return 0; } static inline void tick_nohz_idle_stop_tick(void) { } +static inline void tick_nohz_idle_retain_tick(void) { } static inline void tick_nohz_idle_restart_tick(void) { } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } -- cgit v1.2.3 From 296bb1e51a4838a6488ec5ce676607093482ecbc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Apr 2018 19:12:34 +0200 Subject: cpuidle: menu: Refine idle state selection for running tick If the tick isn't stopped, the target residency of the state selected by the menu governor may be greater than the actual time to the next tick and that means lost energy. To avoid that, make tick_nohz_get_sleep_length() return the current time to the next event (before stopping the tick) in addition to the estimated one via an extra pointer argument and make menu_select() use that value to refine the state selection when necessary. Signed-off-by: Rafael J. 
Wysocki Acked-by: Peter Zijlstra (Intel) --- include/linux/tick.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index e8e7ff16b929..55388ab45fd4 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -122,7 +122,7 @@ extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern bool tick_nohz_idle_got_tick(void); -extern ktime_t tick_nohz_get_sleep_length(void); +extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next); extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); @@ -146,9 +146,10 @@ static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } static inline bool tick_nohz_idle_got_tick(void) { return false; } -static inline ktime_t tick_nohz_get_sleep_length(void) +static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) { - return NSEC_PER_SEC / HZ; + *delta_next = TICK_NSEC; + return *delta_next; } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } -- cgit v1.2.3 From e145242ea0df6b7d28fd7186e61d6840fa4bb06e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 9 Apr 2018 12:51:42 +0200 Subject: syscalls/core, syscalls/x86: Clean up syscall stub naming convention Tidy the naming convention for compat syscall stubs. Hints which describe the purpose of the stub go in front and receive a double underscore to denote that they are generated on-the-fly by the SYSCALL_DEFINEx() macro. 
For the generic case, this means (0xffffffff prefix removed): 810f08d0 t kernel_waitid # common C function (see kernel/exit.c) __do_sys_waitid # inlined helper doing the actual work # (takes original parameters as declared) 810f1aa0 T __se_sys_waitid # sign-extending C function calling inlined # helper (takes parameters of type long; # casts them to the declared type) 810f1aa0 T sys_waitid # alias to __se_sys_waitid() (taking # parameters as declared), to be included # in syscall table For x86, the naming is as follows: 810efc70 t kernel_waitid # common C function (see kernel/exit.c) __do_sys_waitid # inlined helper doing the actual work # (takes original parameters as declared) 810efd60 t __se_sys_waitid # sign-extending C function calling inlined # helper (takes parameters of type long; # casts them to the declared type) 810f1140 T __ia32_sys_waitid # IA32_EMULATION 32-bit-ptregs -> C stub, # calls __se_sys_waitid(); to be included # in syscall table 810f1110 T sys_waitid # x86 64-bit-ptregs -> C stub, calls # __se_sys_waitid(); to be included in # syscall table For x86, sys_waitid() will be re-named to __x64_sys_waitid in a follow-up patch. Suggested-by: Ingo Molnar Signed-off-by: Dominik Brodowski Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180409105145.5364-2-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index d7168b3a4b4c..70fcda1a9049 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -223,21 +223,26 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) #define __PROTECT(...) 
asmlinkage_protect(__VA_ARGS__) +/* + * The asmlinkage stub is aliased to a function named __se_sys_*() which + * sign-extends 32-bit ints to longs whenever needed. The actual work is + * done within __do_sys_*(). + */ #ifndef __SYSCALL_DEFINEx #define __SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ - __attribute__((alias(__stringify(SyS##name)))); \ + __attribute__((alias(__stringify(__se_sys##name)))); \ ALLOW_ERROR_INJECTION(sys##name, ERRNO); \ - static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ - asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ - asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ + asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ - long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ + long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));\ __MAP(x,__SC_TEST,__VA_ARGS__); \ __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ return ret; \ } \ - static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)) + static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* __SYSCALL_DEFINEx */ /* -- cgit v1.2.3 From 5ac9efa3c50d7caff9f3933bb8a3ad1139d92d92 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 9 Apr 2018 12:51:43 +0200 Subject: syscalls/core, syscalls/x86: Clean up compat syscall stub naming convention Tidy the naming convention for compat syscall stubs. Hints which describe the purpose of the stub go in front and receive a double underscore to denote that they are generated on-the-fly by the COMPAT_SYSCALL_DEFINEx() macro. 
For the generic case, this means: t kernel_waitid # common C function (see kernel/exit.c) __do_compat_sys_waitid # inlined helper doing the actual work # (takes original parameters as declared) T __se_compat_sys_waitid # sign-extending C function calling inlined # helper (takes parameters of type long, # casts them to unsigned long and then to # the declared type) T compat_sys_waitid # alias to __se_compat_sys_waitid() # (taking parameters as declared), to # be included in syscall table For x86, the naming is as follows: t kernel_waitid # common C function (see kernel/exit.c) __do_compat_sys_waitid # inlined helper doing the actual work # (takes original parameters as declared) t __se_compat_sys_waitid # sign-extending C function calling inlined # helper (takes parameters of type long, # casts them to unsigned long and then to # the declared type) T __ia32_compat_sys_waitid # IA32_EMULATION 32-bit-ptregs -> C stub, # calls __se_compat_sys_waitid(); to be # included in syscall table T __x32_compat_sys_waitid # x32 64-bit-ptregs -> C stub, calls # __se_compat_sys_waitid(); to be included # in syscall table If only one of IA32_EMULATION and x32 is enabled, __se_compat_sys_waitid() may be inlined into the stub __{ia32,x32}_compat_sys_waitid(). Suggested-by: Ingo Molnar Signed-off-by: Dominik Brodowski Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180409105145.5364-3-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- include/linux/compat.h | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 2d85ec5cfda2..aca050aac7b6 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -63,19 +63,24 @@ #define COMPAT_SYSCALL_DEFINE6(name, ...) 
\ COMPAT_SYSCALL_DEFINEx(6, _##name, __VA_ARGS__) +/* + * The asmlinkage stub is aliased to a function named __se_compat_sys_*() which + * sign-extends 32-bit ints to longs whenever needed. The actual work is + * done within __do_compat_sys_*(). + */ #ifndef COMPAT_SYSCALL_DEFINEx -#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ - asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ - asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))\ - __attribute__((alias(__stringify(compat_SyS##name)))); \ - ALLOW_ERROR_INJECTION(compat_sys##name, ERRNO); \ - static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ - asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));\ - asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))\ - { \ - return C_SYSC##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \ - } \ - static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)) +#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ + asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ + __attribute__((alias(__stringify(__se_compat_sys##name)))); \ + ALLOW_ERROR_INJECTION(compat_sys##name, ERRNO); \ + static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ + asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + { \ + return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\ + } \ + static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* COMPAT_SYSCALL_DEFINEx */ #ifndef compat_user_stack_pointer -- cgit v1.2.3 From e9092d0d97961146655ce51f43850907d95f68c3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 9 Apr 2018 10:34:07 -0700 Subject: Fix subtle macro variable shadowing in min_not_zero() Commit 3c8ba0d61d04 ("kernel.h: Retain constant expression output for max()/min()") rewrote our min/max macros to be 
very clever, but in the meantime resurrected a variable name shadow issue that we had had previously fixed in commit 589a9785ee3a ("min/max: remove sparse warnings when they're nested"). That commit talks about the sparse warnings that this shadowing causes, which we ignored as just a minor annoyance. But it turns out that the sparse warning is the least of our problems. We actually have a real bug due to the shadowing through the interaction with "min_not_zero()", which ends up doing min(__x, __y) internally, and then the new declaration of "__x" and "__y" as new variables in __cmp_once() results in a complete mess of an expression, and "min_not_zero()" doesn't work at all. For some odd reason, this only ever caused (reported) problems on s390, even though it is a generic issue and most of the (obviously successful) testing of the problematic commit had happened on other architectures. Quoting Sebastian Ott: "What happened is that the bio build by the partition detection code was attempted to be split by the block layer because the block queue had a max_sector setting of 0. blk_queue_max_hw_sectors uses min_not_zero." So re-introduce the use of __UNIQUE_ID() to make sure that the min/max macros do not have these kinds of clashes. [ That said, __UNIQUE_ID() itself has several issues that make it less than wonderful. In particular, the "uniqueness" has a fallback on the line number, which means that it's not actually unique in more complex cases if you don't build with gcc or clang (which have working unique counters that aren't tied to line numbers). That historical broken fallback also means that we have that pointless "prefix" argument that doesn't actually make much sense _except_ for the known-broken case. Oh well. 
] Fixes: 3c8ba0d61d04 ("kernel.h: Retain constant expression output for max()/min()") Reported-and-tested-by: Sebastian Ott Cc: Kees Cook Cc: Ingo Molnar Cc: Miguel Ojeda Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 4ae1dfd9bf05..52b70894eaa5 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -822,14 +822,15 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #define __cmp(x, y, op) ((x) op (y) ? (x) : (y)) -#define __cmp_once(x, y, op) ({ \ - typeof(x) __x = (x); \ - typeof(y) __y = (y); \ - __cmp(__x, __y, op); }) - -#define __careful_cmp(x, y, op) \ - __builtin_choose_expr(__safe_cmp(x, y), \ - __cmp(x, y, op), __cmp_once(x, y, op)) +#define __cmp_once(x, y, unique_x, unique_y, op) ({ \ + typeof(x) unique_x = (x); \ + typeof(y) unique_y = (y); \ + __cmp(unique_x, unique_y, op); }) + +#define __careful_cmp(x, y, op) \ + __builtin_choose_expr(__safe_cmp(x, y), \ + __cmp(x, y, op), \ + __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op)) /** * min - return minimum of two values of the same or compatible types -- cgit v1.2.3 From a09acf4b43b90581bf53b0c03cc84ed693bf27e2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 9 Apr 2018 21:12:30 +0100 Subject: vfs: Remove the const from dir_context::actor Remove the const marking from the actor function pointer in the dir_context struct. 
The const prevents the structure from being used as part of a kmalloc'd object as it makes the compiler require that the actor member be set at object initialisation time (or not at all), incurring something like the following error if you try and set it later: fs/afs/dir.c:556:20: error: assignment of read-only member 'actor' Marking the member const like this adds very little in the way of sanity checking as the type checking system is likely to provide sufficient - and if not, the kernel is very likely to oops repeatably in this case. Fixes: ac6614b76478 ("[readdir] constify ->actor") Signed-off-by: David Howells Reviewed-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 1ee7f592e239..3a5c19d9f651 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1667,7 +1667,7 @@ typedef int (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, unsigned); struct dir_context { - const filldir_t actor; + filldir_t actor; loff_t pos; }; -- cgit v1.2.3 From 96a938aa214e965d5b4a2f10443b29cad14289b9 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 20 Mar 2018 15:31:38 -0700 Subject: Input: atmel_mxt_ts - remove platform data support Now that there are no users of custom Atmel platform data, and everyone has switched to the generic device properties, we can remove support for the platform data. 
Acked-by: Nick Dyer Signed-off-by: Dmitry Torokhov Signed-off-by: Benson Leung --- include/linux/platform_data/atmel_mxt_ts.h | 31 ------------------------------ 1 file changed, 31 deletions(-) delete mode 100644 include/linux/platform_data/atmel_mxt_ts.h (limited to 'include/linux') diff --git a/include/linux/platform_data/atmel_mxt_ts.h b/include/linux/platform_data/atmel_mxt_ts.h deleted file mode 100644 index 695035a8d7fb..000000000000 --- a/include/linux/platform_data/atmel_mxt_ts.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Atmel maXTouch Touchscreen driver - * - * Copyright (C) 2010 Samsung Electronics Co.Ltd - * Author: Joonyoung Shim - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#ifndef __LINUX_PLATFORM_DATA_ATMEL_MXT_TS_H -#define __LINUX_PLATFORM_DATA_ATMEL_MXT_TS_H - -#include - -enum mxt_suspend_mode { - MXT_SUSPEND_DEEP_SLEEP = 0, - MXT_SUSPEND_T9_CTRL = 1, -}; - -/* The platform data for the Atmel maXTouch touchscreen driver */ -struct mxt_platform_data { - unsigned long irqflags; - u8 t19_num_keys; - const unsigned int *t19_keymap; - enum mxt_suspend_mode suspend_mode; -}; - -#endif /* __LINUX_PLATFORM_DATA_ATMEL_MXT_TS_H */ -- cgit v1.2.3 From 2dd0df8472ff9bb520673cb5862b08be9290c9fa Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 3 Apr 2018 15:37:39 +0530 Subject: cpufreq: Drop cpufreq_table_validate_and_show() This isn't used anymore. Remove the helper and update documentation accordingly. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- include/linux/cpufreq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1fe49724da9e..87f48dd932eb 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -960,8 +960,6 @@ extern void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq, extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs; extern struct freq_attr *cpufreq_generic_attr[]; -int cpufreq_table_validate_and_show(struct cpufreq_policy *policy, - struct cpufreq_frequency_table *table); int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy); unsigned int cpufreq_generic_get(unsigned int cpu); -- cgit v1.2.3 From 15fe8a90bb45b953ca36f074194fcb519a05fdec Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 8 Apr 2018 17:48:11 +0800 Subject: blk-mq: remove blk_mq_delay_queue() No driver uses this interface any more, so remove it. 
Cc: Stefan Haberland Tested-by: Christian Borntraeger Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8efcf49796a3..e3986f4b3461 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -183,7 +183,6 @@ enum { BLK_MQ_S_STOPPED = 0, BLK_MQ_S_TAG_ACTIVE = 1, BLK_MQ_S_SCHED_RESTART = 2, - BLK_MQ_S_START_ON_RUN = 3, BLK_MQ_MAX_DEPTH = 10240, @@ -270,7 +269,6 @@ void blk_mq_unquiesce_queue(struct request_queue *q); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); -void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv); void blk_mq_freeze_queue(struct request_queue *q); -- cgit v1.2.3 From a93f00b3762026dd8231f473fae9346bda07db03 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Fri, 6 Apr 2018 22:14:51 +0200 Subject: backing: silence compiler warning using __printf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __printf marker was added in commit d2cc4dde9206 ("bdi_register: add __printf verification, fix arg mismatch") for function `bdi_register` since it is useful to verify format and arguments. Apply equivalent gcc attribute to `bdi_register_va`. 
Remove warning triggered with W=1: mm/backing-dev.c:881:2: warning: function might be possible candidate for ‘gnu_printf’ format attribute [-Wsuggest-attribute=format] Reviewed-by: Jan Kara Signed-off-by: Mathieu Malaterre Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 3e4ce54d84ab..0e9c0f71f726 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -28,6 +28,7 @@ void bdi_put(struct backing_dev_info *bdi); __printf(2, 3) int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...); +__printf(2, 0) int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args); int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner); -- cgit v1.2.3 From fb14ae8853e4f0347950f98e604fa2f4f3b3abe1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 28 Feb 2018 15:30:54 -0500 Subject: xprtrdma: "Support" call-only RPCs RPC-over-RDMA version 1 credit accounting relies on there being a response message for every RPC Call. This means that RPC procedures that have no reply will disrupt credit accounting, just in the same way as a retransmit would (since it is sent because no reply has arrived). Deal with the "no reply" case the same way. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/clnt.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index ed761f751ecb..9b11b6a0978c 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -217,5 +217,12 @@ void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *); bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, const struct sockaddr *sap); void rpc_cleanup_clids(void); + +static inline int rpc_reply_expected(struct rpc_task *task) +{ + return (task->tk_msg.rpc_proc != NULL) && + (task->tk_msg.rpc_proc->p_decode != NULL); +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From ecd465ee88bb6648c06c82b1abae6ec28cf5fccb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 5 Mar 2018 15:12:57 -0500 Subject: SUNRPC: Move xprt_update_rtt callsite Since commit 33849792cbcd ("xprtrdma: Detect unreachable NFS/RDMA servers more reliably"), the xprtrdma transport now has a ->timer callout. But xprtrdma does not need to compute RTT data, only UDP needs that. Move the xprt_update_rtt call into the UDP transport implementation. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 7fad83881ce1..ad322cec049a 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -373,6 +373,7 @@ void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action); void xprt_write_space(struct rpc_xprt *xprt); void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result); struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid); +void xprt_update_rtt(struct rpc_task *task); void xprt_complete_rqst(struct rpc_task *task, int copied); void xprt_pin_rqst(struct rpc_rqst *req); void xprt_unpin_rqst(struct rpc_rqst *req); -- cgit v1.2.3 From ff699ea8269a02d977c6ee42d58f76efe83a34f9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 5 Mar 2018 15:13:13 -0500 Subject: SUNRPC: Make num_reqs a non-atomic integer If recording xprt->stat.max_slots is moved into xprt_alloc_slot, then xprt->num_reqs is never manipulated outside xprt->reserve_lock. There's no longer a need for xprt->num_reqs to be atomic. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index ad322cec049a..5fea0fb420df 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -197,7 +197,7 @@ struct rpc_xprt { struct list_head free; /* free slots */ unsigned int max_reqs; /* max number of slots */ unsigned int min_reqs; /* min number of slots */ - atomic_t num_reqs; /* total slots */ + unsigned int num_reqs; /* total slots */ unsigned long state; /* transport state */ unsigned char resvport : 1; /* use a reserved port */ atomic_t swapper; /* we're swapping over this -- cgit v1.2.3 From 912678dbc592db7ad618f383866ad23e43cd51f3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Mar 2018 16:43:15 -0400 Subject: NFS: Move the delegation return down into nfs4_proc_remove() Move the delegation return out of generic code and down into the NFSv4 specific unlink code. 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6959968dc36a..3ebf14b3bf0b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1590,7 +1590,7 @@ struct nfs_rpc_ops { unsigned int); int (*create) (struct inode *, struct dentry *, struct iattr *, int); - int (*remove) (struct inode *, const struct qstr *); + int (*remove) (struct inode *, struct dentry *); void (*unlink_setup) (struct rpc_message *, struct inode *dir); void (*unlink_rpc_prepare) (struct rpc_task *, struct nfs_unlinkdata *); int (*unlink_done) (struct rpc_task *, struct inode *); -- cgit v1.2.3 From f2c2c552f119db84d85a53a8bd76479f34df02b1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Mar 2018 16:43:16 -0400 Subject: NFS: Move delegation recall into the NFSv4 callback for rename_setup() Move the delegation recall out of the generic code, and into the NFSv4 specific callback. 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 3ebf14b3bf0b..4b87e2d726b1 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1594,7 +1594,9 @@ struct nfs_rpc_ops { void (*unlink_setup) (struct rpc_message *, struct inode *dir); void (*unlink_rpc_prepare) (struct rpc_task *, struct nfs_unlinkdata *); int (*unlink_done) (struct rpc_task *, struct inode *); - void (*rename_setup) (struct rpc_message *msg, struct inode *dir); + void (*rename_setup) (struct rpc_message *msg, + struct dentry *old_dentry, + struct dentry *new_dentry); void (*rename_rpc_prepare)(struct rpc_task *task, struct nfs_renamedata *); int (*rename_done) (struct rpc_task *task, struct inode *old_dir, struct inode *new_dir); int (*link) (struct inode *, struct inode *, const struct qstr *); -- cgit v1.2.3 From 977fcc2b0b41c1fc82e8349995695e207ccb6684 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Mar 2018 16:43:17 -0400 Subject: NFS: Add a delegation return into nfs4_proc_unlink_setup() Ensure that when we do finally delete the file, then we return the delegation. 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 4b87e2d726b1..c4ba58b3c0f8 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1591,7 +1591,7 @@ struct nfs_rpc_ops { int (*create) (struct inode *, struct dentry *, struct iattr *, int); int (*remove) (struct inode *, struct dentry *); - void (*unlink_setup) (struct rpc_message *, struct inode *dir); + void (*unlink_setup) (struct rpc_message *, struct dentry *); void (*unlink_rpc_prepare) (struct rpc_task *, struct nfs_unlinkdata *); int (*unlink_done) (struct rpc_task *, struct inode *); void (*rename_setup) (struct rpc_message *msg, -- cgit v1.2.3 From c135cb39a907b85aef5389c191b6f02cffbadb8a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Mar 2018 16:43:19 -0400 Subject: NFS: Remove the unused return_delegation() callback Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index c4ba58b3c0f8..34d28564ecf3 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1635,7 +1635,6 @@ struct nfs_rpc_ops { struct iattr *iattr, int *); int (*have_delegation)(struct inode *, fmode_t); - int (*return_delegation)(struct inode *); struct nfs_client *(*alloc_client) (const struct nfs_client_initdata *); struct nfs_client *(*init_client) (struct nfs_client *, const struct nfs_client_initdata *); -- cgit v1.2.3 From 909728821e366599b899251e0a0f023c0ccf4fb0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Mar 2018 16:53:27 -0400 Subject: NFS: Convert NFS_INO_INVALID flags to unsigned long The cache validity attribute is unsigned long, so make sure that the flags are too. 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_fs.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 38187c68063d..4afb11be73f4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -198,14 +198,14 @@ struct nfs_inode { /* * Cache validity bit flags */ -#define NFS_INO_INVALID_ATTR 0x0001 /* cached attrs are invalid */ -#define NFS_INO_INVALID_DATA 0x0002 /* cached data is invalid */ -#define NFS_INO_INVALID_ATIME 0x0004 /* cached atime is invalid */ -#define NFS_INO_INVALID_ACCESS 0x0008 /* cached access cred invalid */ -#define NFS_INO_INVALID_ACL 0x0010 /* cached acls are invalid */ -#define NFS_INO_REVAL_PAGECACHE 0x0020 /* must revalidate pagecache */ -#define NFS_INO_REVAL_FORCED 0x0040 /* force revalidation ignoring a delegation */ -#define NFS_INO_INVALID_LABEL 0x0080 /* cached label is invalid */ +#define NFS_INO_INVALID_ATTR BIT(0) /* cached attrs are invalid */ +#define NFS_INO_INVALID_DATA BIT(1) /* cached data is invalid */ +#define NFS_INO_INVALID_ATIME BIT(2) /* cached atime is invalid */ +#define NFS_INO_INVALID_ACCESS BIT(3) /* cached access cred invalid */ +#define NFS_INO_INVALID_ACL BIT(4) /* cached acls are invalid */ +#define NFS_INO_REVAL_PAGECACHE BIT(5) /* must revalidate pagecache */ +#define NFS_INO_REVAL_FORCED BIT(6) /* force revalidation ignoring a delegation */ +#define NFS_INO_INVALID_LABEL BIT(7) /* cached label is invalid */ /* * Bit offsets in flags field -- cgit v1.2.3 From 16e143751727471f9a565515344196693bbc8762 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Mar 2018 16:53:31 -0400 Subject: NFS: More fine grained attribute tracking Currently, if the NFS_INO_INVALID_ATTR flag is set, for instance by a call to nfs_post_op_update_inode_locked(), then it will not be cleared until all the attributes have been revalidated. 
This means, for instance, that NFSv4 writes will always force a full attribute revalidation. Track the ctime, mtime, size and change attribute separately from the other attributes so that we can have nfs_post_op_update_inode_locked() set them correctly, and later have the cache consistency bitmask be able to clear them. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_fs.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 4afb11be73f4..2f129bbfaae8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -198,7 +198,6 @@ struct nfs_inode { /* * Cache validity bit flags */ -#define NFS_INO_INVALID_ATTR BIT(0) /* cached attrs are invalid */ #define NFS_INO_INVALID_DATA BIT(1) /* cached data is invalid */ #define NFS_INO_INVALID_ATIME BIT(2) /* cached atime is invalid */ #define NFS_INO_INVALID_ACCESS BIT(3) /* cached access cred invalid */ @@ -206,6 +205,17 @@ struct nfs_inode { #define NFS_INO_REVAL_PAGECACHE BIT(5) /* must revalidate pagecache */ #define NFS_INO_REVAL_FORCED BIT(6) /* force revalidation ignoring a delegation */ #define NFS_INO_INVALID_LABEL BIT(7) /* cached label is invalid */ +#define NFS_INO_INVALID_CHANGE BIT(8) /* cached change is invalid */ +#define NFS_INO_INVALID_CTIME BIT(9) /* cached ctime is invalid */ +#define NFS_INO_INVALID_MTIME BIT(10) /* cached mtime is invalid */ +#define NFS_INO_INVALID_SIZE BIT(11) /* cached size is invalid */ +#define NFS_INO_INVALID_OTHER BIT(12) /* other attrs are invalid */ + +#define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ + | NFS_INO_INVALID_CTIME \ + | NFS_INO_INVALID_MTIME \ + | NFS_INO_INVALID_SIZE \ + | NFS_INO_INVALID_OTHER) /* inode metadata is invalid */ /* * Bit offsets in flags field @@ -292,10 +302,11 @@ static inline void nfs_mark_for_revalidate(struct inode *inode) struct nfs_inode *nfsi = NFS_I(inode); 
spin_lock(&inode->i_lock); - nfsi->cache_validity |= NFS_INO_INVALID_ATTR | - NFS_INO_REVAL_PAGECACHE | - NFS_INO_INVALID_ACCESS | - NFS_INO_INVALID_ACL; + nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_CHANGE + | NFS_INO_INVALID_CTIME; if (S_ISDIR(inode->i_mode)) nfsi->cache_validity |= NFS_INO_INVALID_DATA; spin_unlock(&inode->i_lock); -- cgit v1.2.3 From 0e779aa70308462e45f7cd1a54de418dfe101694 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Mar 2018 17:03:05 -0400 Subject: SUNRPC: Add helpers for decoding opaque and string types Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index d950223c64b1..7e609de34d85 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -253,6 +253,12 @@ xdr_stream_remaining(const struct xdr_stream *xdr) return xdr->nwords << 2; } +ssize_t xdr_stream_decode_opaque(struct xdr_stream *xdr, void *ptr, + size_t size); +ssize_t xdr_stream_decode_opaque_dup(struct xdr_stream *xdr, void **ptr, + size_t maxlen, gfp_t gfp_flags); +ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, + size_t size); ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, size_t maxlen, gfp_t gfp_flags); /** -- cgit v1.2.3 From 85e3dd44c514a8bed6c713df4af657be83d00f68 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Mar 2018 17:03:06 -0400 Subject: SUNRPC: Add a helper for encoding opaque data inline Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 7e609de34d85..a43c3b6455b6 100644 --- a/include/linux/sunrpc/xdr.h +++ 
b/include/linux/sunrpc/xdr.h @@ -318,6 +318,31 @@ xdr_stream_encode_u64(struct xdr_stream *xdr, __u64 n) return len; } +/** + * xdr_stream_encode_opaque_inline - Encode opaque xdr data + * @xdr: pointer to xdr_stream + * @ptr: pointer to void pointer + * @len: size of object + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t len) +{ + size_t count = sizeof(__u32) + xdr_align_size(len); + __be32 *p = xdr_reserve_space(xdr, count); + + if (unlikely(!p)) { + *ptr = NULL; + return -EMSGSIZE; + } + xdr_encode_opaque(p, NULL, len); + *ptr = ++p; + return count; +} + /** * xdr_stream_encode_opaque_fixed - Encode fixed length opaque xdr data * @xdr: pointer to xdr_stream -- cgit v1.2.3 From 37c88763def8474bc0972fbd1adb0d21670104b7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Mar 2018 17:03:08 -0400 Subject: NFSv4; Clean up XDR encoding of type bitmap4 Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 63 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index a43c3b6455b6..2bd68177a442 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -386,6 +386,31 @@ xdr_stream_encode_opaque(struct xdr_stream *xdr, const void *ptr, size_t len) return count; } +/** + * xdr_stream_encode_uint32_array - Encode variable length array of integers + * @xdr: pointer to xdr_stream + * @array: array of integers + * @array_size: number of elements in @array + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_uint32_array(struct xdr_stream *xdr, + const __u32 *array, size_t array_size) +{ + ssize_t 
ret = (array_size+1) * sizeof(__u32); + __be32 *p = xdr_reserve_space(xdr, ret); + + if (unlikely(!p)) + return -EMSGSIZE; + *p++ = cpu_to_be32(array_size); + for (; array_size > 0; p++, array++, array_size--) + *p = cpu_to_be32p(array); + return ret; +} + /** * xdr_stream_decode_u32 - Decode a 32-bit integer * @xdr: pointer to xdr_stream @@ -463,6 +488,44 @@ xdr_stream_decode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t maxle } return len; } + +/** + * xdr_stream_decode_uint32_array - Decode variable length array of integers + * @xdr: pointer to xdr_stream + * @array: location to store the integer array or NULL + * @array_size: number of elements to store + * + * Return values: + * On success, returns number of elements stored in @array + * %-EBADMSG on XDR buffer overflow + * %-EMSGSIZE if the size of the array exceeds @array_size + */ +static inline ssize_t +xdr_stream_decode_uint32_array(struct xdr_stream *xdr, + __u32 *array, size_t array_size) +{ + __be32 *p; + __u32 len; + ssize_t retval; + + if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) + return -EBADMSG; + p = xdr_inline_decode(xdr, len * sizeof(*p)); + if (unlikely(!p)) + return -EBADMSG; + if (array == NULL) + return len; + if (len <= array_size) { + if (len < array_size) + memset(array+len, 0, (array_size-len)*sizeof(*array)); + array_size = len; + retval = len; + } else + retval = -EMSGSIZE; + for (; array_size > 0; p++, array++, array_size--) + *array = be32_to_cpup(p); + return retval; +} #endif /* __KERNEL__ */ #endif /* _SUNRPC_XDR_H_ */ -- cgit v1.2.3 From b082b2e1454c3e0217d7cf70f2211966c3d54301 Mon Sep 17 00:00:00 2001 From: Shawn Nematbakhsh Date: Fri, 23 Mar 2018 18:42:46 +0100 Subject: platform/chrome: cros_ec_debugfs: Add PD port info to debugfs Add info useful for debugging USB-PD port state. 
Signed-off-by: Shawn Nematbakhsh Signed-off-by: Enric Balletbo i Serra Reviewed-by: Andy Shevchenko Acked-by: Lee Jones Signed-off-by: Benson Leung --- include/linux/mfd/cros_ec_commands.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 2b96e630e3b6..f2edd9969b40 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -2948,6 +2948,9 @@ struct ec_response_usb_pd_control_v1 { #define EC_CMD_USB_PD_PORTS 0x102 +/* Maximum number of PD ports on a device, num_ports will be <= this */ +#define EC_USB_PD_MAX_PORTS 8 + struct ec_response_usb_pd_ports { uint8_t num_ports; } __packed; -- cgit v1.2.3 From c1d1e91aff3d1183d6b16a282c2575e3e006cee4 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Fri, 23 Mar 2018 18:42:47 +0100 Subject: platform/chrome: mfd/cros_ec_dev: Add sysfs entry to set keyboard wake lid angle This adds a sysfs attribute (/sys/class/chromeos/cros_ec/kb_wake_angle) used to set and get the keyboard wake lid angle. This attribute is present only if 2 accelerometers are controlled by the EC. This patch also moves the cros_ec features check before the device is added so the features map obtained from the EC is ready on time. 
Signed-off-by: Gwendal Grignou Signed-off-by: Enric Balletbo i Serra Reviewed-by: Andy Shevchenko Acked-by: Lee Jones Signed-off-by: Benson Leung --- include/linux/mfd/cros_ec.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index c61535979b8f..2d4e23c9ea0a 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -183,6 +183,7 @@ struct cros_ec_debugfs; * @ec_dev: cros_ec_device structure to talk to the physical device * @dev: pointer to the platform device * @debug_info: cros_ec_debugfs structure for debugging information + * @has_kb_wake_angle: true if at least 2 accelerometer are connected to the EC. * @cmd_offset: offset to apply for each command. */ struct cros_ec_dev { @@ -191,6 +192,7 @@ struct cros_ec_dev { struct cros_ec_device *ec_dev; struct device *dev; struct cros_ec_debugfs *debug_info; + bool has_kb_wake_angle; u16 cmd_offset; u32 features[2]; }; -- cgit v1.2.3 From eb59254608bc1d42c4c6afdcdce9c0d3ce02b318 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 10 Apr 2018 16:27:36 -0700 Subject: mm: introduce NR_INDIRECTLY_RECLAIMABLE_BYTES Patch series "indirectly reclaimable memory", v2. This patchset introduces the concept of indirectly reclaimable memory and applies it to fix the issue of when a big number of dentries with external names can significantly affect the MemAvailable value. This patch (of 3): Introduce a concept of indirectly reclaimable memory and adds the corresponding memory counter and /proc/vmstat item. Indirectly reclaimable memory is any sort of memory, used by the kernel (except of reclaimable slabs), which is actually reclaimable, i.e. will be released under memory pressure. The counter is in bytes, as it's not always possible to count such objects in pages. The name contains BYTES by analogy to NR_KERNEL_STACK_KB. 
Link: http://lkml.kernel.org/r/20180305133743.12746-2-guro@fb.com Signed-off-by: Roman Gushchin Reviewed-by: Andrew Morton Cc: Alexander Viro Cc: Michal Hocko Cc: Johannes Weiner Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f11ae29005f1..a0c9e45a859a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -180,6 +180,7 @@ enum node_stat_item { NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */ NR_DIRTIED, /* page dirtyings since bootup */ NR_WRITTEN, /* page writings since bootup */ + NR_INDIRECTLY_RECLAIMABLE_BYTES, /* measured in bytes */ NR_VM_NODE_STAT_ITEMS }; -- cgit v1.2.3 From e3c1ac586c9922180146605bfb4816e3b11148c5 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 10 Apr 2018 16:28:03 -0700 Subject: mm/vmscan: don't mess with pgdat->flags in memcg reclaim memcg reclaim may alter pgdat->flags based on the state of LRU lists in cgroup and its children. PGDAT_WRITEBACK may force kswapd to sleep congested_wait(), PGDAT_DIRTY may force kswapd to writeback filesystem pages. But the worst here is PGDAT_CONGESTED, since it may force all direct reclaims to stall in wait_iff_congested(). Note that only kswapd have powers to clear any of these bits. This might just never happen if cgroup limits configured that way. So all direct reclaims will stall as long as we have some congested bdi in the system. Leave all pgdat->flags manipulations to kswapd. kswapd scans the whole pgdat, only kswapd can clear pgdat->flags once node is balanced, thus it's reasonable to leave all decisions about node state to kswapd. Why only kswapd? Why not allow to global direct reclaim change these flags? It is because currently only kswapd can clear these flags. 
I'm less worried about the case when PGDAT_CONGESTED is falsely not set, and more worried about the case when it is falsely set. If direct reclaimer sets PGDAT_CONGESTED, do we have guarantee that after the congestion problem is sorted out, kswapd will be woken up and clear the flag? It seems like there is no such guarantee. E.g. direct reclaimers may eventually balance pgdat and kswapd simply won't wake up (see wakeup_kswapd()). Moving pgdat->flags manipulation to kswapd, means that cgroup2 reclaim now loses its congestion throttling mechanism. Add per-cgroup congestion state and throttle cgroup2 reclaimers if memcg is in congestion state. Currently there is no need for per-cgroup PGDAT_WRITEBACK and PGDAT_DIRTY bits since they alter only kswapd behavior. The problem could be easily demonstrated by creating heavy congestion in one cgroup: echo "+memory" > /sys/fs/cgroup/cgroup.subtree_control mkdir -p /sys/fs/cgroup/congester echo 512M > /sys/fs/cgroup/congester/memory.max echo $$ > /sys/fs/cgroup/congester/cgroup.procs /* generate a lot of dirty data on slow HDD */ while true; do dd if=/dev/zero of=/mnt/sdb/zeroes bs=1M count=1024; done & .... while true; do dd if=/dev/zero of=/mnt/sdb/zeroes bs=1M count=1024; done & and some job in another cgroup: mkdir /sys/fs/cgroup/victim echo 128M > /sys/fs/cgroup/victim/memory.max # time cat /dev/sda > /dev/null real 10m15.054s user 0m0.487s sys 1m8.505s According to the tracepoint in wait_iff_congested(), the 'cat' spent 50% of the time sleeping there. 
With the patch, cat doesn't waste time anymore: # time cat /dev/sda > /dev/null real 5m32.911s user 0m0.411s sys 0m56.664s [aryabinin@virtuozzo.com: congestion state should be per-node] Link: http://lkml.kernel.org/r/20180406135215.10057-1-aryabinin@virtuozzo.com [aryabinin@virtuozzo.com: make congestion state per-cgroup-per-node instead of just per-cgroup] Link: http://lkml.kernel.org/r/20180406180254.8970-2-aryabinin@virtuozzo.com Link: http://lkml.kernel.org/r/20180323152029.11084-5-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Mel Gorman Cc: Tejun Heo Cc: Michal Hocko Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/backing-dev.h | 2 +- include/linux/memcontrol.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 3e4ce54d84ab..e6cbb915ee56 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -175,7 +175,7 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) } long congestion_wait(int sync, long timeout); -long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout); +long wait_iff_congested(int sync, long timeout); static inline bool bdi_cap_synchronous_io(struct backing_dev_info *bdi) { diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c46016bb25eb..f292efac378d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -120,6 +120,9 @@ struct mem_cgroup_per_node { unsigned long usage_in_excess;/* Set to the value by which */ /* the soft limit is exceeded*/ bool on_tree; + bool congested; /* memcg has many dirty pages */ + /* backed by a congested BDI */ + struct mem_cgroup *memcg; /* Back pointer, we cannot */ /* use container_of */ }; -- cgit v1.2.3 From d51d1e64500fcb48fc6a18c77c965b8f48a175f2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt 
Date: Tue, 10 Apr 2018 16:28:07 -0700 Subject: mm, vmscan, tracing: use pointer to reclaim_stat struct in trace event The trace event trace_mm_vmscan_lru_shrink_inactive() currently has 12 parameters! Seven of them are from the reclaim_stat structure. This structure is currently local to mm/vmscan.c. By moving it to the global vmstat.h header, we can also reference it from the vmscan tracepoints. In moving it, it brings down the overhead of passing so many arguments to the trace event. In the future, we may limit the number of arguments that a trace event may pass (ideally just 6, but more realistically it may be 8). Before this patch, the code to call the trace event is this: 0f 83 aa fe ff ff jae ffffffff811e6261 48 8b 45 a0 mov -0x60(%rbp),%rax 45 8b 64 24 20 mov 0x20(%r12),%r12d 44 8b 6d d4 mov -0x2c(%rbp),%r13d 8b 4d d0 mov -0x30(%rbp),%ecx 44 8b 75 cc mov -0x34(%rbp),%r14d 44 8b 7d c8 mov -0x38(%rbp),%r15d 48 89 45 90 mov %rax,-0x70(%rbp) 8b 83 b8 fe ff ff mov -0x148(%rbx),%eax 8b 55 c0 mov -0x40(%rbp),%edx 8b 7d c4 mov -0x3c(%rbp),%edi 8b 75 b8 mov -0x48(%rbp),%esi 89 45 80 mov %eax,-0x80(%rbp) 65 ff 05 e4 f7 e2 7e incl %gs:0x7ee2f7e4(%rip) # 15bd0 <__preempt_count> 48 8b 05 75 5b 13 01 mov 0x1135b75(%rip),%rax # ffffffff8231bf68 <__tracepoint_mm_vmscan_lru_shrink_inactive+0x28> 48 85 c0 test %rax,%rax 74 72 je ffffffff811e646a 48 89 c3 mov %rax,%rbx 4c 8b 10 mov (%rax),%r10 89 f8 mov %edi,%eax 48 89 85 68 ff ff ff mov %rax,-0x98(%rbp) 89 f0 mov %esi,%eax 48 89 85 60 ff ff ff mov %rax,-0xa0(%rbp) 89 c8 mov %ecx,%eax 48 89 85 78 ff ff ff mov %rax,-0x88(%rbp) 89 d0 mov %edx,%eax 48 89 85 70 ff ff ff mov %rax,-0x90(%rbp) 8b 45 8c mov -0x74(%rbp),%eax 48 8b 7b 08 mov 0x8(%rbx),%rdi 48 83 c3 18 add $0x18,%rbx 50 push %rax 41 54 push %r12 41 55 push %r13 ff b5 78 ff ff ff pushq -0x88(%rbp) 41 56 push %r14 41 57 push %r15 ff b5 70 ff ff ff pushq -0x90(%rbp) 4c 8b 8d 68 ff ff ff mov -0x98(%rbp),%r9 4c 8b 85 60 ff ff ff mov -0xa0(%rbp),%r8 48 8b 4d 98 mov 
-0x68(%rbp),%rcx 48 8b 55 90 mov -0x70(%rbp),%rdx 8b 75 80 mov -0x80(%rbp),%esi 41 ff d2 callq *%r10 After the patch: 0f 83 a8 fe ff ff jae ffffffff811e626d 8b 9b b8 fe ff ff mov -0x148(%rbx),%ebx 45 8b 64 24 20 mov 0x20(%r12),%r12d 4c 8b 6d a0 mov -0x60(%rbp),%r13 65 ff 05 f5 f7 e2 7e incl %gs:0x7ee2f7f5(%rip) # 15bd0 <__preempt_count> 4c 8b 35 86 5b 13 01 mov 0x1135b86(%rip),%r14 # ffffffff8231bf68 <__tracepoint_mm_vmscan_lru_shrink_inactive+0x28> 4d 85 f6 test %r14,%r14 74 2a je ffffffff811e6411 49 8b 06 mov (%r14),%rax 8b 4d 8c mov -0x74(%rbp),%ecx 49 8b 7e 08 mov 0x8(%r14),%rdi 49 83 c6 18 add $0x18,%r14 4c 89 ea mov %r13,%rdx 45 89 e1 mov %r12d,%r9d 4c 8d 45 b8 lea -0x48(%rbp),%r8 89 de mov %ebx,%esi 51 push %rcx 48 8b 4d 98 mov -0x68(%rbp),%rcx ff d0 callq *%rax Link: http://lkml.kernel.org/r/2559d7cb-ec60-1200-2362-04fa34fd02bb@fb.com Link: http://lkml.kernel.org/r/20180322121003.4177af15@gandalf.local.home Signed-off-by: Steven Rostedt (VMware) Reported-by: Alexei Starovoitov Acked-by: David Rientjes Acked-by: Michal Hocko Cc: Mel Gorman Cc: Vlastimil Babka Cc: Andrey Ryabinin Cc: Alexei Starovoitov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index a4c2317d8b9f..f25cef84b41d 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -20,6 +20,17 @@ extern int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos); #endif +struct reclaim_stat { + unsigned nr_dirty; + unsigned nr_unqueued_dirty; + unsigned nr_congested; + unsigned nr_writeback; + unsigned nr_immediate; + unsigned nr_activate; + unsigned nr_ref_keep; + unsigned nr_unmap_fail; +}; + #ifdef CONFIG_VM_EVENT_COUNTERS /* * Light weight per cpu counter implementation. 
-- cgit v1.2.3 From b28b08de436a638c82d0cf3dcdbdbad055baf1fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Tue, 10 Apr 2018 16:28:15 -0700 Subject: mm/hmm: fix header file if/else/endif maze MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The #if/#else/#endif for IS_ENABLED(CONFIG_HMM) were wrong. Because of this after multiple include there was multiple definition of both hmm_mm_init() and hmm_mm_destroy() leading to build failure if HMM was enabled (CONFIG_HMM set). Link: http://lkml.kernel.org/r/20180323005527.758-3-jglisse@redhat.com Signed-off-by: Jérôme Glisse Acked-by: Balbir Singh Cc: Andrew Morton Cc: Ralph Campbell Cc: John Hubbard Cc: Evgeny Baskakov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 325017ad9311..36dd21fe5caf 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -498,23 +498,16 @@ struct hmm_device { struct hmm_device *hmm_device_new(void *drvdata); void hmm_device_put(struct hmm_device *hmm_device); #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ -#endif /* IS_ENABLED(CONFIG_HMM) */ /* Below are for HMM internal use only! Not to be used by device driver! 
*/ -#if IS_ENABLED(CONFIG_HMM_MIRROR) void hmm_mm_destroy(struct mm_struct *mm); static inline void hmm_mm_init(struct mm_struct *mm) { mm->hmm = NULL; } -#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */ -static inline void hmm_mm_destroy(struct mm_struct *mm) {} -static inline void hmm_mm_init(struct mm_struct *mm) {} -#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ - - #else /* IS_ENABLED(CONFIG_HMM) */ static inline void hmm_mm_destroy(struct mm_struct *mm) {} static inline void hmm_mm_init(struct mm_struct *mm) {} +#endif /* IS_ENABLED(CONFIG_HMM) */ #endif /* LINUX_HMM_H */ -- cgit v1.2.3 From e1401513c6b5efec59678a4d4e9f90957684b7e3 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 10 Apr 2018 16:28:19 -0700 Subject: mm/hmm: HMM should have a callback before MM is destroyed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hmm_mirror_register() registers a callback for when the CPU pagetable is modified. Normally, the device driver will call hmm_mirror_unregister() when the process using the device is finished. However, if the process exits uncleanly, the struct_mm can be destroyed with no warning to the device driver. Link: http://lkml.kernel.org/r/20180323005527.758-4-jglisse@redhat.com Signed-off-by: Ralph Campbell Signed-off-by: Jérôme Glisse Reviewed-by: John Hubbard Cc: Evgeny Baskakov Cc: Mark Hairgrove Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 36dd21fe5caf..fa7b51f65905 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -218,6 +218,16 @@ enum hmm_update_type { * @update: callback to update range on a device */ struct hmm_mirror_ops { + /* release() - release hmm_mirror + * + * @mirror: pointer to struct hmm_mirror + * + * This is called when the mm_struct is being released. 
+ * The callback should make sure no references to the mirror occur + * after the callback returns. + */ + void (*release)(struct hmm_mirror *mirror); + /* sync_cpu_device_pagetables() - synchronize page tables * * @mirror: pointer to struct hmm_mirror -- cgit v1.2.3 From 08232a4544cc6befaabfbec2087bedaf21b0da34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Tue, 10 Apr 2018 16:28:30 -0700 Subject: mm/hmm: use struct for hmm_vma_fault(), hmm_vma_get_pfns() parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both hmm_vma_fault() and hmm_vma_get_pfns() were taking a hmm_range struct as parameter and were initializing that struct with others of their parameters. Have caller of those function do this as they are likely to already do and only pass this struct to both function this shorten function signature and make it easier in the future to add new parameters by simply adding them to the structure. Link: http://lkml.kernel.org/r/20180323005527.758-7-jglisse@redhat.com Signed-off-by: Jérôme Glisse Reviewed-by: John Hubbard Cc: Evgeny Baskakov Cc: Ralph Campbell Cc: Mark Hairgrove Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index fa7b51f65905..d0d6760cdada 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -274,6 +274,7 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror); /* * struct hmm_range - track invalidation lock on virtual address range * + * @vma: the vm area struct for the range * @list: all range lock are on a list * @start: range virtual start address (inclusive) * @end: range virtual end address (exclusive) @@ -281,6 +282,7 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror); * @valid: pfns array did not change since it has been fill by an HMM function */ struct 
hmm_range { + struct vm_area_struct *vma; struct list_head list; unsigned long start; unsigned long end; @@ -301,12 +303,8 @@ struct hmm_range { * * IF YOU DO NOT FOLLOW THE ABOVE RULE THE SNAPSHOT CONTENT MIGHT BE INVALID ! */ -int hmm_vma_get_pfns(struct vm_area_struct *vma, - struct hmm_range *range, - unsigned long start, - unsigned long end, - hmm_pfn_t *pfns); -bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range); +int hmm_vma_get_pfns(struct hmm_range *range); +bool hmm_vma_range_done(struct hmm_range *range); /* @@ -327,13 +325,7 @@ bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range); * * See the function description in mm/hmm.c for further documentation. */ -int hmm_vma_fault(struct vm_area_struct *vma, - struct hmm_range *range, - unsigned long start, - unsigned long end, - hmm_pfn_t *pfns, - bool write, - bool block); +int hmm_vma_fault(struct hmm_range *range, bool write, bool block); #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ -- cgit v1.2.3 From 86586a41b8fe655e28be418a40e9bb2bb478cdd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Tue, 10 Apr 2018 16:28:34 -0700 Subject: mm/hmm: remove HMM_PFN_READ flag and ignore peculiar architecture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only peculiar architecture allow write without read thus assume that any valid pfn do allow for read. Note we do not care for write only because it does make sense with thing like atomic compare and exchange or any other operations that allow you to get the memory value through them. 
Link: http://lkml.kernel.org/r/20180323005527.758-8-jglisse@redhat.com Signed-off-by: Jérôme Glisse Reviewed-by: John Hubbard Cc: Evgeny Baskakov Cc: Ralph Campbell Cc: Mark Hairgrove Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index d0d6760cdada..dd907f614dfe 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -83,8 +83,7 @@ struct hmm; * hmm_pfn_t - HMM uses its own pfn type to keep several flags per page * * Flags: - * HMM_PFN_VALID: pfn is valid - * HMM_PFN_READ: CPU page table has read permission set + * HMM_PFN_VALID: pfn is valid. It has, at least, read permission. * HMM_PFN_WRITE: CPU page table has write permission set * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory * HMM_PFN_EMPTY: corresponding CPU page table entry is pte_none() @@ -97,13 +96,12 @@ struct hmm; typedef unsigned long hmm_pfn_t; #define HMM_PFN_VALID (1 << 0) -#define HMM_PFN_READ (1 << 1) -#define HMM_PFN_WRITE (1 << 2) -#define HMM_PFN_ERROR (1 << 3) -#define HMM_PFN_EMPTY (1 << 4) -#define HMM_PFN_SPECIAL (1 << 5) -#define HMM_PFN_DEVICE_UNADDRESSABLE (1 << 6) -#define HMM_PFN_SHIFT 7 +#define HMM_PFN_WRITE (1 << 1) +#define HMM_PFN_ERROR (1 << 2) +#define HMM_PFN_EMPTY (1 << 3) +#define HMM_PFN_SPECIAL (1 << 4) +#define HMM_PFN_DEVICE_UNADDRESSABLE (1 << 5) +#define HMM_PFN_SHIFT 6 /* * hmm_pfn_t_to_page() - return struct page pointed to by a valid hmm_pfn_t -- cgit v1.2.3 From ff05c0c6bbe5043af6a1686522ed845f40ba49ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Tue, 10 Apr 2018 16:28:38 -0700 Subject: mm/hmm: use uint64_t for HMM pfn instead of defining hmm_pfn_t to ulong MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All device driver we care about are using 64bits page table entry. 
In order to match this and to avoid useless define convert all HMM pfn to directly use uint64_t. It is a first step on the road to allow driver to directly use pfn value return by HMM (saving memory and CPU cycles use for conversion between the two). Link: http://lkml.kernel.org/r/20180323005527.758-9-jglisse@redhat.com Signed-off-by: Jérôme Glisse Reviewed-by: John Hubbard Cc: Evgeny Baskakov Cc: Ralph Campbell Cc: Mark Hairgrove Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 46 +++++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index dd907f614dfe..54d684fe3b90 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -80,8 +80,6 @@ struct hmm; /* - * hmm_pfn_t - HMM uses its own pfn type to keep several flags per page - * * Flags: * HMM_PFN_VALID: pfn is valid. It has, at least, read permission. * HMM_PFN_WRITE: CPU page table has write permission set @@ -93,8 +91,6 @@ struct hmm; * set and the pfn value is undefined. * HMM_PFN_DEVICE_UNADDRESSABLE: unaddressable device memory (ZONE_DEVICE) */ -typedef unsigned long hmm_pfn_t; - #define HMM_PFN_VALID (1 << 0) #define HMM_PFN_WRITE (1 << 1) #define HMM_PFN_ERROR (1 << 2) @@ -104,14 +100,14 @@ typedef unsigned long hmm_pfn_t; #define HMM_PFN_SHIFT 6 /* - * hmm_pfn_t_to_page() - return struct page pointed to by a valid hmm_pfn_t - * @pfn: hmm_pfn_t to convert to struct page - * Returns: struct page pointer if pfn is a valid hmm_pfn_t, NULL otherwise + * hmm_pfn_to_page() - return struct page pointed to by a valid HMM pfn + * @pfn: HMM pfn value to get corresponding struct page from + * Returns: struct page pointer if pfn is a valid HMM pfn, NULL otherwise * - * If the hmm_pfn_t is valid (ie valid flag set) then return the struct page - * matching the pfn value stored in the hmm_pfn_t. Otherwise return NULL. 
+ * If the HMM pfn is valid (ie valid flag set) then return the struct page + * matching the pfn value stored in the HMM pfn. Otherwise return NULL. */ -static inline struct page *hmm_pfn_t_to_page(hmm_pfn_t pfn) +static inline struct page *hmm_pfn_to_page(uint64_t pfn) { if (!(pfn & HMM_PFN_VALID)) return NULL; @@ -119,11 +115,11 @@ static inline struct page *hmm_pfn_t_to_page(hmm_pfn_t pfn) } /* - * hmm_pfn_t_to_pfn() - return pfn value store in a hmm_pfn_t - * @pfn: hmm_pfn_t to extract pfn from - * Returns: pfn value if hmm_pfn_t is valid, -1UL otherwise + * hmm_pfn_to_pfn() - return pfn value store in a HMM pfn + * @pfn: HMM pfn value to extract pfn from + * Returns: pfn value if HMM pfn is valid, -1UL otherwise */ -static inline unsigned long hmm_pfn_t_to_pfn(hmm_pfn_t pfn) +static inline unsigned long hmm_pfn_to_pfn(uint64_t pfn) { if (!(pfn & HMM_PFN_VALID)) return -1UL; @@ -131,21 +127,21 @@ static inline unsigned long hmm_pfn_t_to_pfn(hmm_pfn_t pfn) } /* - * hmm_pfn_t_from_page() - create a valid hmm_pfn_t value from struct page - * @page: struct page pointer for which to create the hmm_pfn_t - * Returns: valid hmm_pfn_t for the page + * hmm_pfn_from_page() - create a valid HMM pfn value from struct page + * @page: struct page pointer for which to create the HMM pfn + * Returns: valid HMM pfn for the page */ -static inline hmm_pfn_t hmm_pfn_t_from_page(struct page *page) +static inline uint64_t hmm_pfn_from_page(struct page *page) { return (page_to_pfn(page) << HMM_PFN_SHIFT) | HMM_PFN_VALID; } /* - * hmm_pfn_t_from_pfn() - create a valid hmm_pfn_t value from pfn - * @pfn: pfn value for which to create the hmm_pfn_t - * Returns: valid hmm_pfn_t for the pfn + * hmm_pfn_from_pfn() - create a valid HMM pfn value from pfn + * @pfn: pfn value for which to create the HMM pfn + * Returns: valid HMM pfn for the pfn */ -static inline hmm_pfn_t hmm_pfn_t_from_pfn(unsigned long pfn) +static inline uint64_t hmm_pfn_from_pfn(unsigned long pfn) { return (pfn << 
HMM_PFN_SHIFT) | HMM_PFN_VALID; } @@ -284,7 +280,7 @@ struct hmm_range { struct list_head list; unsigned long start; unsigned long end; - hmm_pfn_t *pfns; + uint64_t *pfns; bool valid; }; @@ -307,7 +303,7 @@ bool hmm_vma_range_done(struct hmm_range *range); /* * Fault memory on behalf of device driver. Unlike handle_mm_fault(), this will - * not migrate any device memory back to system memory. The hmm_pfn_t array will + * not migrate any device memory back to system memory. The HMM pfn array will * be updated with the fault result and current snapshot of the CPU page table * for the range. * @@ -316,7 +312,7 @@ bool hmm_vma_range_done(struct hmm_range *range); * function returns -EAGAIN. * * Return value does not reflect if the fault was successful for every single - * address or not. Therefore, the caller must to inspect the hmm_pfn_t array to + * address or not. Therefore, the caller must to inspect the HMM pfn array to * determine fault status for each address. * * Trying to fault inside an invalid vma will result in -EINVAL. -- cgit v1.2.3 From 5504ed29692faad06ea74c4275e96a8ffc83a1e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Tue, 10 Apr 2018 16:28:46 -0700 Subject: mm/hmm: do not differentiate between empty entry or missing directory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no point in differentiating between a range for which there is not even a directory (and thus entries) and empty entry (pte_none() or pmd_none() returns true). Simply drop the distinction ie remove HMM_PFN_EMPTY flag and merge now duplicate hmm_vma_walk_hole() and hmm_vma_walk_clear() functions. 
Link: http://lkml.kernel.org/r/20180323005527.758-11-jglisse@redhat.com Signed-off-by: Jérôme Glisse Reviewed-by: John Hubbard Cc: Evgeny Baskakov Cc: Ralph Campbell Cc: Mark Hairgrove Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 54d684fe3b90..cf283db22106 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -84,7 +84,6 @@ struct hmm; * HMM_PFN_VALID: pfn is valid. It has, at least, read permission. * HMM_PFN_WRITE: CPU page table has write permission set * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory - * HMM_PFN_EMPTY: corresponding CPU page table entry is pte_none() * HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the * result of vm_insert_pfn() or vm_insert_page(). Therefore, it should not * be mirrored by a device, because the entry will never have HMM_PFN_VALID @@ -94,10 +93,9 @@ struct hmm; #define HMM_PFN_VALID (1 << 0) #define HMM_PFN_WRITE (1 << 1) #define HMM_PFN_ERROR (1 << 2) -#define HMM_PFN_EMPTY (1 << 3) -#define HMM_PFN_SPECIAL (1 << 4) -#define HMM_PFN_DEVICE_UNADDRESSABLE (1 << 5) -#define HMM_PFN_SHIFT 6 +#define HMM_PFN_SPECIAL (1 << 3) +#define HMM_PFN_DEVICE_UNADDRESSABLE (1 << 4) +#define HMM_PFN_SHIFT 5 /* * hmm_pfn_to_page() - return struct page pointed to by a valid HMM pfn -- cgit v1.2.3 From b2744118a65efee90aea95f7cd31bf74eb8009f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Tue, 10 Apr 2018 16:28:49 -0700 Subject: mm/hmm: rename HMM_PFN_DEVICE_UNADDRESSABLE to HMM_PFN_DEVICE_PRIVATE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make naming consistent across code, DEVICE_PRIVATE is the name use outside HMM code so use that one. 
Link: http://lkml.kernel.org/r/20180323005527.758-12-jglisse@redhat.com Signed-off-by: Jérôme Glisse Reviewed-by: John Hubbard Cc: Evgeny Baskakov Cc: Ralph Campbell Cc: Mark Hairgrove Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index cf283db22106..e8515cad5a00 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -88,13 +88,13 @@ struct hmm; * result of vm_insert_pfn() or vm_insert_page(). Therefore, it should not * be mirrored by a device, because the entry will never have HMM_PFN_VALID * set and the pfn value is undefined. - * HMM_PFN_DEVICE_UNADDRESSABLE: unaddressable device memory (ZONE_DEVICE) + * HMM_PFN_DEVICE_PRIVATE: unaddressable device memory (ZONE_DEVICE) */ #define HMM_PFN_VALID (1 << 0) #define HMM_PFN_WRITE (1 << 1) #define HMM_PFN_ERROR (1 << 2) #define HMM_PFN_SPECIAL (1 << 3) -#define HMM_PFN_DEVICE_UNADDRESSABLE (1 << 4) +#define HMM_PFN_DEVICE_PRIVATE (1 << 4) #define HMM_PFN_SHIFT 5 /* -- cgit v1.2.3 From 2aee09d8c1164219971c7b396f2235bd5334018c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Tue, 10 Apr 2018 16:29:02 -0700 Subject: mm/hmm: change hmm_vma_fault() to allow write fault on page basis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This changes hmm_vma_fault() to not take a global write fault flag for a range but instead rely on caller to populate HMM pfns array with proper fault flag ie HMM_PFN_VALID if driver want read fault for that address or HMM_PFN_VALID and HMM_PFN_WRITE for write. Moreover by setting HMM_PFN_DEVICE_PRIVATE the device driver can ask for device private memory to be migrated back to system memory through page fault. This is more flexible API and it better reflects how device handles and reports fault. 
Link: http://lkml.kernel.org/r/20180323005527.758-15-jglisse@redhat.com Signed-off-by: Jérôme Glisse Cc: Evgeny Baskakov Cc: Ralph Campbell Cc: Mark Hairgrove Cc: John Hubbard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index e8515cad5a00..0f7ea3074175 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -317,7 +317,7 @@ bool hmm_vma_range_done(struct hmm_range *range); * * See the function description in mm/hmm.c for further documentation. */ -int hmm_vma_fault(struct hmm_range *range, bool write, bool block); +int hmm_vma_fault(struct hmm_range *range, bool block); #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ -- cgit v1.2.3 From f88a1e90c665408732ab16ea48e1a182dce597a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Tue, 10 Apr 2018 16:29:06 -0700 Subject: mm/hmm: use device driver encoding for HMM pfn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Users of hmm_vma_fault() and hmm_vma_get_pfns() provide a flags array and pfn shift value allowing them to define their own encoding for HMM pfn that are fill inside the pfns array of the hmm_range struct. With this device driver can get pfn that match their own private encoding out of HMM without having to do any conversion. 
[rcampbell@nvidia.com: don't ignore specific pte fault flag in hmm_vma_fault()] Link: http://lkml.kernel.org/r/20180326213009.2460-2-jglisse@redhat.com [rcampbell@nvidia.com: clarify fault logic for device private memory] Link: http://lkml.kernel.org/r/20180326213009.2460-3-jglisse@redhat.com Link: http://lkml.kernel.org/r/20180323005527.758-16-jglisse@redhat.com Signed-off-by: Jérôme Glisse Signed-off-by: Ralph Campbell Cc: Evgeny Baskakov Cc: Ralph Campbell Cc: Mark Hairgrove Cc: John Hubbard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 130 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 94 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 0f7ea3074175..5d26e0a223d9 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -80,68 +80,145 @@ struct hmm; /* + * hmm_pfn_flag_e - HMM flag enums + * * Flags: * HMM_PFN_VALID: pfn is valid. It has, at least, read permission. * HMM_PFN_WRITE: CPU page table has write permission set + * HMM_PFN_DEVICE_PRIVATE: private device memory (ZONE_DEVICE) + * + * The driver provide a flags array, if driver valid bit for an entry is bit + * 3 ie (entry & (1 << 3)) is true if entry is valid then driver must provide + * an array in hmm_range.flags with hmm_range.flags[HMM_PFN_VALID] == 1 << 3. + * Same logic apply to all flags. This is same idea as vm_page_prot in vma + * except that this is per device driver rather than per architecture. + */ +enum hmm_pfn_flag_e { + HMM_PFN_VALID = 0, + HMM_PFN_WRITE, + HMM_PFN_DEVICE_PRIVATE, + HMM_PFN_FLAG_MAX +}; + +/* + * hmm_pfn_value_e - HMM pfn special value + * + * Flags: * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory + * HMM_PFN_NONE: corresponding CPU page table entry is pte_none() * HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the * result of vm_insert_pfn() or vm_insert_page(). 
Therefore, it should not * be mirrored by a device, because the entry will never have HMM_PFN_VALID * set and the pfn value is undefined. - * HMM_PFN_DEVICE_PRIVATE: unaddressable device memory (ZONE_DEVICE) + * + * Driver provide entry value for none entry, error entry and special entry, + * driver can alias (ie use same value for error and special for instance). It + * should not alias none and error or special. + * + * HMM pfn value returned by hmm_vma_get_pfns() or hmm_vma_fault() will be: + * hmm_range.values[HMM_PFN_ERROR] if CPU page table entry is poisonous, + * hmm_range.values[HMM_PFN_NONE] if there is no CPU page table + * hmm_range.values[HMM_PFN_SPECIAL] if CPU page table entry is a special one */ -#define HMM_PFN_VALID (1 << 0) -#define HMM_PFN_WRITE (1 << 1) -#define HMM_PFN_ERROR (1 << 2) -#define HMM_PFN_SPECIAL (1 << 3) -#define HMM_PFN_DEVICE_PRIVATE (1 << 4) -#define HMM_PFN_SHIFT 5 +enum hmm_pfn_value_e { + HMM_PFN_ERROR, + HMM_PFN_NONE, + HMM_PFN_SPECIAL, + HMM_PFN_VALUE_MAX +}; + +/* + * struct hmm_range - track invalidation lock on virtual address range + * + * @vma: the vm area struct for the range + * @list: all range lock are on a list + * @start: range virtual start address (inclusive) + * @end: range virtual end address (exclusive) + * @pfns: array of pfns (big enough for the range) + * @flags: pfn flags to match device driver page table + * @values: pfn value for some special case (none, special, error, ...) 
+ * @pfn_shift: pfn shift value (should be <= PAGE_SHIFT) + * @valid: pfns array did not change since it has been filled by an HMM function + */ +struct hmm_range { + struct vm_area_struct *vma; + struct list_head list; + unsigned long start; + unsigned long end; + uint64_t *pfns; + const uint64_t *flags; + const uint64_t *values; + uint8_t pfn_shift; + bool valid; +}; /* * hmm_pfn_to_page() - return struct page pointed to by a valid HMM pfn + * @range: range used to decode HMM pfn value * @pfn: HMM pfn value to get corresponding struct page from * Returns: struct page pointer if pfn is a valid HMM pfn, NULL otherwise * * If the HMM pfn is valid (ie valid flag set) then return the struct page * matching the pfn value stored in the HMM pfn. Otherwise return NULL. */ -static inline struct page *hmm_pfn_to_page(uint64_t pfn) +static inline struct page *hmm_pfn_to_page(const struct hmm_range *range, + uint64_t pfn) { - if (!(pfn & HMM_PFN_VALID)) + if (pfn == range->values[HMM_PFN_NONE]) + return NULL; + if (pfn == range->values[HMM_PFN_ERROR]) + return NULL; + if (pfn == range->values[HMM_PFN_SPECIAL]) return NULL; - return pfn_to_page(pfn >> HMM_PFN_SHIFT); + if (!(pfn & range->flags[HMM_PFN_VALID])) + return NULL; + return pfn_to_page(pfn >> range->pfn_shift); } /* * hmm_pfn_to_pfn() - return pfn value store in a HMM pfn + * @range: range used to decode HMM pfn value * @pfn: HMM pfn value to extract pfn from * Returns: pfn value if HMM pfn is valid, -1UL otherwise */ -static inline unsigned long hmm_pfn_to_pfn(uint64_t pfn) +static inline unsigned long hmm_pfn_to_pfn(const struct hmm_range *range, + uint64_t pfn) { - if (!(pfn & HMM_PFN_VALID)) + if (pfn == range->values[HMM_PFN_NONE]) + return -1UL; + if (pfn == range->values[HMM_PFN_ERROR]) + return -1UL; + if (pfn == range->values[HMM_PFN_SPECIAL]) return -1UL; - return (pfn >> HMM_PFN_SHIFT); + if (!(pfn & range->flags[HMM_PFN_VALID])) + return -1UL; + return (pfn >> range->pfn_shift); } /* * hmm_pfn_from_page() -
create a valid HMM pfn value from struct page + * @range: range use to encode HMM pfn value * @page: struct page pointer for which to create the HMM pfn * Returns: valid HMM pfn for the page */ -static inline uint64_t hmm_pfn_from_page(struct page *page) +static inline uint64_t hmm_pfn_from_page(const struct hmm_range *range, + struct page *page) { - return (page_to_pfn(page) << HMM_PFN_SHIFT) | HMM_PFN_VALID; + return (page_to_pfn(page) << range->pfn_shift) | + range->flags[HMM_PFN_VALID]; } /* * hmm_pfn_from_pfn() - create a valid HMM pfn value from pfn + * @range: range use to encode HMM pfn value * @pfn: pfn value for which to create the HMM pfn * Returns: valid HMM pfn for the pfn */ -static inline uint64_t hmm_pfn_from_pfn(unsigned long pfn) +static inline uint64_t hmm_pfn_from_pfn(const struct hmm_range *range, + unsigned long pfn) { - return (pfn << HMM_PFN_SHIFT) | HMM_PFN_VALID; + return (pfn << range->pfn_shift) | + range->flags[HMM_PFN_VALID]; } @@ -263,25 +340,6 @@ int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm); void hmm_mirror_unregister(struct hmm_mirror *mirror); -/* - * struct hmm_range - track invalidation lock on virtual address range - * - * @vma: the vm area struct for the range - * @list: all range lock are on a list - * @start: range virtual start address (inclusive) - * @end: range virtual end address (exclusive) - * @pfns: array of pfns (big enough for the range) - * @valid: pfns array did not change since it has been fill by an HMM function - */ -struct hmm_range { - struct vm_area_struct *vma; - struct list_head list; - unsigned long start; - unsigned long end; - uint64_t *pfns; - bool valid; -}; - /* * To snapshot the CPU page table, call hmm_vma_get_pfns(), then take a device * driver lock that serializes device page table updates, then call -- cgit v1.2.3 From 9d8a463a7016e9e5578a561588a18acef139919c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 10 Apr 2018 16:29:13 -0700 Subject: mm/hmm: fix header 
file if/else/endif maze, again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The last fix was still wrong, as we need the inline dummy functions also for the case that CONFIG_HMM is enabled but CONFIG_HMM_MIRROR is not: kernel/fork.o: In function `__mmdrop': fork.c:(.text+0x14f6): undefined reference to `hmm_mm_destroy' This adds back the second copy of the dummy functions, hopefully this time in the right place. Link: http://lkml.kernel.org/r/20180404110236.804484-1-arnd@arndb.de Fixes: 8900d06a277a ("mm/hmm: fix header file if/else/endif maze") Signed-off-by: Arnd Bergmann Reviewed-by: Jérôme Glisse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 5d26e0a223d9..39988924de3a 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -376,8 +376,18 @@ bool hmm_vma_range_done(struct hmm_range *range); * See the function description in mm/hmm.c for further documentation. */ int hmm_vma_fault(struct hmm_range *range, bool block); -#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ +/* Below are for HMM internal use only! Not to be used by device driver! */ +void hmm_mm_destroy(struct mm_struct *mm); + +static inline void hmm_mm_init(struct mm_struct *mm) +{ + mm->hmm = NULL; +} +#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */ +static inline void hmm_mm_destroy(struct mm_struct *mm) {} +static inline void hmm_mm_init(struct mm_struct *mm) {} +#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC) struct hmm_devmem; @@ -550,16 +560,9 @@ struct hmm_device { struct hmm_device *hmm_device_new(void *drvdata); void hmm_device_put(struct hmm_device *hmm_device); #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ - -/* Below are for HMM internal use only! 
Not to be used by device driver! */ -void hmm_mm_destroy(struct mm_struct *mm); - -static inline void hmm_mm_init(struct mm_struct *mm) -{ - mm->hmm = NULL; -} #else /* IS_ENABLED(CONFIG_HMM) */ static inline void hmm_mm_destroy(struct mm_struct *mm) {} static inline void hmm_mm_init(struct mm_struct *mm) {} #endif /* IS_ENABLED(CONFIG_HMM) */ + #endif /* LINUX_HMM_H */ -- cgit v1.2.3 From e27be240df53f1a20c659168e722b5d9f16cc7f4 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 10 Apr 2018 16:29:45 -0700 Subject: mm: memcg: make sure memory.events is uptodate when waking pollers Commit a983b5ebee57 ("mm: memcontrol: fix excessive complexity in memory.stat reporting") added per-cpu drift to all memory cgroup stats and events shown in memory.stat and memory.events. For memory.stat this is acceptable. But memory.events issues file notifications, and somebody polling the file for changes will be confused when the counters in it are unchanged after a wakeup. Luckily, the events in memory.events - MEMCG_LOW, MEMCG_HIGH, MEMCG_MAX, MEMCG_OOM - are sufficiently rare and high-level that we don't need per-cpu buffering for them: MEMCG_HIGH and MEMCG_MAX would be the most frequent, but they're counting invocations of reclaim, which is a complex operation that touches many shared cachelines. This splits memory.events from the generic VM events and tracks them in their own, unbuffered atomic counters. That's also cleaner, as it eliminates the ugly enum nesting of VM and cgroup events. 
[hannes@cmpxchg.org: "array subscript is above array bounds"] Link: http://lkml.kernel.org/r/20180406155441.GA20806@cmpxchg.org Link: http://lkml.kernel.org/r/20180405175507.GA24817@cmpxchg.org Fixes: a983b5ebee57 ("mm: memcontrol: fix excessive complexity in memory.stat reporting") Signed-off-by: Johannes Weiner Reported-by: Tejun Heo Acked-by: Tejun Heo Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: Roman Gushchin Cc: Rik van Riel Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index f292efac378d..d99b71bc2c66 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -48,13 +48,12 @@ enum memcg_stat_item { MEMCG_NR_STAT, }; -/* Cgroup-specific events, on top of universal VM events */ -enum memcg_event_item { - MEMCG_LOW = NR_VM_EVENT_ITEMS, +enum memcg_memory_event { + MEMCG_LOW, MEMCG_HIGH, MEMCG_MAX, MEMCG_OOM, - MEMCG_NR_EVENTS, + MEMCG_NR_MEMORY_EVENTS, }; struct mem_cgroup_reclaim_cookie { @@ -88,7 +87,7 @@ enum mem_cgroup_events_target { struct mem_cgroup_stat_cpu { long count[MEMCG_NR_STAT]; - unsigned long events[MEMCG_NR_EVENTS]; + unsigned long events[NR_VM_EVENT_ITEMS]; unsigned long nr_page_events; unsigned long targets[MEM_CGROUP_NTARGETS]; }; @@ -205,7 +204,8 @@ struct mem_cgroup { /* OOM-Killer disable */ int oom_kill_disable; - /* handle for "memory.events" */ + /* memory.events */ + atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS]; struct cgroup_file events_file; /* protect arrays of thresholds */ @@ -234,9 +234,10 @@ struct mem_cgroup { struct task_struct *move_lock_task; unsigned long move_lock_flags; + /* memory.stat */ struct mem_cgroup_stat_cpu __percpu *stat_cpu; atomic_long_t stat[MEMCG_NR_STAT]; - atomic_long_t events[MEMCG_NR_EVENTS]; + atomic_long_t 
events[NR_VM_EVENT_ITEMS]; unsigned long socket_pressure; @@ -648,9 +649,9 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, unsigned long *total_scanned); -/* idx can be of type enum memcg_event_item or vm_event_item */ static inline void __count_memcg_events(struct mem_cgroup *memcg, - int idx, unsigned long count) + enum vm_event_item idx, + unsigned long count) { unsigned long x; @@ -666,7 +667,8 @@ static inline void __count_memcg_events(struct mem_cgroup *memcg, } static inline void count_memcg_events(struct mem_cgroup *memcg, - int idx, unsigned long count) + enum vm_event_item idx, + unsigned long count) { unsigned long flags; @@ -675,9 +677,8 @@ static inline void count_memcg_events(struct mem_cgroup *memcg, local_irq_restore(flags); } -/* idx can be of type enum memcg_event_item or vm_event_item */ static inline void count_memcg_page_event(struct page *page, - int idx) + enum vm_event_item idx) { if (page->mem_cgroup) count_memcg_events(page->mem_cgroup, idx, 1); @@ -701,10 +702,10 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, rcu_read_unlock(); } -static inline void mem_cgroup_event(struct mem_cgroup *memcg, - enum memcg_event_item event) +static inline void memcg_memory_event(struct mem_cgroup *memcg, + enum memcg_memory_event event) { - count_memcg_events(memcg, event, 1); + atomic_long_inc(&memcg->memory_events[event]); cgroup_file_notify(&memcg->events_file); } @@ -724,8 +725,8 @@ static inline bool mem_cgroup_disabled(void) return true; } -static inline void mem_cgroup_event(struct mem_cgroup *memcg, - enum memcg_event_item event) +static inline void memcg_memory_event(struct mem_cgroup *memcg, + enum memcg_memory_event event) { } -- cgit v1.2.3 From 666feb21a0083e5b29ddd96588553ffa0cc357b6 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 10 Apr 2018 16:30:03 -0700 Subject: mm, migrate: remove reason argument from new_page_t No allocation callback is using this argument anymore. 
new_page_node used to use this parameter to convey node_id resp. migration error up to move_pages code (do_move_page_to_node_array). The error status never made it into the final status field and we have a better way to communicate node id to the status field now. All other allocation callbacks simply ignored the argument so we can drop it finally. [mhocko@suse.com: fix migration callback] Link: http://lkml.kernel.org/r/20180105085259.GH2801@dhcp22.suse.cz [akpm@linux-foundation.org: fix alloc_misplaced_dst_page()] [mhocko@kernel.org: fix build] Link: http://lkml.kernel.org/r/20180103091134.GB11319@dhcp22.suse.cz Link: http://lkml.kernel.org/r/20180103082555.14592-3-mhocko@kernel.org Signed-off-by: Michal Hocko Reviewed-by: Zi Yan Cc: Andrea Reale Cc: Anshuman Khandual Cc: Kirill A. Shutemov Cc: Mike Kravetz Cc: Naoya Horiguchi Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 3 +-- include/linux/page-isolation.h | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index ab45f8a0d288..e0393240bf64 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -7,8 +7,7 @@ #include #include -typedef struct page *new_page_t(struct page *page, unsigned long private, - int **reason); +typedef struct page *new_page_t(struct page *page, unsigned long private); typedef void free_page_t(struct page *page, unsigned long private); /* diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index cdad58bbfd8b..4ae347cbc36d 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -63,7 +63,6 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, bool skip_hwpoisoned_pages); -struct page *alloc_migrate_target(struct page *page, unsigned long private, - int **resultp); +struct page 
*alloc_migrate_target(struct page *page, unsigned long private); #endif -- cgit v1.2.3 From 94723aafb9e76414fada7c1c198733a86f01ea8f Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 10 Apr 2018 16:30:07 -0700 Subject: mm: unclutter THP migration THP migration is hacked into the generic migration with rather surprising semantics. The migration allocation callback is supposed to check whether the THP can be migrated at once and if that is not the case then it allocates a simple page to migrate. unmap_and_move then fixes that up by splitting the THP into small pages while moving the head page to the newly allocated order-0 page. Remaining pages are moved to the LRU list by split_huge_page. The same happens if the THP allocation fails. This is really ugly and error prone [1]. I also believe that split_huge_page to the LRU lists is inherently wrong because all tail pages are not migrated. Some callers will just work around that by retrying (e.g. memory hotplug). There are other pfn walkers which are simply broken though. e.g. madvise_inject_error will migrate head and then advances next pfn by the huge page size. do_move_page_to_node_array, queue_pages_range (migrate_pages, mbind), will simply split the THP before migration if the THP migration is not supported then falls back to single page migration but it doesn't handle tail pages if the THP migration path is not able to allocate a fresh THP so we end up with ENOMEM and fail the whole migration which is a questionable behavior. Page compaction doesn't try to migrate large pages so it should be immune. This patch tries to unclutter the situation by moving the special THP handling up to the migrate_pages layer where it actually belongs. We simply split the THP page into the existing list if unmap_and_move fails with ENOMEM and retry. So we will _always_ migrate all THP subpages and specific migrate_pages users do not have to deal with this case in a special way.
[1] http://lkml.kernel.org/r/20171121021855.50525-1-zi.yan@sent.com Link: http://lkml.kernel.org/r/20180103082555.14592-4-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Kirill A. Shutemov Reviewed-by: Zi Yan Cc: Andrea Reale Cc: Anshuman Khandual Cc: Mike Kravetz Cc: Naoya Horiguchi Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index e0393240bf64..f2b4abbca55e 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -42,9 +42,9 @@ static inline struct page *new_page_nodemask(struct page *page, return alloc_huge_page_nodemask(page_hstate(compound_head(page)), preferred_nid, nodemask); - if (thp_migration_supported() && PageTransHuge(page)) { - order = HPAGE_PMD_ORDER; + if (PageTransHuge(page)) { gfp_mask |= GFP_TRANSHUGE; + order = HPAGE_PMD_ORDER; } if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) -- cgit v1.2.3 From d3cda2337bbc9edd2a26b83cb00eaa8c048ff274 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 10 Apr 2018 16:30:11 -0700 Subject: mm/page_alloc: don't reserve ZONE_HIGHMEM for ZONE_MOVABLE request Freepage on ZONE_HIGHMEM doesn't work for kernel memory so it's not that important to reserve. When ZONE_MOVABLE is used, this problem would theoretically cause a decrease in usable memory for GFP_HIGHUSER_MOVABLE allocation request which is mainly used for page cache and anon page allocation. So, fix it by setting 0 to sysctl_lowmem_reserve_ratio[ZONE_HIGHMEM]. And, defining sysctl_lowmem_reserve_ratio array by MAX_NR_ZONES - 1 size makes code complex. For example, if there is highmem system, following reserve ratio is activated for *NORMAL ZONE* which could easily mislead people.
#ifdef CONFIG_HIGHMEM 32 #endif This patch also fixes this situation by defining sysctl_lowmem_reserve_ratio array by MAX_NR_ZONES and place "#ifdef" to right place. Link: http://lkml.kernel.org/r/1504672525-17915-1-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Joonsoo Kim Reviewed-by: Aneesh Kumar K.V Acked-by: Vlastimil Babka Tested-by: Tony Lindgren Cc: Michal Hocko Cc: Vlastimil Babka Cc: Mel Gorman Cc: Johannes Weiner Cc: "Aneesh Kumar K . V" Cc: Minchan Kim Cc: Rik van Riel Cc: Laura Abbott Cc: Marek Szyprowski Cc: Michal Nazarewicz Cc: Russell King Cc: Will Deacon Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index a0c9e45a859a..32699b2dc52a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -885,7 +885,7 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1]; +extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES]; int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, -- cgit v1.2.3 From bad8c6c0b1144694ecb0bc5629ede9b8b578b86e Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Tue, 10 Apr 2018 16:30:15 -0700 Subject: mm/cma: manage the memory of the CMA area by using the ZONE_MOVABLE Patch series "mm/cma: manage the memory of the CMA area by using the ZONE_MOVABLE", v2. 0. History This patchset is the follow-up of the discussion about the "Introduce ZONE_CMA (v7)" [1]. Please reference it if more information is needed. 1. What does this patch do? This patch changes the management way for the memory of the CMA area in the MM subsystem. 
Currently the memory of the CMA area is managed by the zone where their pfn is belong to. However, this approach has some problems since MM subsystem doesn't have enough logic to handle the situation that different characteristic memories are in a single zone. To solve this issue, this patch try to manage all the memory of the CMA area by using the MOVABLE zone. In MM subsystem's point of view, characteristic of the memory on the MOVABLE zone and the memory of the CMA area are the same. So, managing the memory of the CMA area by using the MOVABLE zone will not have any problem. 2. Motivation There are some problems with current approach. See following. Although these problem would not be inherent and it could be fixed without this conception change, it requires many hooks addition in various code path and it would be intrusive to core MM and would be really error-prone. Therefore, I try to solve them with this new approach. Anyway, following is the problems of the current implementation. o CMA memory utilization First, following is the freepage calculation logic in MM. - For movable allocation: freepage = total freepage - For unmovable allocation: freepage = total freepage - CMA freepage Freepages on the CMA area is used after the normal freepages in the zone where the memory of the CMA area is belong to are exhausted. At that moment that the number of the normal freepages is zero, so - For movable allocation: freepage = total freepage = CMA freepage - For unmovable allocation: freepage = 0 If unmovable allocation comes at this moment, allocation request would fail to pass the watermark check and reclaim is started. After reclaim, there would exist the normal freepages so freepages on the CMA areas would not be used. FYI, there is another attempt [2] trying to solve this problem in lkml. And, as far as I know, Qualcomm also has out-of-tree solution for this problem. Useless reclaim: There is no logic to distinguish CMA pages in the reclaim path. 
Hence, a CMA page is reclaimed even if the system just needs a page that is usable for kernel allocation. Atomic allocation failure: This is also related to the fallback allocation policy for the memory of the CMA area. Consider the situation that the number of the normal freepages is *zero* because a bunch of movable allocation requests have come in. Kswapd would not be woken up due to the following freepage calculation logic. - For movable allocation: freepage = total freepage = CMA freepage If an atomic unmovable allocation request comes at this moment, it would fail due to the following logic. - For unmovable allocation: freepage = total freepage - CMA freepage = 0 It was reported by Aneesh [3]. Useless compaction: The usual high-order allocation request is an unmovable allocation request and it cannot be served from the memory of the CMA area. In compaction, the migration scanner tries to migrate pages in the CMA area and make a high-order page there. As mentioned above, that page cannot be used for the unmovable allocation request, so it's just wasted work. 3. Current approach and new approach The current approach is that the memory of the CMA area is managed by the zone to which its pfn belongs. However, this memory should be distinguishable since it has a strong limitation. So, it is marked as MIGRATE_CMA in the pageblock flag and handled specially. However, as mentioned in section 2, the MM subsystem doesn't have enough logic to deal with this special pageblock, so many problems arise. The new approach is that the memory of the CMA area is managed by the MOVABLE zone. MM already has enough logic to deal with special zones like the HIGHMEM and MOVABLE zones. So, managing the memory of the CMA area by the MOVABLE zone just naturally works well because the constraint for the memory of the CMA area, that the memory should always be migratable, is the same as the constraint for the MOVABLE zone. There is one side effect on the usability of the memory of the CMA area.
The use of MOVABLE zone is only allowed for a request with GFP_HIGHMEM && GFP_MOVABLE so now the memory of the CMA area is also only allowed for this gfp flag. Before this patchset, a request with GFP_MOVABLE can use them. IMO, It would not be a big issue since most of GFP_MOVABLE request also has GFP_HIGHMEM flag. For example, file cache page and anonymous page. However, file cache page for blockdev file is an exception. Request for it has no GFP_HIGHMEM flag. There is pros and cons on this exception. In my experience, blockdev file cache pages are one of the top reason that causes cma_alloc() to fail temporarily. So, we can get more guarantee of cma_alloc() success by discarding this case. Note that there is no change in admin POV since this patchset is just for internal implementation change in MM subsystem. Just one minor difference for admin is that the memory stat for CMA area will be printed in the MOVABLE zone. That's all. 4. Result Following is the experimental result related to utilization problem. 
8 CPUs, 1024 MB, VIRTUAL MACHINE make -j16 CMA area: 0 MB 512 MB Elapsed-time: 92.4 186.5 pswpin: 82 18647 pswpout: 160 69839 CMA : 0 MB 512 MB Elapsed-time: 93.1 93.4 pswpin: 84 46 pswpout: 183 92 akpm: "kernel test robot" reported a 26% improvement in vm-scalability.throughput: http://lkml.kernel.org/r/20180330012721.GA3845@yexl-desktop [1]: lkml.kernel.org/r/1491880640-9944-1-git-send-email-iamjoonsoo.kim@lge.com [2]: https://lkml.org/lkml/2014/10/15/623 [3]: http://www.spinics.net/lists/linux-mm/msg100562.html Link: http://lkml.kernel.org/r/1512114786-5085-2-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Joonsoo Kim Reviewed-by: Aneesh Kumar K.V Tested-by: Tony Lindgren Acked-by: Vlastimil Babka Cc: Johannes Weiner Cc: Laura Abbott Cc: Marek Szyprowski Cc: Mel Gorman Cc: Michal Hocko Cc: Michal Nazarewicz Cc: Minchan Kim Cc: Rik van Riel Cc: Russell King Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 3 --- include/linux/mm.h | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 2b0265265c28..e0e49b5b1ee1 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -216,9 +216,6 @@ void put_online_mems(void); void mem_hotplug_begin(void); void mem_hotplug_done(void); -extern void set_zone_contiguous(struct zone *zone); -extern void clear_zone_contiguous(struct zone *zone); - #else /* ! 
CONFIG_MEMORY_HOTPLUG */ #define pfn_to_online_page(pfn) \ ({ \ diff --git a/include/linux/mm.h b/include/linux/mm.h index 3ad632366973..342c441c25d0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2108,6 +2108,7 @@ extern void setup_per_cpu_pageset(void); extern void zone_pcp_update(struct zone *zone); extern void zone_pcp_reset(struct zone *zone); +extern void setup_zone_pageset(struct zone *zone); /* page_alloc.c */ extern int min_free_kbytes; -- cgit v1.2.3 From 0e3dc019143104a6e676287b1e453cccd7add404 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 10 Apr 2018 16:30:44 -0700 Subject: procfs: add seq_put_hex_ll to speed up /proc/pid/maps seq_put_hex_ll() prints a number in hexadecimal notation and works faster than seq_printf(). == test.py num = 0 with open("/proc/1/maps") as f: while num < 10000 : data = f.read() f.seek(0, 0) num = num + 1 == == Before patch == $ time python test.py real 0m1.561s user 0m0.257s sys 0m1.302s == After patch == $ time python test.py real 0m0.986s user 0m0.279s sys 0m0.707s $ perf -g record python test.py: == Before patch == - 67.42% 2.82% python [kernel.kallsyms] [k] show_map_vma.isra.22 - 64.60% show_map_vma.isra.22 - 44.98% seq_printf - seq_vprintf - vsnprintf + 14.85% number + 12.22% format_decode 5.56% memcpy_erms + 15.06% seq_path + 4.42% seq_pad + 2.45% __GI___libc_read == After patch == - 47.35% 3.38% python [kernel.kallsyms] [k] show_map_vma.isra.23 - 43.97% show_map_vma.isra.23 + 20.84% seq_path - 15.73% show_vma_header_prefix 10.55% seq_put_hex_ll + 2.65% seq_put_decimal_ull 0.95% seq_putc + 6.96% seq_pad + 2.94% __GI___libc_read [avagin@openvz.org: use unsigned int instead of int where it is suitable] Link: http://lkml.kernel.org/r/20180214025619.4005-1-avagin@openvz.org [avagin@openvz.org: v2] Link: http://lkml.kernel.org/r/20180117082050.25406-1-avagin@openvz.org Link: http://lkml.kernel.org/r/20180112185812.7710-1-avagin@openvz.org Signed-off-by: Andrei Vagin Cc: Alexey Dobriyan Cc: KAMEZAWA 
Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/seq_file.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index ab437dd2e3b9..599e145f4917 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -121,6 +121,9 @@ void seq_puts(struct seq_file *m, const char *s); void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, unsigned long long num); void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num); +void seq_put_hex_ll(struct seq_file *m, const char *delimiter, + unsigned long long v, unsigned int width); + void seq_escape(struct seq_file *m, const char *s, const char *esc); void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type, -- cgit v1.2.3 From d1be35cb6f96975d792a1535d3fe9b75239065ee Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 10 Apr 2018 16:31:16 -0700 Subject: proc: add seq_put_decimal_ull_width to speed up /proc/pid/smaps seq_put_decimal_ull_width(m, str, val, width) prints a decimal number with a specified minimum field width. It is equivalent to seq_printf(m, "%s%*d", str, width, val), but it works much faster.
== test_smaps.py num = 0 with open("/proc/1/smaps") as f: for x in xrange(10000): data = f.read() f.seek(0, 0) == == Before patch == $ time python test_smaps.py real 0m4.593s user 0m0.398s sys 0m4.158s == After patch == $ time python test_smaps.py real 0m3.828s user 0m0.413s sys 0m3.408s $ perf -g record python test_smaps.py == Before patch == - 79.01% 3.36% python [kernel.kallsyms] [k] show_smap.isra.33 - 75.65% show_smap.isra.33 + 48.85% seq_printf + 15.75% __walk_page_range + 9.70% show_map_vma.isra.23 0.61% seq_puts == After patch == - 75.51% 4.62% python [kernel.kallsyms] [k] show_smap.isra.33 - 70.88% show_smap.isra.33 + 24.82% seq_put_decimal_ull_w + 19.78% __walk_page_range + 12.74% seq_printf + 11.08% show_map_vma.isra.23 + 1.68% seq_puts [akpm@linux-foundation.org: fix drivers/of/unittest.c build] Link: http://lkml.kernel.org/r/20180212074931.7227-1-avagin@openvz.org Signed-off-by: Andrei Vagin Cc: Alexey Dobriyan Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 3 ++- include/linux/seq_file.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 52b70894eaa5..98273343bd45 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -439,7 +439,8 @@ extern long simple_strtol(const char *,char **,unsigned int); extern unsigned long long simple_strtoull(const char *,char **,unsigned int); extern long long simple_strtoll(const char *,char **,unsigned int); -extern int num_to_str(char *buf, int size, unsigned long long num); +extern int num_to_str(char *buf, int size, + unsigned long long num, unsigned int width); /* lib/printf utilities */ diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 599e145f4917..23d6a92cea9f 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -118,6 +118,8 @@ __printf(2, 3) void seq_printf(struct seq_file *m, const char *fmt, ...); 
void seq_putc(struct seq_file *m, char c); void seq_puts(struct seq_file *m, const char *s); +void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter, + unsigned long long num, unsigned int width); void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, unsigned long long num); void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num); -- cgit v1.2.3 From 47d4b263a2f7324fb3cb641ca00b2725dd12dea0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 10 Apr 2018 16:32:26 -0700 Subject: taint: convert to indexed initialization This converts to using indexed initializers instead of comments, adds a comment on why the taint flags can't be an enum, and make sure that no one forgets to update the taint_flags when adding new bits. Link: http://lkml.kernel.org/r/1519084390-43867-2-git-send-email-keescook@chromium.org Signed-off-by: Kees Cook Reviewed-by: Andrew Morton Cc: Al Viro Cc: Alexey Dobriyan Cc: Jonathan Corbet Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 98273343bd45..086e8e80f765 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -544,6 +544,7 @@ extern enum system_states { SYSTEM_RESTART, } system_state; +/* This cannot be an enum because some may be used in assembly source. 
*/ #define TAINT_PROPRIETARY_MODULE 0 #define TAINT_FORCED_MODULE 1 #define TAINT_CPU_OUT_OF_SPEC 2 -- cgit v1.2.3 From bc4f2f5469ac2a52affadc4c00c1276d76151a39 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 10 Apr 2018 16:32:33 -0700 Subject: taint: add taint for randstruct Since the randstruct plugin can intentionally produce extremely unusual kernel structure layouts (even performance pathological ones), some maintainers want to be able to trivially determine if an Oops is coming from a randstruct-built kernel, so as to keep their sanity when debugging. This adds the new flag and initializes taint_mask immediately when built with randstruct. Link: http://lkml.kernel.org/r/1519084390-43867-4-git-send-email-keescook@chromium.org Signed-off-by: Kees Cook Reviewed-by: Andrew Morton Cc: Al Viro Cc: Alexey Dobriyan Cc: Jonathan Corbet Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 086e8e80f765..6a1eb0b0aad9 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -562,7 +562,8 @@ extern enum system_states { #define TAINT_SOFTLOCKUP 14 #define TAINT_LIVEPATCH 15 #define TAINT_AUX 16 -#define TAINT_FLAGS_COUNT 17 +#define TAINT_RANDSTRUCT 17 +#define TAINT_FLAGS_COUNT 18 struct taint_flag { char c_true; /* character printed when tainted */ -- cgit v1.2.3 From 3ea056c50476f877f8bceb560ab69871098cb3a9 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 10 Apr 2018 16:32:36 -0700 Subject: uts: create "struct uts_namespace" from kmem_cache So "struct uts_namespace" can enjoy fine-grained SLAB debugging and usercopy protection. I'd prefer shorter name "utsns" but there is "user_namespace" already. Link: http://lkml.kernel.org/r/20180228215158.GA23146@avx2 Signed-off-by: Alexey Dobriyan Reviewed-by: Andrew Morton Cc: "Eric W. 
Biederman" Cc: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/utsname.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/utsname.h b/include/linux/utsname.h index c8060c2ecd04..44429d9142ca 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -44,6 +44,8 @@ static inline void put_uts_ns(struct uts_namespace *ns) { kref_put(&ns->kref, free_uts_ns); } + +void uts_ns_init(void); #else static inline void get_uts_ns(struct uts_namespace *ns) { @@ -61,6 +63,10 @@ static inline struct uts_namespace *copy_utsname(unsigned long flags, return old_ns; } + +static inline void uts_ns_init(void) +{ +} #endif #ifdef CONFIG_PROC_SYSCTL -- cgit v1.2.3 From 2cfe0d3009418a132b93d78642a8059a38fe5944 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 10 Apr 2018 16:32:44 -0700 Subject: task_struct: only use anon struct under randstruct plugin The original intent for always adding the anonymous struct in task_struct was to make sure we had compiler coverage. However, this caused pathological padding of 40 bytes at the start of task_struct. Instead, move the anonymous struct to being only used when struct layout randomization is enabled. 
Link: http://lkml.kernel.org/r/20180327213609.GA2964@beast Fixes: 29e48ce87f1e ("task_struct: Allow randomized") Signed-off-by: Kees Cook Reported-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 3 --- include/linux/compiler-gcc.h | 12 +++--------- 2 files changed, 3 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index d3f264a5b04d..ceb96ecab96e 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -17,9 +17,6 @@ */ #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) -#define randomized_struct_fields_start struct { -#define randomized_struct_fields_end }; - /* all clang versions usable with the kernel support KASAN ABI version 5 */ #define KASAN_ABI_VERSION 5 diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index e2c7f4369eff..b4bf73f5e38f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -242,6 +242,9 @@ #if defined(RANDSTRUCT_PLUGIN) && !defined(__CHECKER__) #define __randomize_layout __attribute__((randomize_layout)) #define __no_randomize_layout __attribute__((no_randomize_layout)) +/* This anon struct can add padding, so only enable it under randstruct. */ +#define randomized_struct_fields_start struct { +#define randomized_struct_fields_end } __randomize_layout; #endif #endif /* GCC_VERSION >= 40500 */ @@ -256,15 +259,6 @@ */ #define __visible __attribute__((externally_visible)) -/* - * RANDSTRUCT_PLUGIN wants to use an anonymous struct, but it is only - * possible since GCC 4.6. To provide as much build testing coverage - * as possible, this is used for all GCC 4.6+ builds, and not just on - * RANDSTRUCT_PLUGIN builds. 
- */ -#define randomized_struct_fields_start struct { -#define randomized_struct_fields_end } __randomize_layout; - #endif /* GCC_VERSION >= 40600 */ -- cgit v1.2.3 From 0965232035cfa59a64d197cf8a8ee0bc407bb3e4 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 10 Apr 2018 16:34:45 -0700 Subject: seq_file: allocate seq_file from kmem_cache For fine-grained debugging and usercopy protection. Link: http://lkml.kernel.org/r/20180310085027.GA17121@avx2 Signed-off-by: Alexey Dobriyan Reviewed-by: Andrew Morton Cc: Al Viro Cc: Glauber Costa Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/seq_file.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 23d6a92cea9f..a121982af0f5 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -240,4 +240,5 @@ extern struct hlist_node *seq_hlist_start_percpu(struct hlist_head __percpu *hea extern struct hlist_node *seq_hlist_next_percpu(void *v, struct hlist_head __percpu *head, int *cpu, loff_t *pos); +void seq_file_init(void); #endif -- cgit v1.2.3 From 8f2af155b513583e8b149a384551f13e1ac5dc72 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 10 Apr 2018 16:34:53 -0700 Subject: exec: pass stack rlimit into mm layout functions Patch series "exec: Pin stack limit during exec". Attempts to solve problems with the stack limit changing during exec continue to be frustrated[1][2]. In addition to the specific issues around the Stack Clash family of flaws, Andy Lutomirski pointed out[3] other places during exec where the stack limit is used and is assumed to be unchanging. 
Given the many places it gets used and the fact that it can be manipulated/raced via setrlimit() and prlimit(), I think the only way to handle this is to move away from the "current" view of the stack limit and instead attach it to the bprm, and plumb this down into the functions that need to know the stack limits. This series implements the approach. [1] 04e35f4495dd ("exec: avoid RLIMIT_STACK races with prlimit()") [2] 779f4e1c6c7c ("Revert "exec: avoid RLIMIT_STACK races with prlimit()"") [3] to security@kernel.org, "Subject: existing rlimit races?" This patch (of 3): Since it is possible that the stack rlimit can change externally during exec (either via another thread calling setrlimit() or another process calling prlimit()), provide a way to pass the rlimit down into the per-architecture mm layout functions so that the rlimit can stay in the bprm structure instead of sitting in the signal structure until exec is finalized. Link: http://lkml.kernel.org/r/1518638796-20819-2-git-send-email-keescook@chromium.org Signed-off-by: Kees Cook Cc: Michal Hocko Cc: Ben Hutchings Cc: Willy Tarreau Cc: Hugh Dickins Cc: Oleg Nesterov Cc: "Jason A. 
Donenfeld" Cc: Rik van Riel Cc: Laura Abbott Cc: Greg KH Cc: Andy Lutomirski Cc: Ben Hutchings Cc: Brad Spengler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/mm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 9806184bb3d5..2c570cd934af 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -104,7 +104,8 @@ static inline void mm_update_next_owner(struct mm_struct *mm) #endif /* CONFIG_MEMCG */ #ifdef CONFIG_MMU -extern void arch_pick_mmap_layout(struct mm_struct *mm); +extern void arch_pick_mmap_layout(struct mm_struct *mm, + struct rlimit *rlim_stack); extern unsigned long arch_get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); @@ -113,7 +114,8 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); #else -static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} +static inline void arch_pick_mmap_layout(struct mm_struct *mm, + struct rlimit *rlim_stack) {} #endif static inline bool in_vfork(struct task_struct *tsk) -- cgit v1.2.3 From b83838313386f617d6bd8201be7f5b532059bba1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 10 Apr 2018 16:34:57 -0700 Subject: exec: introduce finalize_exec() before start_thread() Provide a final callback into fs/exec.c before start_thread() takes over, to handle any last-minute changes, like the coming restoration of the stack limit. Link: http://lkml.kernel.org/r/1518638796-20819-3-git-send-email-keescook@chromium.org Signed-off-by: Kees Cook Cc: Andy Lutomirski Cc: Ben Hutchings Cc: Ben Hutchings Cc: Brad Spengler Cc: Greg KH Cc: Hugh Dickins Cc: "Jason A. 
Donenfeld" Cc: Laura Abbott Cc: Michal Hocko Cc: Oleg Nesterov Cc: Rik van Riel Cc: Willy Tarreau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/binfmts.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index b0abe21d6cc9..40e52afbb2b0 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -118,6 +118,7 @@ extern int __must_check remove_arg_zero(struct linux_binprm *); extern int search_binary_handler(struct linux_binprm *); extern int flush_old_exec(struct linux_binprm * bprm); extern void setup_new_exec(struct linux_binprm * bprm); +extern void finalize_exec(struct linux_binprm *bprm); extern void would_dump(struct linux_binprm *, struct file *); extern int suid_dumpable; -- cgit v1.2.3 From c31dbb146dd44af44bc60780ce8fa7a9f5f746df Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 10 Apr 2018 16:35:01 -0700 Subject: exec: pin stack limit during exec Since the stack rlimit is used in multiple places during exec and it can be changed via other threads (via setrlimit()) or processes (via prlimit()), the assumption that the value doesn't change cannot be made. This leads to races with mm layout selection and argument size calculations. This changes the exec path to use the rlimit stored in bprm instead of in current. Before starting the thread, the bprm stack rlimit is stored back to current. Link: http://lkml.kernel.org/r/1518638796-20819-4-git-send-email-keescook@chromium.org Fixes: 64701dee4178e ("exec: Use sane stack rlimit under secureexec") Signed-off-by: Kees Cook Reported-by: Ben Hutchings Reported-by: Andy Lutomirski Reported-by: Brad Spengler Acked-by: Michal Hocko Cc: Ben Hutchings Cc: Greg KH Cc: Hugh Dickins Cc: "Jason A. 
Donenfeld" Cc: Laura Abbott Cc: Oleg Nesterov Cc: Rik van Riel Cc: Willy Tarreau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/binfmts.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 40e52afbb2b0..4955e0863b83 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -61,6 +61,8 @@ struct linux_binprm { unsigned interp_flags; unsigned interp_data; unsigned long loader, exec; + + struct rlimit rlim_stack; /* Saved RLIMIT_STACK used during exec. */ } __randomize_layout; #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 -- cgit v1.2.3 From de99626c2e89713cd29860ca26b584d1e6219da0 Mon Sep 17 00:00:00 2001 From: Valentin Vidic Date: Tue, 10 Apr 2018 16:35:46 -0700 Subject: include/linux/kfifo.h: fix comment Clean up unusual formatting in the note about locking. Link: http://lkml.kernel.org/r/20180324002630.13046-1-Valentin.Vidic@CARNet.hr Signed-off-by: Valentin Vidic Cc: Stefani Seibold Cc: Mauro Carvalho Chehab Cc: Christophe JAILLET Cc: Jiri Kosina Cc: Sean Young Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index e251533a5939..89fc8dc7bf38 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -41,11 +41,11 @@ */ /* - * Note about locking : There is no locking required until only * one reader - * and one writer is using the fifo and no kfifo_reset() will be * called - * kfifo_reset_out() can be safely used, until it will be only called + * Note about locking: There is no locking required until only one reader + * and one writer is using the fifo and no kfifo_reset() will be called. + * kfifo_reset_out() can be safely used, until it will be only called * in the reader thread. 
- * For multiple writer and one reader there is only a need to lock the writer. + * For multiple writer and one reader there is only a need to lock the writer. * And vice versa for only one writer and multiple reader there is only a need * to lock the reader. */ -- cgit v1.2.3 From 2dd8a62c647691161a2346546834262597739872 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 10 Apr 2018 16:36:19 -0700 Subject: linux/const.h: move UL() macro to include/linux/const.h ARM, ARM64 and UniCore32 duplicate the definition of UL(): #define UL(x) _AC(x, UL) This is not actually arch-specific, so it will be useful to move it to a common header. Currently, we only have the uapi variant for linux/const.h, so I am creating include/linux/const.h. I also added _UL(), _ULL() and ULL() because _AC() is mostly used in the form either _AC(..., UL) or _AC(..., ULL). I expect they will be replaced in follow-up cleanups. The underscore-prefixed ones should be used for exported headers. Link: http://lkml.kernel.org/r/1519301715-31798-4-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Acked-by: Guan Xuetao Acked-by: Catalin Marinas Acked-by: Russell King Cc: David Howells Cc: Geert Uytterhoeven Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/const.h | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 include/linux/const.h (limited to 'include/linux') diff --git a/include/linux/const.h b/include/linux/const.h new file mode 100644 index 000000000000..7b55a55f5911 --- /dev/null +++ b/include/linux/const.h @@ -0,0 +1,9 @@ +#ifndef _LINUX_CONST_H +#define _LINUX_CONST_H + +#include + +#define UL(x) (_UL(x)) +#define ULL(x) (_ULL(x)) + +#endif /* _LINUX_CONST_H */ -- cgit v1.2.3 From fa290cda102c096f5ca394277d65d3dbd689930b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 10 Apr 2018 16:36:28 -0700 Subject: radix tree: use GFP_ZONEMASK bits of gfp_t for flags Patch series "XArray", v9. 
(First part thereof). This patchset is, I believe, appropriate for merging for 4.17. It contains the XArray implementation, to eventually replace the radix tree, and converts the page cache to use it. This conversion keeps the radix tree and XArray data structures in sync at all times. That allows us to convert the page cache one function at a time and should allow for easier bisection. Other than renaming some elements of the structures, the data structures are fundamentally unchanged; a radix tree walk and an XArray walk will touch the same number of cachelines. I have changes planned to the XArray data structure, but those will happen in future patches. Improvements the XArray has over the radix tree: - The radix tree provides operations like other trees do; 'insert' and 'delete'. But what most users really want is an automatically resizing array, and so it makes more sense to give users an API that is like an array -- 'load' and 'store'. We still have an 'insert' operation for users that really want that semantic. - The XArray considers locking as part of its API. This simplifies a lot of users who formerly had to manage their own locking just for the radix tree. It also improves code generation as we can now tell RCU that we're holding a lock and it doesn't need to generate as much fencing code. The other advantage is that tree nodes can be moved (not yet implemented). - GFP flags are now parameters to calls which may need to allocate memory. The radix tree forced users to decide what the allocation flags would be at creation time. It's much clearer to specify them at allocation time. - Memory is not preloaded; we don't tie up dozens of pages on the off chance that the slab allocator fails. Instead, we drop the lock, allocate a new node and retry the operation. We have to convert all the radix tree, IDA and IDR preload users before we can realise this benefit, but I have not yet found a user which cannot be converted. - The XArray provides a cmpxchg operation. 
The radix tree forces users to roll their own (and at least four have). - Iterators take a 'max' parameter. That simplifies many users and will reduce the amount of iteration done. - Iteration can proceed backwards. We only have one user for this, but since it's called as part of the pagefault readahead algorithm, that seemed worth mentioning. - RCU-protected pointers are not exposed as part of the API. There are some fun bugs where the page cache forgets to use rcu_dereference() in the current codebase. - Value entries gain an extra bit compared to radix tree exceptional entries. That gives us the extra bit we need to put huge page swap entries in the page cache. - Some iterators now take a 'filter' argument instead of having separate iterators for tagged/untagged iterations. The page cache is improved by this: - Shorter, easier to read code - More efficient iterations - Reduction in size of struct address_space - Fewer walks from the top of the data structure; the XArray API encourages staying at the leaf node and conducting operations there. This patch (of 8): None of these bits may be used for slab allocations, so we can use them as radix tree flags as long as we mask them off before passing them to the slab allocator. Move the IDR flag from the high bits to the GFP_ZONEMASK bits. Link: http://lkml.kernel.org/r/20180313132639.17387-3-willy@infradead.org Signed-off-by: Matthew Wilcox Acked-by: Jeff Layton Cc: Darrick J. 
Wong Cc: Dave Chinner Cc: Ryusuke Konishi Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 3 ++- include/linux/radix-tree.h | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 7d6a6313f0ab..913c335054f0 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -29,7 +29,8 @@ struct idr { #define IDR_FREE 0 /* Set the IDR flag and the IDR_FREE tag */ -#define IDR_RT_MARKER ((__force gfp_t)(3 << __GFP_BITS_SHIFT)) +#define IDR_RT_MARKER (ROOT_IS_IDR | (__force gfp_t) \ + (1 << (ROOT_TAG_SHIFT + IDR_FREE))) #define IDR_INIT_BASE(base) { \ .idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER), \ diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index fc55ff31eca7..6c4e2e716dac 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -104,9 +104,10 @@ struct radix_tree_node { unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; }; -/* The top bits of gfp_mask are used to store the root tags and the IDR flag */ -#define ROOT_IS_IDR ((__force gfp_t)(1 << __GFP_BITS_SHIFT)) -#define ROOT_TAG_SHIFT (__GFP_BITS_SHIFT + 1) +/* The IDR tag is stored in the low bits of the GFP flags */ +#define ROOT_IS_IDR ((__force gfp_t)4) +/* The top bits of gfp_mask are used to store the root tags */ +#define ROOT_TAG_SHIFT (__GFP_BITS_SHIFT) struct radix_tree_root { gfp_t gfp_mask; -- cgit v1.2.3 From f82b376413298ddd39a2391e38260c15cdebf380 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 10 Apr 2018 16:36:44 -0700 Subject: export __set_page_dirty XFS currently contains a copy-and-paste of __set_page_dirty(). Export it from buffer.c instead. Link: http://lkml.kernel.org/r/20180313132639.17387-6-willy@infradead.org Signed-off-by: Matthew Wilcox Acked-by: Jeff Layton Reviewed-by: Darrick J. 
Wong Cc: Ryusuke Konishi Cc: Dave Chinner Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 342c441c25d0..f13bc25f7a9f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1466,6 +1466,7 @@ extern int try_to_release_page(struct page * page, gfp_t gfp_mask); extern void do_invalidatepage(struct page *page, unsigned int offset, unsigned int length); +void __set_page_dirty(struct page *, struct address_space *, int warn); int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); int redirty_page_for_writepage(struct writeback_control *wbc, -- cgit v1.2.3 From f6bb2a2c0b81c47282ddb7883f92e65a063c27dd Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 10 Apr 2018 16:36:52 -0700 Subject: xarray: add the xa_lock to the radix_tree_root This results in no change in structure size on 64-bit machines as it fits in the padding between the gfp_t and the void *. 32-bit machines will grow the structure from 8 to 12 bytes. Almost all radix trees are protected with (at least) a spinlock, so as they are converted from radix trees to xarrays, the data structures will shrink again. Initialising the spinlock requires a name for the benefit of lockdep, so RADIX_TREE_INIT() now needs to know the name of the radix tree it's initialising, and so do IDR_INIT() and IDA_INIT(). Also add the xa_lock() and xa_unlock() family of wrappers to make it easier to use the lock. If we could rely on -fplan9-extensions in the compiler, we could avoid all of this syntactic sugar, but that wasn't added until gcc 4.6. Link: http://lkml.kernel.org/r/20180313132639.17387-8-willy@infradead.org Signed-off-by: Matthew Wilcox Reviewed-by: Jeff Layton Cc: Darrick J. 
Wong Cc: Dave Chinner Cc: Ryusuke Konishi Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 19 ++++++++++--------- include/linux/radix-tree.h | 7 +++++-- include/linux/xarray.h | 24 ++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 11 deletions(-) create mode 100644 include/linux/xarray.h (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 913c335054f0..e856f4e0ab35 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -32,27 +32,28 @@ struct idr { #define IDR_RT_MARKER (ROOT_IS_IDR | (__force gfp_t) \ (1 << (ROOT_TAG_SHIFT + IDR_FREE))) -#define IDR_INIT_BASE(base) { \ - .idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER), \ +#define IDR_INIT_BASE(name, base) { \ + .idr_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER), \ .idr_base = (base), \ .idr_next = 0, \ } /** * IDR_INIT() - Initialise an IDR. + * @name: Name of IDR. * * A freshly-initialised IDR contains no IDs. */ -#define IDR_INIT IDR_INIT_BASE(0) +#define IDR_INIT(name) IDR_INIT_BASE(name, 0) /** - * DEFINE_IDR() - Define a statically-allocated IDR - * @name: Name of IDR + * DEFINE_IDR() - Define a statically-allocated IDR. + * @name: Name of IDR. * * An IDR defined using this macro is ready for use with no additional * initialisation required. It contains no IDs. 
*/ -#define DEFINE_IDR(name) struct idr name = IDR_INIT +#define DEFINE_IDR(name) struct idr name = IDR_INIT(name) /** * idr_get_cursor - Return the current position of the cyclic allocator @@ -219,10 +220,10 @@ struct ida { struct radix_tree_root ida_rt; }; -#define IDA_INIT { \ - .ida_rt = RADIX_TREE_INIT(IDR_RT_MARKER | GFP_NOWAIT), \ +#define IDA_INIT(name) { \ + .ida_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER | GFP_NOWAIT), \ } -#define DEFINE_IDA(name) struct ida name = IDA_INIT +#define DEFINE_IDA(name) struct ida name = IDA_INIT(name) int ida_pre_get(struct ida *ida, gfp_t gfp_mask); int ida_get_new_above(struct ida *ida, int starting_id, int *p_id); diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 6c4e2e716dac..34149e8b5f73 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -110,20 +110,23 @@ struct radix_tree_node { #define ROOT_TAG_SHIFT (__GFP_BITS_SHIFT) struct radix_tree_root { + spinlock_t xa_lock; gfp_t gfp_mask; struct radix_tree_node __rcu *rnode; }; -#define RADIX_TREE_INIT(mask) { \ +#define RADIX_TREE_INIT(name, mask) { \ + .xa_lock = __SPIN_LOCK_UNLOCKED(name.xa_lock), \ .gfp_mask = (mask), \ .rnode = NULL, \ } #define RADIX_TREE(name, mask) \ - struct radix_tree_root name = RADIX_TREE_INIT(mask) + struct radix_tree_root name = RADIX_TREE_INIT(name, mask) #define INIT_RADIX_TREE(root, mask) \ do { \ + spin_lock_init(&(root)->xa_lock); \ (root)->gfp_mask = (mask); \ (root)->rnode = NULL; \ } while (0) diff --git a/include/linux/xarray.h b/include/linux/xarray.h new file mode 100644 index 000000000000..2dfc8006fe64 --- /dev/null +++ b/include/linux/xarray.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef _LINUX_XARRAY_H +#define _LINUX_XARRAY_H +/* + * eXtensible Arrays + * Copyright (c) 2017 Microsoft Corporation + * Author: Matthew Wilcox + */ + +#include <linux/spinlock.h> + +#define xa_trylock(xa) spin_trylock(&(xa)->xa_lock) +#define xa_lock(xa) spin_lock(&(xa)->xa_lock) +#define
xa_unlock(xa) spin_unlock(&(xa)->xa_lock) +#define xa_lock_bh(xa) spin_lock_bh(&(xa)->xa_lock) +#define xa_unlock_bh(xa) spin_unlock_bh(&(xa)->xa_lock) +#define xa_lock_irq(xa) spin_lock_irq(&(xa)->xa_lock) +#define xa_unlock_irq(xa) spin_unlock_irq(&(xa)->xa_lock) +#define xa_lock_irqsave(xa, flags) \ + spin_lock_irqsave(&(xa)->xa_lock, flags) +#define xa_unlock_irqrestore(xa, flags) \ + spin_unlock_irqrestore(&(xa)->xa_lock, flags) + +#endif /* _LINUX_XARRAY_H */ -- cgit v1.2.3 From b93b016313b3ba8003c3b8bb71f569af91f19fc7 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 10 Apr 2018 16:36:56 -0700 Subject: page cache: use xa_lock Remove the address_space ->tree_lock and use the xa_lock newly added to the radix_tree_root. Rename the address_space ->page_tree to ->i_pages, since we don't really care that it's a tree. [willy@infradead.org: fix nds32, fs/dax.c] Link: http://lkml.kernel.org/r/20180406145415.GB20605@bombadil.infradead.org Link: http://lkml.kernel.org/r/20180313132639.17387-9-willy@infradead.org Signed-off-by: Matthew Wilcox Acked-by: Jeff Layton Cc: Darrick J. Wong Cc: Dave Chinner Cc: Ryusuke Konishi Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/backing-dev.h | 14 +++++++------- include/linux/fs.h | 8 ++++---- include/linux/mm.h | 2 +- include/linux/pagemap.h | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e6cbb915ee56..09da0f124699 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -329,7 +329,7 @@ static inline bool inode_to_wb_is_valid(struct inode *inode) * @inode: inode of interest * * Returns the wb @inode is currently associated with. The caller must be - * holding either @inode->i_lock, @inode->i_mapping->tree_lock, or the + * holding either @inode->i_lock, the i_pages lock, or the * associated wb's list_lock.
*/ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode) @@ -337,7 +337,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode) #ifdef CONFIG_LOCKDEP WARN_ON_ONCE(debug_locks && (!lockdep_is_held(&inode->i_lock) && - !lockdep_is_held(&inode->i_mapping->tree_lock) && + !lockdep_is_held(&inode->i_mapping->i_pages.xa_lock) && !lockdep_is_held(&inode->i_wb->list_lock))); #endif return inode->i_wb; @@ -349,7 +349,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode) * @lockedp: temp bool output param, to be passed to the end function * * The caller wants to access the wb associated with @inode but isn't - * holding inode->i_lock, mapping->tree_lock or wb->list_lock. This + * holding inode->i_lock, the i_pages lock or wb->list_lock. This * function determines the wb associated with @inode and ensures that the * association doesn't change until the transaction is finished with * unlocked_inode_to_wb_end(). @@ -370,11 +370,11 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) *lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH; if (unlikely(*lockedp)) - spin_lock_irq(&inode->i_mapping->tree_lock); + xa_lock_irq(&inode->i_mapping->i_pages); /* - * Protected by either !I_WB_SWITCH + rcu_read_lock() or tree_lock. - * inode_to_wb() will bark. Deref directly. + * Protected by either !I_WB_SWITCH + rcu_read_lock() or the i_pages + * lock. inode_to_wb() will bark. Deref directly. 
*/ return inode->i_wb; } @@ -387,7 +387,7 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) { if (unlikely(locked)) - spin_unlock_irq(&inode->i_mapping->tree_lock); + xa_unlock_irq(&inode->i_mapping->i_pages); rcu_read_unlock(); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 2aa02cad94d4..92efaf1f8977 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -390,12 +391,11 @@ int pagecache_write_end(struct file *, struct address_space *mapping, struct address_space { struct inode *host; /* owner: inode, block_device */ - struct radix_tree_root page_tree; /* radix tree of all pages */ - spinlock_t tree_lock; /* and lock protecting it */ + struct radix_tree_root i_pages; /* cached pages */ atomic_t i_mmap_writable;/* count VM_SHARED mappings */ struct rb_root_cached i_mmap; /* tree of private and shared mappings */ struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */ - /* Protected by tree_lock together with the radix tree */ + /* Protected by the i_pages lock */ unsigned long nrpages; /* number of total pages */ /* number of shadow or DAX exceptional entries */ unsigned long nrexceptional; @@ -1989,7 +1989,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) * * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to * synchronize competing switching instances and to tell - * wb stat updates to grab mapping->tree_lock. See + * wb stat updates to grab the i_pages lock. See * inode_switch_wb_work_fn() for details. * * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper diff --git a/include/linux/mm.h b/include/linux/mm.h index f13bc25f7a9f..1ac1f06a4be6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -747,7 +747,7 @@ int finish_mkwrite_fault(struct vm_fault *vmf); * refcount. 
The each user mapping also has a reference to the page. * * The pagecache pages are stored in a per-mapping radix tree, which is - * rooted at mapping->page_tree, and indexed by offset. + * rooted at mapping->i_pages, and indexed by offset. * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space * lists, we instead now tag pages as dirty/writeback in the radix tree. * diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 34ce3ebf97d5..b1bd2186e6d2 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -144,7 +144,7 @@ void release_pages(struct page **pages, int nr); * 3. check the page is still in pagecache (if no, goto 1) * * Remove-side that cares about stability of _refcount (eg. reclaim) has the - * following (with tree_lock held for write): + * following (with the i_pages lock held): * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg) * B. remove page from pagecache * C. free the page @@ -157,7 +157,7 @@ void release_pages(struct page **pages, int nr); * * It is possible that between 1 and 2, the page is removed then the exact same * page is inserted into the same position in pagecache. That's OK: the - * old find_get_page using tree_lock could equally have run before or after + * old find_get_page using a lock could equally have run before or after * such a re-insertion, depending on order that locks are granted. * * Lookups racing against pagecache insertion isn't a big problem: either 1 -- cgit v1.2.3 From 450b1f6f56350c630e795f240dc5a77aa8aa2419 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 29 Mar 2018 08:07:46 +0100 Subject: lockref: Add lockref_put_not_zero Put a lockref unless the lockref is dead or its count would become zero. This is the same as lockref_put_or_lock except that the lock is never left held. 
Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson --- include/linux/lockref.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lockref.h b/include/linux/lockref.h index 2eac32095113..99f17cc8e163 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -37,6 +37,7 @@ struct lockref { extern void lockref_get(struct lockref *); extern int lockref_put_return(struct lockref *); extern int lockref_get_not_zero(struct lockref *); +extern int lockref_put_not_zero(struct lockref *); extern int lockref_get_or_lock(struct lockref *); extern int lockref_put_or_lock(struct lockref *); -- cgit v1.2.3 From de40614de997a388499f9a01d5eeb7cd8d3c34d1 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 13 Apr 2018 15:37:59 +0200 Subject: firmware: dmi_scan: Add DMI_OEM_STRING support to dmi_matches OEM strings are defined by each OEM and they contain customized and useful OEM information. Supporting it provides more flexible uses of the dmi_matches function. 
Signed-off-by: Alex Hung Signed-off-by: Jean Delvare --- include/linux/mod_devicetable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 48fb2b43c35a..7d361be2e24f 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -502,6 +502,7 @@ enum dmi_field { DMI_CHASSIS_SERIAL, DMI_CHASSIS_ASSET_TAG, DMI_STRING_MAX, + DMI_OEM_STRING, /* special case - will not be in dmi_ident */ }; struct dmi_strmatch { -- cgit v1.2.3 From 9ec4ecef0af7790551109283ca039a7c52de343c Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 13 Apr 2018 15:35:49 -0700 Subject: kexec_file,x86,powerpc: factor out kexec_file_ops functions As arch_kexec_kernel_image_{probe,load}(), arch_kimage_file_post_load_cleanup() and arch_kexec_kernel_verify_sig() are almost duplicated among architectures, they can be commonalized with an architecture-defined kexec_file_ops array. So let's factor them out. Link: http://lkml.kernel.org/r/20180306102303.9063-3-takahiro.akashi@linaro.org Signed-off-by: AKASHI Takahiro Acked-by: Dave Young Tested-by: Dave Young Cc: Vivek Goyal Cc: Baoquan He Cc: Michael Ellerman Cc: Thiago Jung Bauermann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 0ebcbeb21056..102c725421a1 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -135,6 +135,11 @@ struct kexec_file_ops { #endif }; +extern const struct kexec_file_ops * const kexec_file_loaders[]; + +int kexec_image_probe_default(struct kimage *image, void *buf, + unsigned long buf_len); + /** * struct kexec_buf - parameters for finding a place for a buffer in memory * @image: kexec image in which memory to search. 
@@ -209,7 +214,7 @@ struct kimage { unsigned long cmdline_buf_len; /* File operations provided by image loader */ - struct kexec_file_ops *fops; + const struct kexec_file_ops *fops; /* Image loader handling the kernel can store a pointer here */ void *image_loader_data; @@ -273,12 +278,6 @@ int crash_shrink_memory(unsigned long new_size); size_t crash_get_memory_size(void); void crash_free_reserved_phys_range(unsigned long begin, unsigned long end); -int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf, - unsigned long buf_len); -void * __weak arch_kexec_kernel_image_load(struct kimage *image); -int __weak arch_kimage_file_post_load_cleanup(struct kimage *image); -int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, - unsigned long buf_len); int __weak arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, unsigned int relsec); int __weak arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, -- cgit v1.2.3 From babac4a84a88842bec477a5bdada1460f3bc374c Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 13 Apr 2018 15:36:06 -0700 Subject: kexec_file, x86: move re-factored code to generic side In the previous patches, commonly-used routines, exclude_mem_range() and prepare_elf64_headers(), were carved out. Now place them in kexec common code. A prefix "crash_" is given to each of their names to avoid possible name collisions. 
Link: http://lkml.kernel.org/r/20180306102303.9063-8-takahiro.akashi@linaro.org Signed-off-by: AKASHI Takahiro Acked-by: Dave Young Tested-by: Dave Young Cc: Vivek Goyal Cc: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 102c725421a1..68865fd51aad 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -168,6 +168,25 @@ int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(struct resource *, void *)); extern int kexec_add_buffer(struct kexec_buf *kbuf); int kexec_locate_mem_hole(struct kexec_buf *kbuf); + +/* Alignment required for elf header segment */ +#define ELF_CORE_HEADER_ALIGN 4096 + +struct crash_mem_range { + u64 start, end; +}; + +struct crash_mem { + unsigned int max_nr_ranges; + unsigned int nr_ranges; + struct crash_mem_range ranges[0]; +}; + +extern int crash_exclude_mem_range(struct crash_mem *mem, + unsigned long long mstart, + unsigned long long mend); +extern int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map, + void **addr, unsigned long *sz); #endif /* CONFIG_KEXEC_FILE */ struct kimage { -- cgit v1.2.3 From ee6ebeda8ddc350700168f2c8052a97bd9c11e5b Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Fri, 13 Apr 2018 15:36:10 -0700 Subject: include/linux/kexec.h: silence compile warnings Patch series "kexec_file: Clean up purgatory load", v2. Following the discussion with Dave and AKASHI, here are the common code patches extracted from my recent patch set (Add kexec_file_load support to s390) [1]. The patches were extracted to allow upstream integration together with AKASHI's common code patches before the arch code gets adjusted to the new base. 
The reason for this series is to prepare common code for adding kexec_file_load to s390 as well as cleaning up the mis-use of the sh_offset field during purgatory load. In detail this series contains: Patch #1&2: Minor cleanups/fixes. Patch #3-9: Clean up the purgatory load/relocation code. Especially remove the mis-use of the purgatory_info->sechdrs->sh_offset field, currently holding a pointer into either kexec_purgatory (ro) or purgatory_buf (rw) depending on the section. With these patches the section address will be calculated verbosely and sh_offset will contain the offset of the section in the stripped purgatory binary (purgatory_buf). Patch #10: Allows architectures to set the purgatory load address. This patch is important for s390 as the kernel and purgatory have to be loaded to fixed addresses. In current code this is impossible as the purgatory load is opaque to the architecture. Patch #11: Moves the x86 purgatory's sha256 implementation to the common lib/ directory to allow reuse in other architectures. This patch (of 11): When building the kernel with CONFIG_KEXEC_FILE enabled gcc prints a compile warning multiple times. In file included from /linux/init/initramfs.c:526:0: /include/linux/kexec.h:120:9: warning: `struct kimage' declared inside parameter list [enabled by default] unsigned long cmdline_len); ^ This is because the typedefs for kexec_file_load use struct kimage before it is declared. Fix this by simply forward declaring struct kimage.
Link: http://lkml.kernel.org/r/20180321112751.22196-2-prudo@linux.vnet.ibm.com Signed-off-by: Philipp Rudo Acked-by: Dave Young Cc: Eric Biederman Cc: Vivek Goyal Cc: Michael Ellerman Cc: Thiago Jung Bauermann Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: AKASHI Takahiro Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 68865fd51aad..08b8b9d00f97 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -114,6 +114,8 @@ struct purgatory_info { unsigned long purgatory_load_addr; }; +struct kimage; + typedef int (kexec_probe_t)(const char *kernel_buf, unsigned long kernel_size); typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf, unsigned long kernel_len, char *initrd, -- cgit v1.2.3 From 65c225d3280542f3ea145e052215ce0538f6bb69 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Fri, 13 Apr 2018 15:36:17 -0700 Subject: kernel/kexec_file.c: make purgatory_info->ehdr const The kexec_purgatory buffer is read-only. Thus all pointers into kexec_purgatory are read-only, too. Point this out by explicitly marking purgatory_info->ehdr as 'const' and update the comments in purgatory_info. 
Link: http://lkml.kernel.org/r/20180321112751.22196-4-prudo@linux.vnet.ibm.com Signed-off-by: Philipp Rudo Acked-by: Dave Young Cc: AKASHI Takahiro Cc: Eric Biederman Cc: Heiko Carstens Cc: Ingo Molnar Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Thiago Jung Bauermann Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 08b8b9d00f97..8c5819d1a808 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -99,14 +99,19 @@ struct compat_kexec_segment { #ifdef CONFIG_KEXEC_FILE struct purgatory_info { - /* Pointer to elf header of read only purgatory */ - Elf_Ehdr *ehdr; - - /* Pointer to purgatory sechdrs which are modifiable */ + /* + * Pointer to elf header at the beginning of kexec_purgatory. + * Note: kexec_purgatory is read only + */ + const Elf_Ehdr *ehdr; + /* + * Temporary, modifiable buffer for sechdrs used for relocation. + * This memory can be freed post image load. + */ Elf_Shdr *sechdrs; /* - * Temporary buffer location where purgatory is loaded and relocated - * This memory can be freed post image load + * Temporary, modifiable buffer for stripped purgatory used for + * relocation. This memory can be freed post image load. */ void *purgatory_buf; -- cgit v1.2.3 From 8aec395b8478310521031157ef5d44ef19c2c581 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Fri, 13 Apr 2018 15:36:24 -0700 Subject: kernel/kexec_file.c: use read-only sections in arch_kexec_apply_relocations* When the relocations are applied to the purgatory only the section the relocations are applied to is writable. The other sections, i.e. the symtab and .rel/.rela, are in read-only kexec_purgatory. Highlight this by marking the corresponding variables as 'const'. 
While at it also change the signatures of arch_kexec_apply_relocations* to take section pointers instead of just the index of the relocation section. This removes the second lookup and sanity check of the sections in arch code. Link: http://lkml.kernel.org/r/20180321112751.22196-6-prudo@linux.vnet.ibm.com Signed-off-by: Philipp Rudo Acked-by: Dave Young Cc: AKASHI Takahiro Cc: Eric Biederman Cc: Heiko Carstens Cc: Ingo Molnar Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Thiago Jung Bauermann Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 8c5819d1a808..0e389b9b7722 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -171,6 +171,15 @@ struct kexec_buf { bool top_down; }; +int __weak arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab); +int __weak arch_kexec_apply_relocations(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab); + int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(struct resource *, void *)); extern int kexec_add_buffer(struct kexec_buf *kbuf); @@ -304,10 +313,6 @@ int crash_shrink_memory(unsigned long new_size); size_t crash_get_memory_size(void); void crash_free_reserved_phys_range(unsigned long begin, unsigned long end); -int __weak arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, - Elf_Shdr *sechdrs, unsigned int relsec); -int __weak arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, - unsigned int relsec); void arch_kexec_protect_crashkres(void); void arch_kexec_unprotect_crashkres(void); -- cgit v1.2.3 From 3be3f61d25e04ecf90d65d52fad632af5ba8805b Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Fri, 13 Apr 2018 15:36:43 -0700 Subject: 
kernel/kexec_file.c: allow archs to set purgatory load address For s390 new kernels are loaded to fixed addresses in memory before they are booted. With the current code this is a problem as it assumes the kernel will be loaded to an 'arbitrary' address. In particular, kexec_locate_mem_hole searches for a large enough memory region and sets the load address (kexec_buf->mem) to it. Luckily there is a simple workaround for this problem. By returning 1 in arch_kexec_walk_mem, kexec_locate_mem_hole is turned off. This allows the architecture to set kbuf->mem by hand. While the trick works fine for the kernel it does not for the purgatory as here the architectures don't have access to its kexec_buffer. Give architectures access to the purgatory's kexec_buffer by changing kexec_load_purgatory to take a pointer to it. With this change architectures have access to the buffer and can edit it as they need. A nice side effect of this change is that we can get rid of the purgatory_info->purgatory_load_addr field, as the information stored there can now be accessed directly from kbuf->mem. Link: http://lkml.kernel.org/r/20180321112751.22196-11-prudo@linux.vnet.ibm.com Signed-off-by: Philipp Rudo Reviewed-by: Martin Schwidefsky Acked-by: Dave Young Cc: AKASHI Takahiro Cc: Eric Biederman Cc: Heiko Carstens Cc: Ingo Molnar Cc: Michael Ellerman Cc: Thiago Jung Bauermann Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 0e389b9b7722..9e4e638fb505 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -114,9 +114,6 @@ struct purgatory_info { * relocation. This memory can be freed post image load.
*/ void *purgatory_buf; - - /* Address where purgatory is finally loaded and is executed from */ - unsigned long purgatory_load_addr; }; struct kimage; @@ -171,6 +168,12 @@ struct kexec_buf { bool top_down; }; +int kexec_load_purgatory(struct kimage *image, struct kexec_buf *kbuf); +int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name, + void *buf, unsigned int size, + bool get_value); +void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name); + int __weak arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section, const Elf_Shdr *relsec, @@ -266,14 +269,6 @@ extern void machine_kexec_cleanup(struct kimage *image); extern int kernel_kexec(void); extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order); -extern int kexec_load_purgatory(struct kimage *image, unsigned long min, - unsigned long max, int top_down, - unsigned long *load_addr); -extern int kexec_purgatory_get_set_symbol(struct kimage *image, - const char *name, void *buf, - unsigned int size, bool get_value); -extern void *kexec_purgatory_get_symbol_addr(struct kimage *image, - const char *name); extern void __crash_kexec(struct pt_regs *); extern void crash_kexec(struct pt_regs *); int kexec_should_crash(struct task_struct *); -- cgit v1.2.3 From df6f2801f511b07c08c110fe9f047a34cb40286f Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Fri, 13 Apr 2018 15:36:46 -0700 Subject: kernel/kexec_file.c: move purgatories sha256 to common code The code to verify the new kernels sha digest is applicable for all architectures. Move it to common code. One problem is the string.c implementation on x86. Currently sha256 includes x86/boot/string.h which defines memcpy and memset to be gcc builtins. By moving the sha256 implementation to common code and changing the include to linux/string.h both functions are no longer defined. 
Thus definitions have to be provided in x86/purgatory/string.c. Link: http://lkml.kernel.org/r/20180321112751.22196-12-prudo@linux.vnet.ibm.com Signed-off-by: Philipp Rudo Acked-by: Dave Young Cc: AKASHI Takahiro Cc: Eric Biederman Cc: Heiko Carstens Cc: Ingo Molnar Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Thiago Jung Bauermann Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sha256.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 include/linux/sha256.h (limited to 'include/linux') diff --git a/include/linux/sha256.h b/include/linux/sha256.h new file mode 100644 index 000000000000..244fe01a65fb --- /dev/null +++ b/include/linux/sha256.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2014 Red Hat Inc. + * + * Author: Vivek Goyal + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#ifndef SHA256_H +#define SHA256_H + +#include <linux/types.h> +#include <crypto/sha.h> + +/* + * Stand-alone implementation of the SHA256 algorithm. It is designed to + * have as little dependencies as possible so it can be used in the + * kexec_file purgatory. In other cases you should use the implementation in + * crypto/. + * + * For details see lib/sha256.c + */ + +extern int sha256_init(struct sha256_state *sctx); +extern int sha256_update(struct sha256_state *sctx, const u8 *input, + unsigned int length); +extern int sha256_final(struct sha256_state *sctx, u8 *hash); + +#endif /* SHA256_H */ -- cgit v1.2.3