From d8f2ebaac650dc35db3bf5cf10e8ee1115b455f8 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 13 Apr 2017 11:41:38 +1000 Subject: sync_file: get rid of internal reference count. sync_file uses the reference count of the file, the internal kref was never getting moved past 1. We can reintroduce this if we decide we need it later. [airlied: fix buildbot warnings] Reviewed-by: Chris Wilson Signed-off-by: Dave Airlie Acked-by: Sumit Semwal Signed-off-by: Gustavo Padovan Link: http://patchwork.freedesktop.org/patch/msgid/20170413014144.637-2-airlied@gmail.com --- include/linux/sync_file.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h index 3e3ab84fc4cd..d37beefdfbd5 100644 --- a/include/linux/sync_file.h +++ b/include/linux/sync_file.h @@ -14,7 +14,6 @@ #define _LINUX_SYNC_FILE_H #include -#include #include #include #include @@ -24,7 +23,6 @@ /** * struct sync_file - sync file to export to the userspace * @file: file representing this fence - * @kref: reference count on fence. * @name: name of sync_file. Useful for debugging * @sync_file_list: membership in global file list * @wq: wait queue for fence signaling @@ -33,7 +31,6 @@ */ struct sync_file { struct file *file; - struct kref kref; char name[32]; #ifdef CONFIG_DEBUG_FS struct list_head sync_file_list; -- cgit v1.2.3 From 9ff88edc5e7bad08bdd79a20f14533a5cf44b865 Mon Sep 17 00:00:00 2001 From: Song Hongyan Date: Sun, 7 May 2017 18:24:24 +0800 Subject: iio: hid-sensor-rotation: Add relative orientation sensor hid support Relative orientation(AG) sensor is a 6dof orientation sensor, it depends on acceleration and gyroscope sensor data. It gives a quaternion describing the orientation of the device relative to an initial orientation. It is a standard HID sensor. 
More information can be found in: http://www.usb.org/developers/hidpage/HUTRR59_-_Usages_for_Wearables.pdf Relative orientation(AG) sensor and dev rotation sensor have the same channels and share the channel usage id. So most of the code for the relative orientation sensor can be reused. Signed-off-by: Song Hongyan Reviewed-by: Andy Shevchenko Reviewed-by: Xu Even Acked-by: Srinivas Pandruvada Signed-off-by: Jonathan Cameron --- include/linux/hid-sensor-ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index 761f86242473..b5469e878e99 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -90,6 +90,7 @@ #define HID_USAGE_SENSOR_ORIENT_TILT_Z 0x200481 #define HID_USAGE_SENSOR_DEVICE_ORIENTATION 0x20008A +#define HID_USAGE_SENSOR_RELATIVE_ORIENTATION 0x20008E #define HID_USAGE_SENSOR_ORIENT_ROTATION_MATRIX 0x200482 #define HID_USAGE_SENSOR_ORIENT_QUATERNION 0x200483 #define HID_USAGE_SENSOR_ORIENT_MAGN_FLUX 0x200484 -- cgit v1.2.3 From 00907c7a3282053dd4782e02c3101809608e7ea7 Mon Sep 17 00:00:00 2001 From: Song Hongyan Date: Sun, 7 May 2017 18:24:25 +0800 Subject: iio: hid-sensor-rotation: Add geomagnetic orientation sensor hid support. Geomagnetic orientation(AM) sensor is one kind of orientation 6dof sensor. It gives the device rotation with respect to the earth's center and the magnetic north. The sensor is implemented using an accelerometer and a magnetometer; it does not use a gyroscope. It is a standard HID sensor. More information can be found in: http://www.usb.org/developers/hidpage/HUTRR59_-_Usages_for_Wearables.pdf Geomagnetic orientation(AM) sensor and dev rotation sensor have the same channels and share the channel usage id. So most of the code for the relative orientation sensor can be reused. 
Signed-off-by: Song Hongyan Signed-off-by: Jonathan Cameron --- include/linux/hid-sensor-ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index b5469e878e99..5af62c7e49f3 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -91,6 +91,7 @@ #define HID_USAGE_SENSOR_DEVICE_ORIENTATION 0x20008A #define HID_USAGE_SENSOR_RELATIVE_ORIENTATION 0x20008E +#define HID_USAGE_SENSOR_GEOMAGNETIC_ORIENTATION 0x2000C1 #define HID_USAGE_SENSOR_ORIENT_ROTATION_MATRIX 0x200482 #define HID_USAGE_SENSOR_ORIENT_QUATERNION 0x200483 #define HID_USAGE_SENSOR_ORIENT_MAGN_FLUX 0x200484 -- cgit v1.2.3 From 6fb34812c2a2a4cdcdad4452b9634892812fa97b Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Tue, 2 May 2017 14:33:45 +0200 Subject: iio: stm32 trigger: Add support for TRGO2 triggers Add support for TRGO2 trigger that can be found on STM32F7. Add additional master modes supported by TRGO2. Register additional "tim[1/8]_trgo2" triggers for timer1 & timer8. Detect TRGO2 timer capability (master mode selection 2). 
Signed-off-by: Fabrice Gasnier Acked-by: Benjamin Gaignard Signed-off-by: Jonathan Cameron --- include/linux/iio/timer/stm32-timer-trigger.h | 2 ++ include/linux/mfd/stm32-timers.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/timer/stm32-timer-trigger.h b/include/linux/iio/timer/stm32-timer-trigger.h index 55535aef2e6c..fa7d786ed99e 100644 --- a/include/linux/iio/timer/stm32-timer-trigger.h +++ b/include/linux/iio/timer/stm32-timer-trigger.h @@ -10,6 +10,7 @@ #define _STM32_TIMER_TRIGGER_H_ #define TIM1_TRGO "tim1_trgo" +#define TIM1_TRGO2 "tim1_trgo2" #define TIM1_CH1 "tim1_ch1" #define TIM1_CH2 "tim1_ch2" #define TIM1_CH3 "tim1_ch3" @@ -44,6 +45,7 @@ #define TIM7_TRGO "tim7_trgo" #define TIM8_TRGO "tim8_trgo" +#define TIM8_TRGO2 "tim8_trgo2" #define TIM8_CH1 "tim8_ch1" #define TIM8_CH2 "tim8_ch2" #define TIM8_CH3 "tim8_ch3" diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h index 4a0abbc10ef6..ce7346e7f77a 100644 --- a/include/linux/mfd/stm32-timers.h +++ b/include/linux/mfd/stm32-timers.h @@ -34,6 +34,7 @@ #define TIM_CR1_DIR BIT(4) /* Counter Direction */ #define TIM_CR1_ARPE BIT(7) /* Auto-reload Preload Ena */ #define TIM_CR2_MMS (BIT(4) | BIT(5) | BIT(6)) /* Master mode selection */ +#define TIM_CR2_MMS2 GENMASK(23, 20) /* Master mode selection 2 */ #define TIM_SMCR_SMS (BIT(0) | BIT(1) | BIT(2)) /* Slave mode selection */ #define TIM_SMCR_TS (BIT(4) | BIT(5) | BIT(6)) /* Trigger selection */ #define TIM_DIER_UIE BIT(0) /* Update interrupt */ @@ -60,6 +61,7 @@ #define MAX_TIM_PSC 0xFFFF #define TIM_CR2_MMS_SHIFT 4 +#define TIM_CR2_MMS2_SHIFT 20 #define TIM_SMCR_TS_SHIFT 4 #define TIM_BDTR_BKF_MASK 0xF #define TIM_BDTR_BKF_SHIFT 16 -- cgit v1.2.3 From 73c73463189974ace90a05397197339071c6ecc7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 12 Apr 2017 20:17:45 -0700 Subject: video: ARM CLCD: Move registers to a separate header. 
We'd like to reuse these register definitions for the DRM CLCD driver, but there's a bunch of fbdev-specific code in the current header. v2: Add #ifndef guard. Signed-off-by: Eric Anholt Link: http://patchwork.freedesktop.org/patch/msgid/20170413031746.12921-1-eric@anholt.net --- include/linux/amba/clcd-regs.h | 81 ++++++++++++++++++++++++++++++++++++++++++ include/linux/amba/clcd.h | 68 +---------------------------------- 2 files changed, 82 insertions(+), 67 deletions(-) create mode 100644 include/linux/amba/clcd-regs.h (limited to 'include/linux') diff --git a/include/linux/amba/clcd-regs.h b/include/linux/amba/clcd-regs.h new file mode 100644 index 000000000000..69c0e2143003 --- /dev/null +++ b/include/linux/amba/clcd-regs.h @@ -0,0 +1,81 @@ +/* + * David A Rusling + * + * Copyright (C) 2001 ARM Limited + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#ifndef AMBA_CLCD_REGS_H +#define AMBA_CLCD_REGS_H + +/* + * CLCD Controller Internal Register addresses + */ +#define CLCD_TIM0 0x00000000 +#define CLCD_TIM1 0x00000004 +#define CLCD_TIM2 0x00000008 +#define CLCD_TIM3 0x0000000c +#define CLCD_UBAS 0x00000010 +#define CLCD_LBAS 0x00000014 + +#define CLCD_PL110_IENB 0x00000018 +#define CLCD_PL110_CNTL 0x0000001c +#define CLCD_PL110_STAT 0x00000020 +#define CLCD_PL110_INTR 0x00000024 +#define CLCD_PL110_UCUR 0x00000028 +#define CLCD_PL110_LCUR 0x0000002C + +#define CLCD_PL111_CNTL 0x00000018 +#define CLCD_PL111_IENB 0x0000001c +#define CLCD_PL111_RIS 0x00000020 +#define CLCD_PL111_MIS 0x00000024 +#define CLCD_PL111_ICR 0x00000028 +#define CLCD_PL111_UCUR 0x0000002c +#define CLCD_PL111_LCUR 0x00000030 + +#define CLCD_PALL 0x00000200 +#define CLCD_PALETTE 0x00000200 + +#define TIM2_CLKSEL (1 << 5) +#define TIM2_IVS (1 << 11) +#define TIM2_IHS (1 << 12) +#define TIM2_IPC (1 << 13) +#define TIM2_IOE (1 << 14) +#define TIM2_BCD (1 << 26) 
+ +#define CNTL_LCDEN (1 << 0) +#define CNTL_LCDBPP1 (0 << 1) +#define CNTL_LCDBPP2 (1 << 1) +#define CNTL_LCDBPP4 (2 << 1) +#define CNTL_LCDBPP8 (3 << 1) +#define CNTL_LCDBPP16 (4 << 1) +#define CNTL_LCDBPP16_565 (6 << 1) +#define CNTL_LCDBPP16_444 (7 << 1) +#define CNTL_LCDBPP24 (5 << 1) +#define CNTL_LCDBW (1 << 4) +#define CNTL_LCDTFT (1 << 5) +#define CNTL_LCDMONO8 (1 << 6) +#define CNTL_LCDDUAL (1 << 7) +#define CNTL_BGR (1 << 8) +#define CNTL_BEBO (1 << 9) +#define CNTL_BEPO (1 << 10) +#define CNTL_LCDPWR (1 << 11) +#define CNTL_LCDVCOMP(x) ((x) << 12) +#define CNTL_LDMAFIFOTIME (1 << 15) +#define CNTL_WATERMARK (1 << 16) + +/* ST Microelectronics variant bits */ +#define CNTL_ST_1XBPP_444 0x0 +#define CNTL_ST_1XBPP_5551 (1 << 17) +#define CNTL_ST_1XBPP_565 (1 << 18) +#define CNTL_ST_CDWID_12 0x0 +#define CNTL_ST_CDWID_16 (1 << 19) +#define CNTL_ST_CDWID_18 (1 << 20) +#define CNTL_ST_CDWID_24 ((1 << 19)|(1 << 20)) +#define CNTL_ST_CEAEN (1 << 21) +#define CNTL_ST_LCDBPP24_PACKED (6 << 1) + +#endif /* AMBA_CLCD_REGS_H */ diff --git a/include/linux/amba/clcd.h b/include/linux/amba/clcd.h index 1035879b322c..d0c3be77c18e 100644 --- a/include/linux/amba/clcd.h +++ b/include/linux/amba/clcd.h @@ -10,73 +10,7 @@ * for more details. 
*/ #include - -/* - * CLCD Controller Internal Register addresses - */ -#define CLCD_TIM0 0x00000000 -#define CLCD_TIM1 0x00000004 -#define CLCD_TIM2 0x00000008 -#define CLCD_TIM3 0x0000000c -#define CLCD_UBAS 0x00000010 -#define CLCD_LBAS 0x00000014 - -#define CLCD_PL110_IENB 0x00000018 -#define CLCD_PL110_CNTL 0x0000001c -#define CLCD_PL110_STAT 0x00000020 -#define CLCD_PL110_INTR 0x00000024 -#define CLCD_PL110_UCUR 0x00000028 -#define CLCD_PL110_LCUR 0x0000002C - -#define CLCD_PL111_CNTL 0x00000018 -#define CLCD_PL111_IENB 0x0000001c -#define CLCD_PL111_RIS 0x00000020 -#define CLCD_PL111_MIS 0x00000024 -#define CLCD_PL111_ICR 0x00000028 -#define CLCD_PL111_UCUR 0x0000002c -#define CLCD_PL111_LCUR 0x00000030 - -#define CLCD_PALL 0x00000200 -#define CLCD_PALETTE 0x00000200 - -#define TIM2_CLKSEL (1 << 5) -#define TIM2_IVS (1 << 11) -#define TIM2_IHS (1 << 12) -#define TIM2_IPC (1 << 13) -#define TIM2_IOE (1 << 14) -#define TIM2_BCD (1 << 26) - -#define CNTL_LCDEN (1 << 0) -#define CNTL_LCDBPP1 (0 << 1) -#define CNTL_LCDBPP2 (1 << 1) -#define CNTL_LCDBPP4 (2 << 1) -#define CNTL_LCDBPP8 (3 << 1) -#define CNTL_LCDBPP16 (4 << 1) -#define CNTL_LCDBPP16_565 (6 << 1) -#define CNTL_LCDBPP16_444 (7 << 1) -#define CNTL_LCDBPP24 (5 << 1) -#define CNTL_LCDBW (1 << 4) -#define CNTL_LCDTFT (1 << 5) -#define CNTL_LCDMONO8 (1 << 6) -#define CNTL_LCDDUAL (1 << 7) -#define CNTL_BGR (1 << 8) -#define CNTL_BEBO (1 << 9) -#define CNTL_BEPO (1 << 10) -#define CNTL_LCDPWR (1 << 11) -#define CNTL_LCDVCOMP(x) ((x) << 12) -#define CNTL_LDMAFIFOTIME (1 << 15) -#define CNTL_WATERMARK (1 << 16) - -/* ST Microelectronics variant bits */ -#define CNTL_ST_1XBPP_444 0x0 -#define CNTL_ST_1XBPP_5551 (1 << 17) -#define CNTL_ST_1XBPP_565 (1 << 18) -#define CNTL_ST_CDWID_12 0x0 -#define CNTL_ST_CDWID_16 (1 << 19) -#define CNTL_ST_CDWID_18 (1 << 20) -#define CNTL_ST_CDWID_24 ((1 << 19)|(1 << 20)) -#define CNTL_ST_CEAEN (1 << 21) -#define CNTL_ST_LCDBPP24_PACKED (6 << 1) +#include enum { /* individual 
formats */ -- cgit v1.2.3 From 61df56bef97e1708bfbc006b307b00834ad61fe8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 10 May 2017 17:12:52 +0200 Subject: HID: Add mapping for Microsoft Win8 Wireless Radio Controls extensions Microsoft has defined some extra HUT codes for the Generic Desktop Page for Wireless Radio controls, see: https://docs.microsoft.com/en-us/windows-hardware/drivers/hid/airplane-mode-radio-management https://web.archive.org/web/20170509144631/https://docs.microsoft.com/en-us/windows-hardware/drivers/hid/airplane-mode-radio-management I've 3 2-in-1 keyboard docks: Dell Venue Pro 11 keyboard dock, HP pavilion x2 keyboard dock and a PEAQ C1010 keyboard dock which have a wireless radio toggle hotkey, which uses the 0x000100c6 HUT code defined in these extensions. This commit adds a mapping for this key, this makes the rfkill toggle hotkey work on the Dell Venue Pro 11 and HP Pavilion X2 keyboards, the PEAQ C1010 keyboard does generate events for the 0x000100c6 HUT code when pressed, but the reported value is always 0. 
Signed-off-by: Hans de Goede Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 5be325d890d9..0b29466bbc21 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -182,6 +182,12 @@ struct hid_item { #define HID_GD_KEYBOARD 0x00010006 #define HID_GD_KEYPAD 0x00010007 #define HID_GD_MULTIAXIS 0x00010008 +/* + * Microsoft Win8 Wireless Radio Controls extensions CA, see (checked 09052017): + * https://docs.microsoft.com/en-us/windows-hardware/drivers/hid/airplane-mode-radio-management + * https://web.archive.org/web/20170509144631/https://docs.microsoft.com/en-us/windows-hardware/drivers/hid/airplane-mode-radio-management + */ +#define HID_GD_WIRELESS_RADIO_CTLS 0x0001000c #define HID_GD_X 0x00010030 #define HID_GD_Y 0x00010031 #define HID_GD_Z 0x00010032 @@ -210,6 +216,10 @@ struct hid_item { #define HID_GD_DOWN 0x00010091 #define HID_GD_RIGHT 0x00010092 #define HID_GD_LEFT 0x00010093 +/* Microsoft Win8 Wireless Radio Controls CA usage codes */ +#define HID_GD_RFKILL_BTN 0x000100c6 +#define HID_GD_RFKILL_LED 0x000100c7 +#define HID_GD_RFKILL_SWITCH 0x000100c8 #define HID_DC_BATTERYSTRENGTH 0x00060020 -- cgit v1.2.3 From 1b9a07ee25049724ab7f7c32282fbf5452530cea Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 10 May 2017 21:32:18 +0300 Subject: {net, IB}/mlx5: Replace mlx5_vzalloc with kvzalloc Commit a7c3e901a46f ("mm: introduce kv[mz]alloc helpers") added proper implementation of mlx5_vzalloc function to the MM core. This made the mlx5_vzalloc function useless, so let's remove it. 
Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index bcdf739ee41a..c2740688d679 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -890,11 +890,6 @@ static inline u16 cmdif_rev(struct mlx5_core_dev *dev) return ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16; } -static inline void *mlx5_vzalloc(unsigned long size) -{ - return kvzalloc(size, GFP_KERNEL); -} - static inline u32 mlx5_base_mkey(const u32 key) { return key & 0xffffff00u; -- cgit v1.2.3 From bb29b9cccd95feeb43e11e9b1c2479777082e28a Mon Sep 17 00:00:00 2001 From: Anders Darander Date: Thu, 27 Apr 2017 08:37:33 +0200 Subject: leds: pca963x: Add bindings to invert polarity Add a new DT property, nxp,inverted-out, to invert the polarity of the output. Tested on PCA9634. Signed-off-by: Anders Darander Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- include/linux/platform_data/leds-pca963x.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/leds-pca963x.h b/include/linux/platform_data/leds-pca963x.h index e731f0036329..54e845ffb5ed 100644 --- a/include/linux/platform_data/leds-pca963x.h +++ b/include/linux/platform_data/leds-pca963x.h @@ -33,10 +33,16 @@ enum pca963x_blink_type { PCA963X_HW_BLINK, }; +enum pca963x_direction { + PCA963X_NORMAL, + PCA963X_INVERTED, +}; + struct pca963x_platform_data { struct led_platform_data leds; enum pca963x_outdrv outdrv; enum pca963x_blink_type blink_type; + enum pca963x_direction dir; }; #endif /* __LINUX_PCA963X_H*/ -- cgit v1.2.3 From 0179720d6be2096b8d0a4d143254ff9e77747daa Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Sun, 7 May 2017 13:48:31 +0300 Subject: net/mlx5: Introduce trigger_health_work function Introduce new function for entering bad-health state. 
This function will be called from FPGA-related logic in a later patch from asynchronous event (IRQ) context, for that we change the spin lock to an IRQ-safe one. Signed-off-by: Ilan Tayari Reviewed-by: Boris Pismenny Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c2740688d679..a277bb36c21f 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -915,6 +915,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev); void mlx5_drain_health_wq(struct mlx5_core_dev *dev); +void mlx5_trigger_health_work(struct mlx5_core_dev *dev); int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf, int node); int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf); -- cgit v1.2.3 From e29341fb3a5b885a4bb5b9a38f2814ca07d3382c Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Mon, 13 Mar 2017 20:05:45 +0200 Subject: net/mlx5: FPGA, Add basic support for Innova Mellanox Innova is a NIC with ConnectX and an FPGA on the same board. The FPGA is a bump-on-the-wire and thus affects operation of the mlx5_core driver on the ConnectX ASIC. Add basic support for Innova in mlx5_core. This allows using the Innova card as a regular NIC, by detecting the FPGA capability bit, and verifying its load state before initializing ConnectX interfaces. Also detect FPGA fatal runtime failures and enter error state if they ever happen. All new FPGA-related logic is placed in its own subdirectory 'fpga', which may be built by selecting CONFIG_MLX5_FPGA. This prepares for further support of various Innova features in later patchsets. Additional details about hardware architecture will be provided as more features get submitted. 
Signed-off-by: Ilan Tayari Reviewed-by: Boris Pismenny Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 6 ++ include/linux/mlx5/driver.h | 5 ++ include/linux/mlx5/mlx5_ifc.h | 11 ++- include/linux/mlx5/mlx5_ifc_fpga.h | 144 +++++++++++++++++++++++++++++++++++++ 4 files changed, 164 insertions(+), 2 deletions(-) create mode 100644 include/linux/mlx5/mlx5_ifc_fpga.h (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index dd9a263ed368..786a43843da9 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -300,6 +300,8 @@ enum mlx5_event { MLX5_EVENT_TYPE_PAGE_FAULT = 0xc, MLX5_EVENT_TYPE_NIC_VPORT_CHANGE = 0xd, + + MLX5_EVENT_TYPE_FPGA_ERROR = 0x20, }; enum { @@ -967,6 +969,7 @@ enum mlx5_cap_type { MLX5_CAP_RESERVED, MLX5_CAP_VECTOR_CALC, MLX5_CAP_QOS, + MLX5_CAP_FPGA, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1088,6 +1091,9 @@ enum mlx5_mcam_feature_groups { #define MLX5_CAP_MCAM_FEATURE(mdev, fld) \ MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_feature_cap_mask.enhanced_features.fld) +#define MLX5_CAP_FPGA(mdev, cap) \ + MLX5_GET(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a277bb36c21f..55bb712643cb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -108,6 +108,8 @@ enum { MLX5_REG_QTCT = 0x400a, MLX5_REG_DCBX_PARAM = 0x4020, MLX5_REG_DCBX_APP = 0x4021, + MLX5_REG_FPGA_CAP = 0x4022, + MLX5_REG_FPGA_CTRL = 0x4023, MLX5_REG_PCAP = 0x5001, MLX5_REG_PMTU = 0x5003, MLX5_REG_PTYS = 0x5004, @@ -761,6 +763,9 @@ struct mlx5_core_dev { atomic_t num_qps; u32 issi; struct mlx5e_resources mlx5e_res; +#ifdef CONFIG_MLX5_FPGA + struct mlx5_fpga_device *fpga; +#endif #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rmap; #endif diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 32de0724b400..6fa1eb6766af 
100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -32,6 +32,8 @@ #ifndef MLX5_IFC_H #define MLX5_IFC_H +#include "mlx5_ifc_fpga.h" + enum { MLX5_EVENT_TYPE_CODING_COMPLETION_EVENTS = 0x0, MLX5_EVENT_TYPE_CODING_PATH_MIGRATED_SUCCEEDED = 0x1, @@ -56,7 +58,8 @@ enum { MLX5_EVENT_TYPE_CODING_STALL_VL_EVENT = 0x1b, MLX5_EVENT_TYPE_CODING_DROPPED_PACKET_LOGGED_EVENT = 0x1f, MLX5_EVENT_TYPE_CODING_COMMAND_INTERFACE_COMPLETION = 0xa, - MLX5_EVENT_TYPE_CODING_PAGE_REQUEST = 0xb + MLX5_EVENT_TYPE_CODING_PAGE_REQUEST = 0xb, + MLX5_EVENT_TYPE_CODING_FPGA_ERROR = 0x20, }; enum { @@ -854,7 +857,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 max_tc[0x4]; u8 reserved_at_1d0[0x1]; u8 dcbx[0x1]; - u8 reserved_at_1d2[0x4]; + u8 reserved_at_1d2[0x3]; + u8 fpga[0x1]; u8 rol_s[0x1]; u8 rol_g[0x1]; u8 reserved_at_1d8[0x1]; @@ -2186,6 +2190,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_e_switch_cap_bits e_switch_cap; struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap; struct mlx5_ifc_qos_cap_bits qos_cap; + struct mlx5_ifc_fpga_cap_bits fpga_cap; u8 reserved_at_0[0x8000]; }; @@ -8182,6 +8187,8 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_sltp_reg_bits sltp_reg; struct mlx5_ifc_mtpps_reg_bits mtpps_reg; struct mlx5_ifc_mtppse_reg_bits mtppse_reg; + struct mlx5_ifc_fpga_ctrl_bits fpga_ctrl_bits; + struct mlx5_ifc_fpga_cap_bits fpga_cap_bits; u8 reserved_at_0[0x60e0]; }; diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h new file mode 100644 index 000000000000..0032d10ac6cf --- /dev/null +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef MLX5_IFC_FPGA_H +#define MLX5_IFC_FPGA_H + +struct mlx5_ifc_fpga_shell_caps_bits { + u8 max_num_qps[0x10]; + u8 reserved_at_10[0x8]; + u8 total_rcv_credits[0x8]; + + u8 reserved_at_20[0xe]; + u8 qp_type[0x2]; + u8 reserved_at_30[0x5]; + u8 rae[0x1]; + u8 rwe[0x1]; + u8 rre[0x1]; + u8 reserved_at_38[0x4]; + u8 dc[0x1]; + u8 ud[0x1]; + u8 uc[0x1]; + u8 rc[0x1]; + + u8 reserved_at_40[0x1a]; + u8 log_ddr_size[0x6]; + + u8 max_fpga_qp_msg_size[0x20]; + + u8 reserved_at_80[0x180]; +}; + +struct mlx5_ifc_fpga_cap_bits { + u8 fpga_id[0x8]; + u8 fpga_device[0x18]; + + u8 register_file_ver[0x20]; + + u8 fpga_ctrl_modify[0x1]; + u8 reserved_at_41[0x5]; + u8 access_reg_query_mode[0x2]; + u8 reserved_at_48[0x6]; + u8 access_reg_modify_mode[0x2]; + u8 reserved_at_50[0x10]; + + u8 reserved_at_60[0x20]; + + u8 image_version[0x20]; + + u8 image_date[0x20]; + + u8 image_time[0x20]; + + u8 shell_version[0x20]; + + u8 reserved_at_100[0x80]; + + struct mlx5_ifc_fpga_shell_caps_bits shell_caps; + + u8 reserved_at_380[0x8]; + u8 ieee_vendor_id[0x18]; + + u8 sandbox_product_version[0x10]; + u8 sandbox_product_id[0x10]; + + u8 sandbox_basic_caps[0x20]; + + u8 reserved_at_3e0[0x10]; + u8 sandbox_extended_caps_len[0x10]; + + u8 sandbox_extended_caps_addr[0x40]; + + u8 fpga_ddr_start_addr[0x40]; + + u8 fpga_cr_space_start_addr[0x40]; + + u8 fpga_ddr_size[0x20]; + + u8 fpga_cr_space_size[0x20]; + + u8 reserved_at_500[0x300]; +}; + +struct mlx5_ifc_fpga_ctrl_bits { + u8 reserved_at_0[0x8]; + u8 operation[0x8]; + u8 reserved_at_10[0x8]; + u8 status[0x8]; + + u8 reserved_at_20[0x8]; + u8 flash_select_admin[0x8]; + u8 reserved_at_30[0x8]; + u8 flash_select_oper[0x8]; + + u8 reserved_at_40[0x40]; +}; + +enum { + MLX5_FPGA_ERROR_EVENT_SYNDROME_CORRUPTED_DDR = 0x1, + MLX5_FPGA_ERROR_EVENT_SYNDROME_FLASH_TIMEOUT = 0x2, + MLX5_FPGA_ERROR_EVENT_SYNDROME_INTERNAL_LINK_ERROR = 0x3, + MLX5_FPGA_ERROR_EVENT_SYNDROME_WATCHDOG_FAILURE = 0x4, + MLX5_FPGA_ERROR_EVENT_SYNDROME_I2C_FAILURE = 0x5, + 
MLX5_FPGA_ERROR_EVENT_SYNDROME_IMAGE_CHANGED = 0x6, + MLX5_FPGA_ERROR_EVENT_SYNDROME_TEMPERATURE_CRITICAL = 0x7, +}; + +struct mlx5_ifc_fpga_error_event_bits { + u8 reserved_at_0[0x40]; + + u8 reserved_at_40[0x18]; + u8 syndrome[0x8]; + + u8 reserved_at_60[0x80]; +}; + +#endif /* MLX5_IFC_FPGA_H */ -- cgit v1.2.3 From b421b22b00b0011f6a2ce3561176c4e79e640c49 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 21 Apr 2017 12:14:13 +0200 Subject: x86/tsc, sched/clock, clocksource: Use clocksource watchdog to provide stable sync points Currently we keep sched_clock_tick() active for stable TSC in order to keep the per-CPU state semi up-to-date. The (obvious) problem is that by the time we detect TSC is borked, our per-CPU state is also borked. So hook into the clocksource watchdog and call a method after we've found it to still be stable. There's the obvious race where the TSC goes wonky between finding it stable and us running the callback, but closing that is too much work and not really worth it, since we're already detecting TSC wobbles after the fact, so we cannot, per definition, fully avoid funny clock values. And since the watchdog runs less often than the tick, this is also an optimization. 
Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/clocksource.h | 1 + include/linux/sched/clock.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index f2b10d9ebd04..81490456c242 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -96,6 +96,7 @@ struct clocksource { void (*suspend)(struct clocksource *cs); void (*resume)(struct clocksource *cs); void (*mark_unstable)(struct clocksource *cs); + void (*tick_stable)(struct clocksource *cs); /* private: */ #ifdef CONFIG_CLOCKSOURCE_WATCHDOG diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h index 34fe92ce1ebd..978cbb0af5f3 100644 --- a/include/linux/sched/clock.h +++ b/include/linux/sched/clock.h @@ -63,8 +63,8 @@ extern void clear_sched_clock_stable(void); */ extern u64 __sched_clock_offset; - extern void sched_clock_tick(void); +extern void sched_clock_tick_stable(void); extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); -- cgit v1.2.3 From ac1e843f0900bea92fcb47f6205e1f9ffb0d469c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 21 Apr 2017 12:26:23 +0200 Subject: sched/clock: Remove unused argument to sched_clock_idle_wakeup_event() The argument to sched_clock_idle_wakeup_event() has not been used in a long time. Remove it. 
Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/clock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h index 978cbb0af5f3..9c36f0722966 100644 --- a/include/linux/sched/clock.h +++ b/include/linux/sched/clock.h @@ -39,7 +39,7 @@ static inline void sched_clock_idle_sleep_event(void) { } -static inline void sched_clock_idle_wakeup_event(u64 delta_ns) +static inline void sched_clock_idle_wakeup_event(void) { } @@ -66,7 +66,7 @@ extern u64 __sched_clock_offset; extern void sched_clock_tick(void); extern void sched_clock_tick_stable(void); extern void sched_clock_idle_sleep_event(void); -extern void sched_clock_idle_wakeup_event(u64 delta_ns); +extern void sched_clock_idle_wakeup_event(void); /* * As outlined in clock.c, provides a fast, high resolution, nanosecond -- cgit v1.2.3 From 2e44b7ddf8ab01cf98106c68388f87af15fbde73 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 21 Apr 2017 12:46:57 +0200 Subject: sched/clock: Use late_initcall() instead of sched_init_smp() Core2 marks its TSC unstable in ACPI Processor Idle, which is probed after sched_init_smp(). Luckily it appears both acpi_processor and intel_idle (which has a similar check) are mandatory built-in. This means we can delay switching to stable until after these drivers have run (if they were modules, this would be impossible). Delay the stable switch to late_initcall() to allow these drivers to mark TSC unstable and avoid difficult stable->unstable transitions. Reported-by: Lofstedt, Marta Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Rafael J . 
Wysocki Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/clock.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h index 9c36f0722966..a55600ffdf4b 100644 --- a/include/linux/sched/clock.h +++ b/include/linux/sched/clock.h @@ -23,10 +23,6 @@ extern u64 sched_clock_cpu(int cpu); extern void sched_clock_init(void); #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK -static inline void sched_clock_init_late(void) -{ -} - static inline void sched_clock_tick(void) { } @@ -53,7 +49,6 @@ static inline u64 local_clock(void) return sched_clock(); } #else -extern void sched_clock_init_late(void); extern int sched_clock_stable(void); extern void clear_sched_clock_stable(void); -- cgit v1.2.3 From c743f0a5c50f2fcbc628526279cfa24f3dabe182 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 14 Apr 2017 14:20:05 +0200 Subject: sched/fair, cpumask: Export for_each_cpu_wrap() More users for for_each_cpu_wrap() have appeared. Promote the construct to generic cpumask interface. The implementation is slightly modified to reduce arguments. 
Signed-off-by: Peter Zijlstra (Intel) Cc: Lauro Ramos Venancio Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: lwang@redhat.com Link: http://lkml.kernel.org/r/20170414122005.o35me2h5nowqkxbv@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/cpumask.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 2404ad238c0b..a21b1fb9a968 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -236,6 +236,23 @@ unsigned int cpumask_local_spread(unsigned int i, int node); (cpu) = cpumask_next_zero((cpu), (mask)), \ (cpu) < nr_cpu_ids;) +extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap); + +/** + * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location + * @cpu: the (optionally unsigned) integer iterator + * @mask: the cpumask pointer + * @start: the start location + * + * The implementation does not assume any bit in @mask is set (including @start). + * + * After the loop, cpu is >= nr_cpu_ids. + */ +#define for_each_cpu_wrap(cpu, mask, start) \ + for ((cpu) = cpumask_next_wrap((start)-1, (mask), (start), false); \ + (cpu) < nr_cpumask_bits; \ + (cpu) = cpumask_next_wrap((cpu), (mask), (start), true)) + /** * for_each_cpu_and - iterate over every cpu in both masks * @cpu: the (optionally unsigned) integer iterator -- cgit v1.2.3 From 785818fa8385fe55dab253e42a4c6728fca61333 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Sat, 29 Apr 2017 11:06:43 +0200 Subject: mtd: nand: add core support for on-die ECC A number of NAND flashes have a capability called "on-die ECC" where the NAND chip itself is capable of detecting and correcting errors. 
Linux already has support for using the ECC implementation of the NAND controller, or a software based ECC implementation, but not for using the ECC implementation of the NAND chip itself. However, such an implementation is sometimes useful in situations where the NAND controller provides ECC algorithms that are not strong enough for the NAND chip used on the system. A typical case is a NAND chip that requires a 4-bit ECC, while the NAND controller only provides a 1-bit ECC algorithm. This commit introduces the support for the NAND_ECC_ON_DIE ECC mode: - Parsing of the "on-die" value for the "nand-ecc-mode" Device Tree property - Handling NAND_ECC_ON_DIE case in nand_scan_tail(). The idea is that the vendor specific code for the NAND chip must implement ->read_page() and ->write_page(). It may optionally provide its own ->read_page_raw() and ->write_page_raw() as well. For OOB operation, we assume the standard operations are good enough, but they can be overridden by the vendor specific code if needed. Signed-off-by: Thomas Petazzoni Reviewed-by: Richard Weinberger Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 8f67b1581683..603522097ec9 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -116,6 +116,7 @@ typedef enum { NAND_ECC_HW, NAND_ECC_HW_SYNDROME, NAND_ECC_HW_OOB_FIRST, + NAND_ECC_ON_DIE, } nand_ecc_modes_t; enum nand_ecc_algo { -- cgit v1.2.3 From cc0f51ec111266f5d255e753bf3254ad411d5c12 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Sat, 29 Apr 2017 11:06:44 +0200 Subject: mtd: nand: export nand_{read,write}_page_raw() The nand_read_page_raw() and nand_write_page_raw() functions might be re-used by vendor-specific implementations of the read_page/write_page functions.
Instead of having vendor-specific code duplicate this code, it is much better to export those functions and allow them to be re-used. Signed-off-by: Thomas Petazzoni Reviewed-by: Richard Weinberger Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 603522097ec9..7a01d2eb7443 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1259,6 +1259,14 @@ int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page); int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, int page); +/* Default read_page_raw implementation */ +int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, + uint8_t *buf, int oob_required, int page); + +/* Default write_page_raw implementation */ +int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, + const uint8_t *buf, int oob_required, int page); + /* Reset and initialize a NAND device */ int nand_reset(struct nand_chip *chip, int chipnr); -- cgit v1.2.3 From c512f36b581862df20acef050c3e1a875166bd6f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 09:31:19 +0200 Subject: sunrpc: properly type argument to kxdreproc_t Pass struct rpc_rqst as the first argument instead of an untyped blob, and mark the data object as const. Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Layton --- include/linux/sunrpc/xdr.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 054c8cde18f3..290f189de200 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -17,6 +17,8 @@ #include #include +struct rpc_rqst; + /* * Buffer adjustment */ @@ -222,7 +224,8 @@ struct xdr_stream { /* * These are the xdr_stream style generic XDR encode and decode functions.
*/ -typedef void (*kxdreproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); +typedef void (*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + const void *obj); typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); -- cgit v1.2.3 From 73c8dc133afb0cbe72a9234894ea72c2a0e71a73 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 14:58:11 +0200 Subject: sunrpc: properly type argument to kxdrdproc_t Pass struct rpc_rqst as the first argument instead of an untyped blob. Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Layton Acked-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 290f189de200..ed0fbf0d8d0f 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -226,7 +226,8 @@ struct xdr_stream { */ typedef void (*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, const void *obj); -typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); +typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + void *obj); extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); -- cgit v1.2.3 From 1c5876ddbdb401f814ef717394826e7dfb6704d4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 23:27:10 +0200 Subject: sunrpc: move p_count out of struct rpc_procinfo p_count is the only writeable member of struct rpc_procinfo, which is a good candidate to be const-ified as it contains function pointers. This patch moves it out of struct rpc_procinfo, and into a separate writable array that is pointed to by struct rpc_version and indexed by p_statidx.
Signed-off-by: Christoph Hellwig --- include/linux/sunrpc/clnt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 6095ecba0dde..c75ba37151fe 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -88,6 +88,7 @@ struct rpc_version { u32 number; /* version number */ unsigned int nrprocs; /* number of procs */ struct rpc_procinfo * procs; /* procedure array */ + unsigned int *counts; /* call counts */ }; /* @@ -99,7 +100,6 @@ struct rpc_procinfo { kxdrdproc_t p_decode; /* XDR decode function */ unsigned int p_arglen; /* argument hdr length (u32) */ unsigned int p_replen; /* reply hdr length (u32) */ - unsigned int p_count; /* call count */ unsigned int p_timer; /* Which RTT timer to use */ u32 p_statidx; /* Which procedure to account */ const char * p_name; /* name of procedure */ -- cgit v1.2.3 From 499b4988109e91b76f231fb1b4f1e53ec3260686 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 May 2017 15:36:49 +0200 Subject: sunrpc: mark all struct rpc_procinfo instances as const struct rpc_procinfo contains function pointers, and marking it as constant avoids it being able to be used as an attack vector for code injections.
Signed-off-by: Christoph Hellwig Acked-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 4 ++-- include/linux/sunrpc/sched.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index c75ba37151fe..55ef67bea06b 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -39,7 +39,7 @@ struct rpc_clnt { struct list_head cl_tasks; /* List of tasks */ spinlock_t cl_lock; /* spinlock */ struct rpc_xprt __rcu * cl_xprt; /* transport */ - struct rpc_procinfo * cl_procinfo; /* procedure info */ + const struct rpc_procinfo *cl_procinfo; /* procedure info */ u32 cl_prog, /* RPC program number */ cl_vers, /* RPC version number */ cl_maxproc; /* max procedure number */ @@ -87,7 +87,7 @@ struct rpc_program { struct rpc_version { u32 number; /* version number */ unsigned int nrprocs; /* number of procs */ - struct rpc_procinfo * procs; /* procedure array */ + const struct rpc_procinfo *procs; /* procedure array */ unsigned int *counts; /* call counts */ }; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 7ba040c797ec..ed60253abd0a 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -22,7 +22,7 @@ */ struct rpc_procinfo; struct rpc_message { - struct rpc_procinfo * rpc_proc; /* Procedure information */ + const struct rpc_procinfo *rpc_proc; /* Procedure information */ void * rpc_argp; /* Arguments */ void * rpc_resp; /* Result */ struct rpc_cred * rpc_cred; /* Credentials */ -- cgit v1.2.3 From a6beb73272b4c0108e41bc7c7b5a447ae6c92863 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 17:35:49 +0200 Subject: sunrpc: properly type pc_func callbacks Drop the argp and resp arguments as they can trivially be derived from the rqstp argument. 
With that all functions now have the same prototype, and we can remove the unsafe casting to svc_procfunc as well as the svc_procfunc typedef itself. Signed-off-by: Christoph Hellwig --- include/linux/sunrpc/svc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 94631026f79c..5c222af2db41 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -418,9 +418,9 @@ struct svc_version { /* * RPC procedure info */ -typedef __be32 (*svc_procfunc)(struct svc_rqst *, void *argp, void *resp); struct svc_procedure { - svc_procfunc pc_func; /* process the request */ + /* process the request: */ + __be32 (*pc_func)(struct svc_rqst *); kxdrproc_t pc_decode; /* XDR decode args */ kxdrproc_t pc_encode; /* XDR encode result */ kxdrproc_t pc_release; /* XDR free result */ -- cgit v1.2.3 From 8537488b5a2f33980e33f654b0a515304de2b267 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 18:48:24 +0200 Subject: sunrpc: properly type pc_release callbacks Drop the p and resp arguments as they are always NULL or can trivially be derived from the rqstp argument. With that all functions now have the same prototype, and we can remove the unsafe casting to kxdrproc_t. 
Signed-off-by: Christoph Hellwig --- include/linux/sunrpc/svc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 5c222af2db41..1381e1343640 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -423,7 +423,8 @@ struct svc_procedure { __be32 (*pc_func)(struct svc_rqst *); kxdrproc_t pc_decode; /* XDR decode args */ kxdrproc_t pc_encode; /* XDR encode result */ - kxdrproc_t pc_release; /* XDR free result */ + /* XDR free result: */ + void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ unsigned int pc_ressize; /* result struct size */ unsigned int pc_count; /* call count */ -- cgit v1.2.3 From 026fec7e7c4723b5f26a753bbcad69f68c8299d4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 19:01:48 +0200 Subject: sunrpc: properly type pc_decode callbacks Drop the argp argument as it can trivially be derived from the rqstp argument. With that all functions now have the same prototype, and we can remove the unsafe casting to kxdrproc_t. 
Signed-off-by: Christoph Hellwig --- include/linux/lockd/xdr.h | 18 +++++++++--------- include/linux/lockd/xdr4.h | 18 +++++++++--------- include/linux/sunrpc/svc.h | 3 ++- 3 files changed, 20 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index d39ed1cc5fbf..0416600844ce 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -95,19 +95,19 @@ struct nlm_reboot { */ #define NLMSVC_XDRSIZE sizeof(struct nlm_args) -int nlmsvc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlmsvc_decode_testargs(struct svc_rqst *, __be32 *); int nlmsvc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *); +int nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *); +int nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *); int nlmsvc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlmsvc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlmsvc_decode_res(struct svc_rqst *, __be32 *); int nlmsvc_encode_void(struct svc_rqst *, __be32 *, void *); -int nlmsvc_decode_void(struct svc_rqst *, __be32 *, void *); -int nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlmsvc_decode_void(struct svc_rqst *, __be32 *); +int nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *); int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlmsvc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlmsvc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *); +int nlmsvc_decode_notify(struct svc_rqst *, __be32 *); +int nlmsvc_decode_reboot(struct svc_rqst *, __be32 *); /* int 
nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *); int nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *); diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index e58c88b52ce1..951bbe31fdb8 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -23,19 +23,19 @@ -int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *); int nlm4svc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *); +int nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *); +int nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *); int nlm4svc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlm4svc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlm4svc_decode_res(struct svc_rqst *, __be32 *); int nlm4svc_encode_void(struct svc_rqst *, __be32 *, void *); -int nlm4svc_decode_void(struct svc_rqst *, __be32 *, void *); -int nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlm4svc_decode_void(struct svc_rqst *, __be32 *); +int nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *); int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlm4svc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlm4svc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *); +int nlm4svc_decode_notify(struct svc_rqst *, __be32 *); +int nlm4svc_decode_reboot(struct svc_rqst *, __be32 *); /* int nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *); int nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *); diff --git 
a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 1381e1343640..047f04411dd4 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -421,7 +421,8 @@ struct svc_version { struct svc_procedure { /* process the request: */ __be32 (*pc_func)(struct svc_rqst *); - kxdrproc_t pc_decode; /* XDR decode args */ + /* XDR decode args: */ + int (*pc_decode)(struct svc_rqst *, __be32 *data); kxdrproc_t pc_encode; /* XDR encode result */ /* XDR free result: */ void (*pc_release)(struct svc_rqst *); -- cgit v1.2.3 From 63f8de37951a64cc24479eafd33085537e088075 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 19:42:02 +0200 Subject: sunrpc: properly type pc_encode callbacks Drop the resp argument as it can trivially be derived from the rqstp argument. With that all functions now have the same prototype, and we can remove the unsafe casting to kxdrproc_t. Signed-off-by: Christoph Hellwig Acked-by: Trond Myklebust --- include/linux/lockd/xdr.h | 8 ++++---- include/linux/lockd/xdr4.h | 8 ++++---- include/linux/sunrpc/svc.h | 3 ++- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 0416600844ce..7acbecc21a40 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -96,16 +96,16 @@ struct nlm_reboot { #define NLMSVC_XDRSIZE sizeof(struct nlm_args) int nlmsvc_decode_testargs(struct svc_rqst *, __be32 *); -int nlmsvc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlmsvc_encode_testres(struct svc_rqst *, __be32 *); int nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *); int nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *); int nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *); -int nlmsvc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlmsvc_encode_res(struct svc_rqst *, __be32 *); int nlmsvc_decode_res(struct svc_rqst *, __be32 *); -int nlmsvc_encode_void(struct svc_rqst 
*, __be32 *, void *); +int nlmsvc_encode_void(struct svc_rqst *, __be32 *); int nlmsvc_decode_void(struct svc_rqst *, __be32 *); int nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *); -int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *); int nlmsvc_decode_notify(struct svc_rqst *, __be32 *); int nlmsvc_decode_reboot(struct svc_rqst *, __be32 *); /* diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index 951bbe31fdb8..bf1645609225 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -24,16 +24,16 @@ int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *); -int nlm4svc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlm4svc_encode_testres(struct svc_rqst *, __be32 *); int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *); int nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *); int nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *); -int nlm4svc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlm4svc_encode_res(struct svc_rqst *, __be32 *); int nlm4svc_decode_res(struct svc_rqst *, __be32 *); -int nlm4svc_encode_void(struct svc_rqst *, __be32 *, void *); +int nlm4svc_encode_void(struct svc_rqst *, __be32 *); int nlm4svc_decode_void(struct svc_rqst *, __be32 *); int nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *); -int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *); int nlm4svc_decode_notify(struct svc_rqst *, __be32 *); int nlm4svc_decode_reboot(struct svc_rqst *, __be32 *); /* diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 047f04411dd4..6cfe41db7f31 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -423,7 +423,8 @@ struct svc_procedure { __be32 (*pc_func)(struct svc_rqst *); /* XDR decode args: */ int (*pc_decode)(struct svc_rqst *, __be32 *data); - 
kxdrproc_t pc_encode; /* XDR encode result */ + /* XDR encode result: */ + int (*pc_encode)(struct svc_rqst *, __be32 *data); /* XDR free result: */ void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ -- cgit v1.2.3 From 35f297e5370bd511a171f7de0c5a31ee661f2e7e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 19:56:10 +0200 Subject: sunrpc: remove kxdrproc_t Remove the now unused typedef. Signed-off-by: Christoph Hellwig --- include/linux/sunrpc/xdr.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index ed0fbf0d8d0f..261b48a2701d 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -34,13 +34,6 @@ struct xdr_netobj { u8 * data; }; -/* - * This is the legacy generic XDR function. rqstp is either a rpc_rqst - * (client side) or svc_rqst pointer (server side). - * Encode functions always assume there's enough room in the buffer. - */ -typedef int (*kxdrproc_t)(void *rqstp, __be32 *data, void *obj); - /* * Basic structure for transmission/reception of a client XDR message. * Features a header (for a linear buffer containing RPC headers -- cgit v1.2.3 From 7fd38af9cae6aef1dfd28a7d1bd214eb5ddb7d53 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 23:40:27 +0200 Subject: sunrpc: move pc_count out of struct svc_procinfo pc_count is the only writeable member of struct svc_procinfo, which is a good candidate to be const-ified as it contains function pointers. This patch moves it out of struct svc_procinfo, and into a separate writable array that is pointed to by struct svc_version.
Signed-off-by: Christoph Hellwig --- include/linux/sunrpc/svc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 6cfe41db7f31..9f00384153f4 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -397,6 +397,7 @@ struct svc_version { u32 vs_vers; /* version number */ u32 vs_nproc; /* number of procedures */ struct svc_procedure * vs_proc; /* per-procedure info */ + unsigned int *vs_count; /* call counts */ u32 vs_xdrsize; /* xdrsize needed for this version */ /* Don't register with rpcbind */ @@ -429,7 +430,6 @@ struct svc_procedure { void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ unsigned int pc_ressize; /* result struct size */ - unsigned int pc_count; /* call count */ unsigned int pc_cachetype; /* cache info (NFS) */ unsigned int pc_xdrressize; /* maximum size of XDR reply */ }; -- cgit v1.2.3 From 860bda29b99afdc072a7a796fe81185f7ae85deb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 May 2017 16:11:49 +0200 Subject: sunrpc: mark all struct svc_procinfo instances as const struct svc_procinfo contains function pointers, and marking it as constant avoids it being able to be used as an attack vector for code injections.
Signed-off-by: Christoph Hellwig --- include/linux/lockd/lockd.h | 4 ++-- include/linux/sunrpc/svc.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 41f7b6a04d69..3eca67728366 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -192,9 +192,9 @@ struct nlm_block { * Global variables */ extern const struct rpc_program nlm_program; -extern struct svc_procedure nlmsvc_procedures[]; +extern const struct svc_procedure nlmsvc_procedures[]; #ifdef CONFIG_LOCKD_V4 -extern struct svc_procedure nlmsvc_procedures4[]; +extern const struct svc_procedure nlmsvc_procedures4[]; #endif extern int nlmsvc_grace_period; extern unsigned long nlmsvc_timeout; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 9f00384153f4..984e6b9c3043 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -237,7 +237,7 @@ struct svc_rqst { struct svc_serv * rq_server; /* RPC service definition */ struct svc_pool * rq_pool; /* thread pool */ - struct svc_procedure * rq_procinfo; /* procedure info */ + const struct svc_procedure *rq_procinfo;/* procedure info */ struct auth_ops * rq_authop; /* authentication flavour */ struct svc_cred rq_cred; /* auth info */ void * rq_xprt_ctxt; /* transport specific context ptr */ @@ -396,7 +396,7 @@ struct svc_program { struct svc_version { u32 vs_vers; /* version number */ u32 vs_nproc; /* number of procedures */ - struct svc_procedure * vs_proc; /* per-procedure info */ + const struct svc_procedure *vs_proc; /* per-procedure info */ unsigned int *vs_count; /* call counts */ u32 vs_xdrsize; /* xdrsize needed for this version */ -- cgit v1.2.3 From e9679189e34b25a1b9aa77fe37d331559d1544af Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 May 2017 16:21:37 +0200 Subject: sunrpc: mark all struct svc_version instances as const Signed-off-by: Christoph Hellwig Acked-by: Trond 
Myklebust --- include/linux/sunrpc/svc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 984e6b9c3043..e85267899753 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -383,7 +383,7 @@ struct svc_program { unsigned int pg_lovers; /* lowest version */ unsigned int pg_hivers; /* highest version */ unsigned int pg_nvers; /* number of versions */ - struct svc_version ** pg_vers; /* version array */ + const struct svc_version **pg_vers; /* version array */ char * pg_name; /* service name */ char * pg_class; /* class name: services sharing authentication */ struct svc_stat * pg_stats; /* rpc statistics */ -- cgit v1.2.3 From cfc5604c488ccd17936b69008af0c9ae050f4a08 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Tue, 25 Apr 2017 22:08:46 +0200 Subject: mtd: spi-nor: introduce SPI 1-2-2 and SPI 1-4-4 protocols This patch changes the prototype of spi_nor_scan(): its 3rd parameter is replaced by a 'struct spi_nor_hwcaps' pointer, which tells the spi-nor framework about the actual hardware capabilities supported by the SPI controller and its driver. Besides, this patch also introduces a new 'struct spi_nor_flash_parameter' telling the spi-nor framework about the hardware capabilities supported by the SPI flash memory and the associated settings required to use those hardware caps. Then, to improve the readability of spi_nor_scan(), the discovery of the memory settings and the memory initialization are now split into two dedicated functions. 1 - spi_nor_init_params() The spi_nor_init_params() function is responsible for initializing the 'struct spi_nor_flash_parameter'. Currently this structure is filled with legacy values but further patches will allow to override some parameter values dynamically, for instance by reading the JESD216 Serial Flash Discoverable Parameter (SFDP) tables from the SPI memory. 
The spi_nor_init_params() function only deals with the hardware capabilities of the SPI flash memory: especially it doesn't care about the hardware capabilities supported by the SPI controller. 2 - spi_nor_setup() The second function is called once the 'struct spi_nor_flash_parameter' has been initialized by spi_nor_init_params(). With both 'struct spi_nor_flash_parameter' and 'struct spi_nor_hwcaps', the new argument of spi_nor_scan(), spi_nor_setup() computes the best match between hardware caps supported by both the (Q)SPI memory and controller hence selecting the relevant settings for (Fast) Read and Page Program operations. Signed-off-by: Cyrille Pitchen Reviewed-by: Marek Vasut --- include/linux/mtd/spi-nor.h | 119 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 110 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index f2a718030476..60db1585f94c 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -119,13 +119,63 @@ /* Configuration Register bits. 
*/ #define CR_QUAD_EN_SPAN BIT(1) /* Spansion Quad I/O */ -enum read_mode { - SPI_NOR_NORMAL = 0, - SPI_NOR_FAST, - SPI_NOR_DUAL, - SPI_NOR_QUAD, +/* Supported SPI protocols */ +#define SNOR_PROTO_INST_MASK GENMASK(23, 16) +#define SNOR_PROTO_INST_SHIFT 16 +#define SNOR_PROTO_INST(_nbits) \ + ((((unsigned long)(_nbits)) << SNOR_PROTO_INST_SHIFT) & \ + SNOR_PROTO_INST_MASK) + +#define SNOR_PROTO_ADDR_MASK GENMASK(15, 8) +#define SNOR_PROTO_ADDR_SHIFT 8 +#define SNOR_PROTO_ADDR(_nbits) \ + ((((unsigned long)(_nbits)) << SNOR_PROTO_ADDR_SHIFT) & \ + SNOR_PROTO_ADDR_MASK) + +#define SNOR_PROTO_DATA_MASK GENMASK(7, 0) +#define SNOR_PROTO_DATA_SHIFT 0 +#define SNOR_PROTO_DATA(_nbits) \ + ((((unsigned long)(_nbits)) << SNOR_PROTO_DATA_SHIFT) & \ + SNOR_PROTO_DATA_MASK) + +#define SNOR_PROTO_STR(_inst_nbits, _addr_nbits, _data_nbits) \ + (SNOR_PROTO_INST(_inst_nbits) | \ + SNOR_PROTO_ADDR(_addr_nbits) | \ + SNOR_PROTO_DATA(_data_nbits)) + +enum spi_nor_protocol { + SNOR_PROTO_1_1_1 = SNOR_PROTO_STR(1, 1, 1), + SNOR_PROTO_1_1_2 = SNOR_PROTO_STR(1, 1, 2), + SNOR_PROTO_1_1_4 = SNOR_PROTO_STR(1, 1, 4), + SNOR_PROTO_1_2_2 = SNOR_PROTO_STR(1, 2, 2), + SNOR_PROTO_1_4_4 = SNOR_PROTO_STR(1, 4, 4), + SNOR_PROTO_2_2_2 = SNOR_PROTO_STR(2, 2, 2), + SNOR_PROTO_4_4_4 = SNOR_PROTO_STR(4, 4, 4), }; +static inline u8 spi_nor_get_protocol_inst_nbits(enum spi_nor_protocol proto) +{ + return ((unsigned long)(proto & SNOR_PROTO_INST_MASK)) >> + SNOR_PROTO_INST_SHIFT; +} + +static inline u8 spi_nor_get_protocol_addr_nbits(enum spi_nor_protocol proto) +{ + return ((unsigned long)(proto & SNOR_PROTO_ADDR_MASK)) >> + SNOR_PROTO_ADDR_SHIFT; +} + +static inline u8 spi_nor_get_protocol_data_nbits(enum spi_nor_protocol proto) +{ + return ((unsigned long)(proto & SNOR_PROTO_DATA_MASK)) >> + SNOR_PROTO_DATA_SHIFT; +} + +static inline u8 spi_nor_get_protocol_width(enum spi_nor_protocol proto) +{ + return spi_nor_get_protocol_data_nbits(proto); +} + #define SPI_NOR_MAX_CMD_SIZE 8 enum spi_nor_ops { 
SPI_NOR_OPS_READ = 0, @@ -154,9 +204,11 @@ enum spi_nor_option_flags { * @read_opcode: the read opcode * @read_dummy: the dummy needed by the read operation * @program_opcode: the program opcode - * @flash_read: the mode of the read * @sst_write_second: used by the SST write operation * @flags: flag options for the current SPI-NOR (SNOR_F_*) + * @read_proto: the SPI protocol for read operations + * @write_proto: the SPI protocol for write operations + * @reg_proto the SPI protocol for read_reg/write_reg/erase operations * @cmd_buf: used by the write_reg * @prepare: [OPTIONAL] do some preparations for the * read/write/erase/lock/unlock operations @@ -185,7 +237,9 @@ struct spi_nor { u8 read_opcode; u8 read_dummy; u8 program_opcode; - enum read_mode flash_read; + enum spi_nor_protocol read_proto; + enum spi_nor_protocol write_proto; + enum spi_nor_protocol reg_proto; bool sst_write_second; u32 flags; u8 cmd_buf[SPI_NOR_MAX_CMD_SIZE]; @@ -219,11 +273,57 @@ static inline struct device_node *spi_nor_get_flash_node(struct spi_nor *nor) return mtd_get_of_node(&nor->mtd); } +/** + * struct spi_nor_hwcaps - Structure for describing the hardware capabilies + * supported by the SPI controller (bus master). + * @mask: the bitmask listing all the supported hw capabilies + */ +struct spi_nor_hwcaps { + u32 mask; +}; + +/* + *(Fast) Read capabilities. + * MUST be ordered by priority: the higher bit position, the higher priority. + * As a matter of performances, it is relevant to use Quad SPI protocols first, + * then Dual SPI protocols before Fast Read and lastly (Slow) Read. 
+ */ +#define SNOR_HWCAPS_READ_MASK GENMASK(7, 0) +#define SNOR_HWCAPS_READ BIT(0) +#define SNOR_HWCAPS_READ_FAST BIT(1) + +#define SNOR_HWCAPS_READ_DUAL GENMASK(4, 2) +#define SNOR_HWCAPS_READ_1_1_2 BIT(2) +#define SNOR_HWCAPS_READ_1_2_2 BIT(3) +#define SNOR_HWCAPS_READ_2_2_2 BIT(4) + +#define SNOR_HWCAPS_READ_QUAD GENMASK(7, 5) +#define SNOR_HWCAPS_READ_1_1_4 BIT(5) +#define SNOR_HWCAPS_READ_1_4_4 BIT(6) +#define SNOR_HWCAPS_READ_4_4_4 BIT(7) + +/* + * Page Program capabilities. + * MUST be ordered by priority: the higher bit position, the higher priority. + * Like (Fast) Read capabilities, Quad SPI protocols are preferred to the + * legacy SPI 1-1-1 protocol. + * Note that Dual Page Programs are not supported because there is no existing + * JEDEC/SFDP standard to define them. Also at this moment no SPI flash memory + * implements such commands. + */ +#define SNOR_HWCAPS_PP_MASK GENMASK(19, 16) +#define SNOR_HWCAPS_PP BIT(16) + +#define SNOR_HWCAPS_PP_QUAD GENMASK(19, 17) +#define SNOR_HWCAPS_PP_1_1_4 BIT(17) +#define SNOR_HWCAPS_PP_1_4_4 BIT(18) +#define SNOR_HWCAPS_PP_4_4_4 BIT(19) + /** * spi_nor_scan() - scan the SPI NOR * @nor: the spi_nor structure * @name: the chip type name - * @mode: the read mode supported by the driver + * @hwcaps: the hardware capabilities supported by the controller driver * * The drivers can use this fuction to scan the SPI NOR. * In the scanning, it will try to get all the necessary information to @@ -233,6 +333,7 @@ static inline struct device_node *spi_nor_get_flash_node(struct spi_nor *nor) * * Return: 0 for success, others for failure. 
*/ -int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode); +int spi_nor_scan(struct spi_nor *nor, const char *name, + const struct spi_nor_hwcaps *hwcaps); #endif -- cgit v1.2.3 From 15f55331527b1422eae683477f8a31fdfae93316 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Tue, 25 Apr 2017 22:08:48 +0200 Subject: mtd: spi-nor: introduce Double Transfer Rate (DTR) SPI protocols This patch introduces support to Double Transfer Rate (DTR) SPI protocols. DTR is used only for Fast Read operations. According to manufacturer datasheets, whatever the number of I/O lines used during instruction (x) and address/mode/dummy (y) clock cycles, DTR is used only during data (z) clock cycles of SPI x-y-z protocols. Signed-off-by: Cyrille Pitchen Reviewed-by: Marek Vasut --- include/linux/mtd/spi-nor.h | 48 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 60db1585f94c..313dbe56f31a 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -73,6 +73,15 @@ #define SPINOR_OP_BE_32K_4B 0x5c /* Erase 32KiB block */ #define SPINOR_OP_SE_4B 0xdc /* Sector erase (usually 64KiB) */ +/* Double Transfer Rate opcodes - defined in JEDEC JESD216B. */ +#define SPINOR_OP_READ_1_1_1_DTR 0x0d +#define SPINOR_OP_READ_1_2_2_DTR 0xbd +#define SPINOR_OP_READ_1_4_4_DTR 0xed + +#define SPINOR_OP_READ_1_1_1_DTR_4B 0x0e +#define SPINOR_OP_READ_1_2_2_DTR_4B 0xbe +#define SPINOR_OP_READ_1_4_4_DTR_4B 0xee + /* Used for SST flashes only. 
*/ #define SPINOR_OP_BP 0x02 /* Byte program */ #define SPINOR_OP_WRDI 0x04 /* Write disable */ @@ -138,10 +147,15 @@ ((((unsigned long)(_nbits)) << SNOR_PROTO_DATA_SHIFT) & \ SNOR_PROTO_DATA_MASK) +#define SNOR_PROTO_IS_DTR BIT(24) /* Double Transfer Rate */ + #define SNOR_PROTO_STR(_inst_nbits, _addr_nbits, _data_nbits) \ (SNOR_PROTO_INST(_inst_nbits) | \ SNOR_PROTO_ADDR(_addr_nbits) | \ SNOR_PROTO_DATA(_data_nbits)) +#define SNOR_PROTO_DTR(_inst_nbits, _addr_nbits, _data_nbits) \ + (SNOR_PROTO_IS_DTR | \ + SNOR_PROTO_STR(_inst_nbits, _addr_nbits, _data_nbits)) enum spi_nor_protocol { SNOR_PROTO_1_1_1 = SNOR_PROTO_STR(1, 1, 1), @@ -151,8 +165,17 @@ enum spi_nor_protocol { SNOR_PROTO_1_4_4 = SNOR_PROTO_STR(1, 4, 4), SNOR_PROTO_2_2_2 = SNOR_PROTO_STR(2, 2, 2), SNOR_PROTO_4_4_4 = SNOR_PROTO_STR(4, 4, 4), + + SNOR_PROTO_1_1_1_DTR = SNOR_PROTO_DTR(1, 1, 1), + SNOR_PROTO_1_2_2_DTR = SNOR_PROTO_DTR(1, 2, 2), + SNOR_PROTO_1_4_4_DTR = SNOR_PROTO_DTR(1, 4, 4), }; +static inline bool spi_nor_protocol_is_dtr(enum spi_nor_protocol proto) +{ + return !!(proto & SNOR_PROTO_IS_DTR); +} + static inline u8 spi_nor_get_protocol_inst_nbits(enum spi_nor_protocol proto) { return ((unsigned long)(proto & SNOR_PROTO_INST_MASK)) >> @@ -288,19 +311,22 @@ struct spi_nor_hwcaps { * As a matter of performances, it is relevant to use Quad SPI protocols first, * then Dual SPI protocols before Fast Read and lastly (Slow) Read. 
*/ -#define SNOR_HWCAPS_READ_MASK GENMASK(7, 0) +#define SNOR_HWCAPS_READ_MASK GENMASK(10, 0) #define SNOR_HWCAPS_READ BIT(0) #define SNOR_HWCAPS_READ_FAST BIT(1) - -#define SNOR_HWCAPS_READ_DUAL GENMASK(4, 2) -#define SNOR_HWCAPS_READ_1_1_2 BIT(2) -#define SNOR_HWCAPS_READ_1_2_2 BIT(3) -#define SNOR_HWCAPS_READ_2_2_2 BIT(4) - -#define SNOR_HWCAPS_READ_QUAD GENMASK(7, 5) -#define SNOR_HWCAPS_READ_1_1_4 BIT(5) -#define SNOR_HWCAPS_READ_1_4_4 BIT(6) -#define SNOR_HWCAPS_READ_4_4_4 BIT(7) +#define SNOR_HWCAPS_READ_1_1_1_DTR BIT(2) + +#define SNOR_HWCAPS_READ_DUAL GENMASK(6, 3) +#define SNOR_HWCAPS_READ_1_1_2 BIT(3) +#define SNOR_HWCAPS_READ_1_2_2 BIT(4) +#define SNOR_HWCAPS_READ_2_2_2 BIT(5) +#define SNOR_HWCAPS_READ_1_2_2_DTR BIT(6) + +#define SNOR_HWCAPS_READ_QUAD GENMASK(10, 7) +#define SNOR_HWCAPS_READ_1_1_4 BIT(7) +#define SNOR_HWCAPS_READ_1_4_4 BIT(8) +#define SNOR_HWCAPS_READ_4_4_4 BIT(9) +#define SNOR_HWCAPS_READ_1_4_4_DTR BIT(10) /* * Page Program capabilities. -- cgit v1.2.3 From fe488a5e48c69204c3b1ad6fa3282e12dbfaabe7 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Tue, 25 Apr 2017 22:08:49 +0200 Subject: mtd: spi-nor: introduce Octo SPI protocols This patch starts adding support to Octo SPI protocols (SPI x-y-8). Op codes for Fast Read and/or Page Program operations using Octo SPI protocols are not known yet (no JEDEC specification has defined them yet) but we'd rather introduce the Octo SPI protocols now so it's done as it should be. 
Signed-off-by: Cyrille Pitchen Reviewed-by: Marek Vasut --- include/linux/mtd/spi-nor.h | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 313dbe56f31a..55faa2f07cca 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -161,14 +161,18 @@ enum spi_nor_protocol { SNOR_PROTO_1_1_1 = SNOR_PROTO_STR(1, 1, 1), SNOR_PROTO_1_1_2 = SNOR_PROTO_STR(1, 1, 2), SNOR_PROTO_1_1_4 = SNOR_PROTO_STR(1, 1, 4), + SNOR_PROTO_1_1_8 = SNOR_PROTO_STR(1, 1, 8), SNOR_PROTO_1_2_2 = SNOR_PROTO_STR(1, 2, 2), SNOR_PROTO_1_4_4 = SNOR_PROTO_STR(1, 4, 4), + SNOR_PROTO_1_8_8 = SNOR_PROTO_STR(1, 8, 8), SNOR_PROTO_2_2_2 = SNOR_PROTO_STR(2, 2, 2), SNOR_PROTO_4_4_4 = SNOR_PROTO_STR(4, 4, 4), + SNOR_PROTO_8_8_8 = SNOR_PROTO_STR(8, 8, 8), SNOR_PROTO_1_1_1_DTR = SNOR_PROTO_DTR(1, 1, 1), SNOR_PROTO_1_2_2_DTR = SNOR_PROTO_DTR(1, 2, 2), SNOR_PROTO_1_4_4_DTR = SNOR_PROTO_DTR(1, 4, 4), + SNOR_PROTO_1_8_8_DTR = SNOR_PROTO_DTR(1, 8, 8), }; static inline bool spi_nor_protocol_is_dtr(enum spi_nor_protocol proto) @@ -308,10 +312,11 @@ struct spi_nor_hwcaps { /* *(Fast) Read capabilities. * MUST be ordered by priority: the higher bit position, the higher priority. - * As a matter of performances, it is relevant to use Quad SPI protocols first, - * then Dual SPI protocols before Fast Read and lastly (Slow) Read. + * As a matter of performances, it is relevant to use Octo SPI protocols first, + * then Quad SPI protocols before Dual SPI protocols, Fast Read and lastly + * (Slow) Read. 
*/ -#define SNOR_HWCAPS_READ_MASK GENMASK(10, 0) +#define SNOR_HWCAPS_READ_MASK GENMASK(14, 0) #define SNOR_HWCAPS_READ BIT(0) #define SNOR_HWCAPS_READ_FAST BIT(1) #define SNOR_HWCAPS_READ_1_1_1_DTR BIT(2) @@ -328,16 +333,22 @@ struct spi_nor_hwcaps { #define SNOR_HWCAPS_READ_4_4_4 BIT(9) #define SNOR_HWCAPS_READ_1_4_4_DTR BIT(10) +#define SNOR_HWCPAS_READ_OCTO GENMASK(14, 11) +#define SNOR_HWCAPS_READ_1_1_8 BIT(11) +#define SNOR_HWCAPS_READ_1_8_8 BIT(12) +#define SNOR_HWCAPS_READ_8_8_8 BIT(13) +#define SNOR_HWCAPS_READ_1_8_8_DTR BIT(14) + /* * Page Program capabilities. * MUST be ordered by priority: the higher bit position, the higher priority. - * Like (Fast) Read capabilities, Quad SPI protocols are preferred to the + * Like (Fast) Read capabilities, Octo/Quad SPI protocols are preferred to the * legacy SPI 1-1-1 protocol. * Note that Dual Page Programs are not supported because there is no existing * JEDEC/SFDP standard to define them. Also at this moment no SPI flash memory * implements such commands. */ -#define SNOR_HWCAPS_PP_MASK GENMASK(19, 16) +#define SNOR_HWCAPS_PP_MASK GENMASK(22, 16) #define SNOR_HWCAPS_PP BIT(16) #define SNOR_HWCAPS_PP_QUAD GENMASK(19, 17) @@ -345,6 +356,11 @@ struct spi_nor_hwcaps { #define SNOR_HWCAPS_PP_1_4_4 BIT(18) #define SNOR_HWCAPS_PP_4_4_4 BIT(19) +#define SNOR_HWCAPS_PP_OCTO GENMASK(22, 20) +#define SNOR_HWCAPS_PP_1_1_8 BIT(20) +#define SNOR_HWCAPS_PP_1_8_8 BIT(21) +#define SNOR_HWCAPS_PP_8_8_8 BIT(22) + /** * spi_nor_scan() - scan the SPI NOR * @nor: the spi_nor structure -- cgit v1.2.3 From b5dceda1f7ef66cba6f8d766502f242a27f96e6d Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 15 May 2017 10:34:52 +0530 Subject: lib/raid6: Add log-of-2 table for RAID6 HW requiring disk position The raid6_gfexp table represents {2}^n values for 0 <= n < 256. The Linux async_tx framework pass values from raid6_gfexp as coefficients for each source to prep_dma_pq() callback of DMA channel with PQ capability. 
This creates a problem for RAID6 offload engines (such as Broadcom SBA) which take disk position (i.e. log of {2}) instead of multiplicative coefficients from raid6_gfexp table. This patch adds raid6_gflog table having log-of-2 value for any given x such that 0 <= x < 256. For any given disk coefficient x, the corresponding disk position is given by raid6_gflog[x]. The RAID6 offload engine driver can use this newly added raid6_gflog table to get disk position from multiplicative coefficient. Signed-off-by: Anup Patel Reviewed-by: Scott Branden Reviewed-by: Ray Jui Acked-by: Shaohua Li Signed-off-by: Vinod Koul --- include/linux/raid/pq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 4d57bbaaa1bf..30f945329818 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -142,6 +142,7 @@ int raid6_select_algo(void); extern const u8 raid6_gfmul[256][256] __attribute__((aligned(256))); extern const u8 raid6_vgfmul[256][32] __attribute__((aligned(256))); extern const u8 raid6_gfexp[256] __attribute__((aligned(256))); +extern const u8 raid6_gflog[256] __attribute__((aligned(256))); extern const u8 raid6_gfinv[256] __attribute__((aligned(256))); extern const u8 raid6_gfexi[256] __attribute__((aligned(256))); -- cgit v1.2.3 From 7b4ff1adb57ad96d8f12a05d8c661a3d8c4d2be1 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 11 May 2017 10:17:45 -0300 Subject: mutex, futex: adjust kernel-doc markups to generate ReST There are a few issues on some kernel-doc markups that were causing trouble with kernel-doc output on ReST format: ./kernel/futex.c:492: WARNING: Inline emphasis start-string without end-string. ./kernel/futex.c:1264: WARNING: Block quote ends without a blank line; unexpected unindent. ./kernel/futex.c:1721: WARNING: Block quote ends without a blank line; unexpected unindent.
./kernel/futex.c:2338: WARNING: Block quote ends without a blank line; unexpected unindent. ./kernel/futex.c:2426: WARNING: Block quote ends without a blank line; unexpected unindent. ./kernel/futex.c:2899: WARNING: Block quote ends without a blank line; unexpected unindent. ./kernel/futex.c:2972: WARNING: Block quote ends without a blank line; unexpected unindent. Fix them. No functional changes. Acked-by: Darren Hart (VMware) Signed-off-by: Mauro Carvalho Chehab --- include/linux/mutex.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 1127fe31645d..ffcba1f337da 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -214,9 +214,9 @@ enum mutex_trylock_recursive_enum { * raisins, and once those are gone this will be removed. * * Returns: - * MUTEX_TRYLOCK_FAILED - trylock failed, - * MUTEX_TRYLOCK_SUCCESS - lock acquired, - * MUTEX_TRYLOCK_RECURSIVE - we already owned the lock. + * - MUTEX_TRYLOCK_FAILED - trylock failed, + * - MUTEX_TRYLOCK_SUCCESS - lock acquired, + * - MUTEX_TRYLOCK_RECURSIVE - we already owned the lock. */ static inline /* __deprecated */ __must_check enum mutex_trylock_recursive_enum mutex_trylock_recursive(struct mutex *lock) -- cgit v1.2.3 From 771b00a84be46d10e3f74af2d86d226302c907c5 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 12 May 2017 09:19:29 -0300 Subject: net: skbuff.h: properly escape a macro name on kernel-doc The "%" escape code of kernel-doc only handle letters. It doesn't handle special chars. So, use the ``literal`` notation. That fixes this warning: ./include/linux/skbuff.h:2695: WARNING: Inline literal start-string without end-string. No functional changes. 
Signed-off-by: Mauro Carvalho Chehab --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a098d95b3d84..25b1659c832a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2691,7 +2691,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); * @offset: the offset within the fragment (starting at the * fragment's own offset) * @size: the number of bytes to map - * @dir: the direction of the mapping (%PCI_DMA_*) + * @dir: the direction of the mapping (``PCI_DMA_*``) * * Maps the page associated with @frag to @device. */ -- cgit v1.2.3 From d651983dde41a854e25664d98cbfc999d55785a8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 12 May 2017 09:35:46 -0300 Subject: net: fix some identation issues at kernel-doc markups Sphinx is very pedantic with regard to indentation and escape sequences: ./include/net/sock.h:1967: ERROR: Unexpected indentation. ./include/net/sock.h:1969: ERROR: Unexpected indentation. ./include/net/sock.h:1970: WARNING: Block quote ends without a blank line; unexpected unindent. ./include/net/sock.h:1971: WARNING: Block quote ends without a blank line; unexpected unindent. ./include/net/sock.h:2268: WARNING: Inline emphasis start-string without end-string. ./net/core/sock.c:2686: ERROR: Unexpected indentation. ./net/core/sock.c:2687: WARNING: Block quote ends without a blank line; unexpected unindent. ./net/core/datagram.c:182: WARNING: Inline emphasis start-string without end-string. ./include/linux/netdevice.h:1444: ERROR: Unexpected indentation. ./drivers/net/phy/phy.c:381: ERROR: Unexpected indentation. ./drivers/net/phy/phy.c:382: WARNING: Block quote ends without a blank line; unexpected unindent. - Fix spacing where needed; - Properly escape constants; - Use a literal block for a race description. No functional changes.
Signed-off-by: Mauro Carvalho Chehab --- include/linux/netdevice.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9c23bd2efb56..56d54b6fac45 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1433,13 +1433,14 @@ enum netdev_priv_flags { /** * struct net_device - The DEVICE structure. - * Actually, this whole structure is a big mistake. It mixes I/O - * data with strictly "high-level" data, and it has to know about - * almost every data structure used in the INET module. + * + * Actually, this whole structure is a big mistake. It mixes I/O + * data with strictly "high-level" data, and it has to know about + * almost every data structure used in the INET module. * * @name: This is the first field of the "visible" part of this structure * (i.e. as seen by users in the "Space.c" file). It is the name - * of the interface. + * of the interface. * * @name_hlist: Device name hash chain, please keep it close to name[] * @ifalias: SNMP alias -- cgit v1.2.3 From b6f6c29454d236e85f2912cb0f9366825ca1b0be Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 13 May 2017 07:40:36 -0300 Subject: mtd: adjust kernel-docs to avoid Sphinx/kerneldoc warnings ./drivers/mtd/nand/nand_bbt.c:1: warning: no structured comments found ./include/linux/mtd/nand.h:785: ERROR: Unexpected indentation. ./drivers/mtd/nand/nand_base.c:449: WARNING: Definition list ends without a blank line; unexpected unindent. ./drivers/mtd/nand/nand_base.c:1161: ERROR: Unexpected indentation. ./drivers/mtd/nand/nand_base.c:1162: WARNING: Block quote ends without a blank line; unexpected unindent. 
Signed-off-by: Mauro Carvalho Chehab --- include/linux/mtd/nand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 9591e0fbe5bd..3d5b20379ba3 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -779,7 +779,7 @@ nand_get_sdr_timings(const struct nand_data_interface *conf) * Minimum amount of bit errors per @ecc_step_ds guaranteed * to be correctable. If unknown, set to zero. * @ecc_step_ds: [INTERN] ECC step required by the @ecc_strength_ds, - * also from the datasheet. It is the recommended ECC step + * also from the datasheet. It is the recommended ECC step * size, if known; if unknown, set to zero. * @onfi_timing_mode_default: [INTERN] default ONFI timing mode. This field is * set to the actually used ONFI mode if the chip is -- cgit v1.2.3 From 19285f3c4669c8b0cea8fb6c452c83db9e6386be Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 14 May 2017 11:52:56 -0300 Subject: ata: update references for libata documentation The libata documentation is now using ReST. Update references to it to point to the new place. 
Signed-off-by: Mauro Carvalho Chehab --- include/linux/ata.h | 2 +- include/linux/libata.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index ad7d9ee89ff0..73fe18edfdaf 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -20,7 +20,7 @@ * * * libata documentation is available via 'make {ps|pdf}docs', - * as Documentation/DocBook/libata.* + * as Documentation/driver-api/libata.rst * * Hardware documentation available from http://www.t13.org/ * diff --git a/include/linux/libata.h b/include/linux/libata.h index c9a69fc8821e..9e6633235ad7 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -19,7 +19,7 @@ * * * libata documentation is available via 'make {ps|pdf}docs', - * as Documentation/DocBook/libata.* + * as Documentation/driver-api/libata.rst * */ -- cgit v1.2.3 From e1b4fc7add72f565d9c35066d85108346e01d3e9 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 14 May 2017 12:04:55 -0300 Subject: fs: update location of filesystems documentation The filesystem documentation was moved from DocBook to Documentation/filesystems/. Update it at the sources. Signed-off-by: Mauro Carvalho Chehab --- include/linux/debugfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 9174b0d28582..aa86e6d8c1aa 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -9,7 +9,7 @@ * 2 as published by the Free Software Foundation. * * debugfs is for people to use instead of /proc or /sys. - * See Documentation/DocBook/filesystems for more details. + * See Documentation/filesystems/ for more details. 
*/ #ifndef _DEBUGFS_H_ -- cgit v1.2.3 From 12e84c71b7d4ee38d51377fd494ac748ee4e6912 Mon Sep 17 00:00:00 2001 From: Okash Khawaja Date: Mon, 15 May 2017 18:45:32 +0100 Subject: tty: export tty_open_by_driver This exports tty_open_by_driver so that it can be called from other places inside the kernel. The checks for null file pointer are based on Alan Cox's patch here: http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1215095.html. Description below is quoted from it: "[RFC] tty_port: allow a port to be opened with a tty that has no file handle Let us create tty objects entirely in kernel space. Untested proposal to show why all the ideas around rewriting half the uart stack are not needed. With this a kernel created non file backed tty object could be used to handle data, and set terminal modes. Not all ldiscs can cope with this as N_TTY in particular has to work back to the fs/tty layer. The tty_port code is however otherwise clean of file handles as far as I can tell as is the low level tty port write path used by the ldisc, the configuration low level interfaces and most of the ldiscs. Currently you don't have any exposure to see tty hangups because those are built around the file layer. However a) it's a fixed port so you probably don't care about that b) if you do we can add a callback and c) you almost certainly don't want the userspace tear down/rebuild behaviour anyway. This should however be sufficient if we wanted for example to enumerate all the bluetooth bound fixed ports via ACPI and make them directly available. It doesn't deal with the case of a user opening a port that's also kernel opened and that would need some locking out (so it returned EBUSY if bound to a kernel device of some kind). That needs resolving along with how you "up" or "down" your new bluetooth device, or enumerate it while providing the existing tty API to avoid regressions (and to debug)." The exported function is used later in this patch set to gain access to tty_struct.
[changed export symbol level - gkh] Signed-off-by: Okash Khawaja Reviewed-by: Samuel Thibault Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index d07cd2105a6c..c9f9fd2c4eef 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -400,6 +400,8 @@ extern struct tty_struct *get_current_tty(void); /* tty_io.c */ extern int __init tty_init(void); extern const char *tty_name(const struct tty_struct *tty); +extern struct tty_struct *tty_open_by_driver(dev_t device, struct inode *inode, + struct file *filp); #else static inline void tty_kref_put(struct tty_struct *tty) { } -- cgit v1.2.3 From 9bb9a39ce51eae886575251e87d9292f679e3e32 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 16 May 2017 09:16:37 -0300 Subject: ata: update references for libata documentation The libata documentation is now using ReST. Update references to it to point to the new place. 
Signed-off-by: Mauro Carvalho Chehab Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo --- include/linux/ata.h | 2 +- include/linux/libata.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index ad7d9ee89ff0..73fe18edfdaf 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -20,7 +20,7 @@ * * * libata documentation is available via 'make {ps|pdf}docs', - * as Documentation/DocBook/libata.* + * as Documentation/driver-api/libata.rst * * Hardware documentation available from http://www.t13.org/ * diff --git a/include/linux/libata.h b/include/linux/libata.h index c9a69fc8821e..9e6633235ad7 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -19,7 +19,7 @@ * * * libata documentation is available via 'make {ps|pdf}docs', - * as Documentation/DocBook/libata.* + * as Documentation/driver-api/libata.rst * */ -- cgit v1.2.3 From 138bc7969c24c6cbba28e919c2376ad10a46fc60 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 7 Apr 2017 19:22:06 -0700 Subject: iio: hid-sensor-hub: Implement batch mode HID sensor hubs using Integrated Sensor Hub (ISH) has added capability to support batch mode. This allows host processor to go to sleep for extended duration, while the sensor hub is storing samples in its internal buffers. 'Commit f4f4673b7535 ("iio: add support for hardware fifo")' implements feature in IIO core to implement such feature. This feature is used in bmc150-accel-core.c to implement batch mode. This implementation allows software device buffer watermark to be used as a hint to adjust hardware FIFO. But HID sensor hubs don't allow to change internal buffer size of FIFOs. Instead an additional usage id to set "maximum report latency" is defined. This allows host to go to sleep up to this latency period without getting any report.
Since there is no ABI to set this latency, a new attribute "hwfifo_timeout" is added so that user mode can specify a latency. This change checks presence of usage id to get/set maximum report latency and if present, it will expose hwfifo_timeout. Signed-off-by: Srinivas Pandruvada Signed-off-by: Jonathan Cameron --- include/linux/hid-sensor-hub.h | 5 +++++ include/linux/hid-sensor-ids.h | 3 +++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index f32d7c392c1e..fc7aae64dcde 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -233,12 +233,14 @@ struct hid_sensor_common { atomic_t user_requested_state; int poll_interval; int raw_hystersis; + int latency_ms; struct iio_trigger *trigger; int timestamp_ns_scale; struct hid_sensor_hub_attribute_info poll; struct hid_sensor_hub_attribute_info report_state; struct hid_sensor_hub_attribute_info power_state; struct hid_sensor_hub_attribute_info sensitivity; + struct hid_sensor_hub_attribute_info report_latency; struct work_struct work; }; @@ -276,5 +278,8 @@ s32 hid_sensor_read_poll_value(struct hid_sensor_common *st); int64_t hid_sensor_convert_timestamp(struct hid_sensor_common *st, int64_t raw_value); +bool hid_sensor_batch_mode_supported(struct hid_sensor_common *st); +int hid_sensor_set_report_latency(struct hid_sensor_common *st, int latency); +int hid_sensor_get_report_latency(struct hid_sensor_common *st); #endif diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index 5af62c7e49f3..76033e0420a7 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -152,6 +152,9 @@ #define HID_USAGE_SENSOR_PROP_REPORT_STATE 0x200316 #define HID_USAGE_SENSOR_PROY_POWER_STATE 0x200319 +/* Batch mode selectors */ +#define HID_USAGE_SENSOR_PROP_REPORT_LATENCY 0x20031B + /* Per data field properties */ #define HID_USAGE_SENSOR_DATA_MOD_NONE 0x00 #define 
HID_USAGE_SENSOR_DATA_MOD_CHANGE_SENSITIVITY_ABS 0x1000 -- cgit v1.2.3 From 65101aeca52241a05e66f23c96eb896c9412718d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 16 May 2017 11:20:13 +0200 Subject: net/sock: factor out dequeue/peek with offset code And update __sk_queue_drop_skb() to work on the specified queue. This will help the udp protocol to use an additional private rx queue in a later patch. Signed-off-by: Paolo Abeni Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a098d95b3d84..bfc7892f6c33 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3056,6 +3056,13 @@ static inline void skb_frag_list_init(struct sk_buff *skb) int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, const struct sk_buff *skb); +struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, + struct sk_buff_head *queue, + unsigned int flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), + int *peeked, int *off, int *err, + struct sk_buff **last); struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags, void (*destructor)(struct sock *sk, struct sk_buff *skb), -- cgit v1.2.3 From 2276f58ac5890e58d2b6a48b95493faff7347e3a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 16 May 2017 11:20:14 +0200 Subject: udp: use a separate rx queue for packet reception under udp flood the sk_receive_queue spinlock is heavily contended. This patch try to reduce the contention on such lock adding a second receive queue to the udp sockets; recvmsg() looks first in such queue and, only if empty, tries to fetch the data from sk_receive_queue. The latter is spliced into the newly added queue every time the receive path has to acquire the sk_receive_queue lock. 
The accounting of forward allocated memory is still protected with the sk_receive_queue lock, so udp_rmem_release() needs to acquire both locks when the forward deficit is flushed. On specific scenarios we can end up acquiring and releasing the sk_receive_queue lock multiple times; that will be covered by the next patch Suggested-by: Eric Dumazet Signed-off-by: Paolo Abeni Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/udp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 6cb4061a720d..eaea63bc79bb 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -80,6 +80,9 @@ struct udp_sock { struct sk_buff *skb, int nhoff); + /* udp_recvmsg try to use this before splicing sk_receive_queue */ + struct sk_buff_head reader_queue ____cacheline_aligned_in_smp; + /* This field is dirtied by udp_recvmsg() */ int forward_deficit; }; -- cgit v1.2.3 From 218af599fa635b107cfe10acf3249c4dfe5e4123 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 May 2017 04:24:36 -0700 Subject: tcp: internal implementation for pacing BBR congestion control depends on pacing, and pacing is currently handled by sch_fq packet scheduler for performance reasons, and also because implemening pacing with FQ was convenient to truly avoid bursts. However there are many cases where this packet scheduler constraint is not practical. - Many linux hosts are not focusing on handling thousands of TCP flows in the most efficient way. - Some routers use fq_codel or other AQM, but still would like to use BBR for the few TCP flows they initiate/terminate. This patch implements an automatic fallback to internal pacing. Pacing is requested either by BBR or use of SO_MAX_PACING_RATE option. If sch_fq happens to be in the egress path, pacing is delegated to the qdisc, otherwise pacing is done by TCP itself. 
One advantage of pacing from TCP stack is to get more precise rtt estimations, and less work done from TX completion, since TCP Small queue limits are not generally hit. Setups with single TX queue but many cpus might even benefit from this. Note that unlike sch_fq, we do not take into account header sizes. Taking care of these headers would add additional complexity for no practical differences in behavior. Some performance numbers using 800 TCP_STREAM flows rate limited to ~48 Mbit per second on 40Gbit NIC. If MQ+pfifo_fast is used on the NIC : $ sar -n DEV 1 5 | grep eth 14:48:44 eth0 725743.00 2932134.00 46776.76 4335184.68 0.00 0.00 1.00 14:48:45 eth0 725349.00 2932112.00 46751.86 4335158.90 0.00 0.00 0.00 14:48:46 eth0 725101.00 2931153.00 46735.07 4333748.63 0.00 0.00 0.00 14:48:47 eth0 725099.00 2931161.00 46735.11 4333760.44 0.00 0.00 1.00 14:48:48 eth0 725160.00 2931731.00 46738.88 4334606.07 0.00 0.00 0.00 Average: eth0 725290.40 2931658.20 46747.54 4334491.74 0.00 0.00 0.40 $ vmstat 1 5 procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu----- r b swpd free buff cache si so bi bo in cs us sy id wa st 4 0 0 259825920 45644 2708324 0 0 21 2 247 98 0 0 100 0 0 4 0 0 259823744 45644 2708356 0 0 0 0 2400825 159843 0 19 81 0 0 0 0 0 259824208 45644 2708072 0 0 0 0 2407351 159929 0 19 81 0 0 1 0 0 259824592 45644 2708128 0 0 0 0 2405183 160386 0 19 80 0 0 1 0 0 259824272 45644 2707868 0 0 0 32 2396361 158037 0 19 81 0 0 Now use MQ+FQ : lpaa23:~# echo fq >/proc/sys/net/core/default_qdisc lpaa23:~# tc qdisc replace dev eth0 root mq $ sar -n DEV 1 5 | grep eth 14:49:57 eth0 678614.00 2727930.00 43739.13 4033279.14 0.00 0.00 0.00 14:49:58 eth0 677620.00 2723971.00 43674.69 4027429.62 0.00 0.00 1.00 14:49:59 eth0 676396.00 2719050.00 43596.83 4020125.02 0.00 0.00 0.00 14:50:00 eth0 675197.00 2714173.00 43518.62 4012938.90 0.00 0.00 1.00 14:50:01 eth0 676388.00 2719063.00 43595.47 4020171.64 0.00 0.00 0.00 Average: eth0 676843.00 2720837.40 
43624.95 4022788.86 0.00 0.00 0.40 $ vmstat 1 5 procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu----- r b swpd free buff cache si so bi bo in cs us sy id wa st 2 0 0 259832240 46008 2710912 0 0 21 2 223 192 0 1 99 0 0 1 0 0 259832896 46008 2710744 0 0 0 0 1702206 198078 0 17 82 0 0 0 0 0 259830272 46008 2710596 0 0 0 0 1696340 197756 1 17 83 0 0 4 0 0 259829168 46024 2710584 0 0 16 0 1688472 197158 1 17 82 0 0 3 0 0 259830224 46024 2710408 0 0 0 0 1692450 197212 0 18 82 0 0 As expected, number of interrupts per second is very different. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Cc: Neal Cardwell Cc: Yuchung Cheng Cc: Van Jacobson Cc: Jerry Chu Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index b6d5adcee8fc..22854f028434 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -293,6 +293,8 @@ struct tcp_sock { u32 sacked_out; /* SACK'd packets */ u32 fackets_out; /* FACK'd packets */ + struct hrtimer pacing_timer; + /* from STCP, retrans queue hinting */ struct sk_buff* lost_skb_hint; struct sk_buff *retransmit_skb_hint; -- cgit v1.2.3 From 1b86f702f80de32d555519e2c4c61385faeab710 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 16 May 2017 18:29:11 +0200 Subject: net: phy: Remove residual magic from PHY drivers commit fa8cddaf903c ("net phylib: Remove unnecessary condition check in phy") removed the only place where the PHY flag PHY_HAS_MAGICANEG was checked. But it left the flag being set in the drivers. Remove the flag. Signed-off-by: Andrew Lunn Signed-off-by: David S. 
Miller --- include/linux/phy.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index e76e4adbc7c7..54ef45823fc1 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -58,8 +58,7 @@ #define PHY_IGNORE_INTERRUPT -2 #define PHY_HAS_INTERRUPT 0x00000001 -#define PHY_HAS_MAGICANEG 0x00000002 -#define PHY_IS_INTERNAL 0x00000004 +#define PHY_IS_INTERNAL 0x00000002 #define MDIO_DEVICE_IS_PHY 0x80000000 /* Interface Mode definitions */ -- cgit v1.2.3 From 3ffad468cf1d9825b425733941bdad0d8d20e795 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Tue, 16 May 2017 11:43:43 -0700 Subject: regulator: Allow for asymmetric settling times Some regulators have different settling times for voltage increases and decreases. To avoid a time penalty on the faster transition allow for different settings for up- and downward transitions. Signed-off-by: Matthias Kaehlcke Acked-by: Laxman Dewangan Signed-off-by: Mark Brown --- include/linux/regulator/machine.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 117699d1f7df..9cd4fef37203 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -110,6 +110,10 @@ struct regulator_state { * @ramp_delay: Time to settle down after voltage change (unit: uV/us) * @settling_time: Time to settle down after voltage change when voltage * change is non-linear (unit: microseconds). + * @settling_time_up: Time to settle down after voltage increase when voltage + * change is non-linear (unit: microseconds). + * @settling_time_down : Time to settle down after voltage decrease when + * voltage change is non-linear (unit: microseconds). * @active_discharge: Enable/disable active discharge. The enum * regulator_active_discharge values are used for * initialisation. 
@@ -152,6 +156,8 @@ struct regulation_constraints { unsigned int ramp_delay; unsigned int settling_time; + unsigned int settling_time_up; + unsigned int settling_time_down; unsigned int enable_time; unsigned int active_discharge; -- cgit v1.2.3 From a86c309e71dc4f43c68483f7e328b1d4f9fef618 Mon Sep 17 00:00:00 2001 From: Mats Karrman Date: Tue, 25 Apr 2017 23:49:47 +0200 Subject: usb: typec: Don't prevent using constant typec_mode_desc initializers In some situations, e.g. when registering alternate modes for local typec ports, it may be handy to use constant mode descriptors. Allow this by changing the mode descriptor arguments of typec_port_register_altmode() et.al. to using const pointers. Signed-off-by: Mats Karrman Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index ec78204964ab..d1d2ebcf36ec 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -117,13 +117,13 @@ struct typec_altmode_desc { struct typec_altmode *typec_partner_register_altmode(struct typec_partner *partner, - struct typec_altmode_desc *desc); + const struct typec_altmode_desc *desc); struct typec_altmode *typec_plug_register_altmode(struct typec_plug *plug, - struct typec_altmode_desc *desc); + const struct typec_altmode_desc *desc); struct typec_altmode *typec_port_register_altmode(struct typec_port *port, - struct typec_altmode_desc *desc); + const struct typec_altmode_desc *desc); void typec_unregister_altmode(struct typec_altmode *altmode); struct typec_port *typec_altmode2port(struct typec_altmode *alt); -- cgit v1.2.3 From 7d21114dc6a2d53babef43a84a8d8db2905d283d Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Fri, 5 May 2017 14:12:24 +0800 Subject: usb: phy: Introduce one extcon device into usb phy Usually usb phy need register one extcon device 
to get the connection notifications. It will remove some duplicate code if the extcon device is registered using common code instead of each phy driver having its own related extcon APIs. So we add one pointer of extcon device into usb phy structure, and some other helper functions to register extcon. Signed-off-by: Baolin Wang Signed-off-by: Felipe Balbi --- include/linux/usb/phy.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h index 31a8068c42a5..299245105610 100644 --- a/include/linux/usb/phy.h +++ b/include/linux/usb/phy.h @@ -9,6 +9,7 @@ #ifndef __LINUX_USB_PHY_H #define __LINUX_USB_PHY_H +#include #include #include @@ -85,6 +86,12 @@ struct usb_phy { struct usb_phy_io_ops *io_ops; void __iomem *io_priv; + /* to support extcon device */ + struct extcon_dev *edev; + struct extcon_dev *id_edev; + struct notifier_block vbus_nb; + struct notifier_block id_nb; + /* for notification of usb_phy_events */ struct atomic_notifier_head notifier; -- cgit v1.2.3 From 15060aba717115dc9f204c02213a7c6bf341163e Mon Sep 17 00:00:00 2001 From: CQ Tang Date: Wed, 10 May 2017 11:39:03 -0700 Subject: iommu/vt-d: Helper function to query if a pasid has any active users A driver would need to know if there are any active references to a PASID before cleaning up its resources. This function helps check if there are any active users of a PASID before it can perform any recovery on that device.
To: Joerg Roedel To: linux-kernel@vger.kernel.org To: David Woodhouse Cc: Jean-Phillipe Brucker Cc: iommu@lists.linux-foundation.org Signed-off-by: CQ Tang Signed-off-by: Ashok Raj Signed-off-by: Joerg Roedel --- include/linux/intel-svm.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 3c25794042f9..99bc5b3ae26e 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -102,6 +102,21 @@ extern int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, */ extern int intel_svm_unbind_mm(struct device *dev, int pasid); +/** + * intel_svm_is_pasid_valid() - check if pasid is valid + * @dev: Device for which PASID was allocated + * @pasid: PASID value to be checked + * + * This function checks if the specified pasid is still valid. A + * valid pasid means the backing mm is still having a valid user. + * For kernel callers init_mm is always valid. for other mm, if mm->mm_users + * is non-zero, it is valid. + * + * returns -EINVAL if invalid pasid, 0 if pasid ref count is invalid + * 1 if pasid is valid. 
+ */ +extern int intel_svm_is_pasid_valid(struct device *dev, int pasid); + #else /* CONFIG_INTEL_IOMMU_SVM */ static inline int intel_svm_bind_mm(struct device *dev, int *pasid, @@ -114,6 +129,11 @@ static inline int intel_svm_unbind_mm(struct device *dev, int pasid) { BUG(); } + +static int intel_svm_is_pasid_valid(struct device *dev, int pasid) +{ + return -EINVAL; +} #endif /* CONFIG_INTEL_IOMMU_SVM */ #define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL, 0, NULL)) -- cgit v1.2.3 From 9a568de4818dea9a05af141046bd3e589245ab83 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 May 2017 14:00:14 -0700 Subject: tcp: switch TCP TS option (RFC 7323) to 1ms clock TCP Timestamps option is defined in RFC 7323 Traditionally on linux, it has been tied to the internal 'jiffies' variable, because it had been a cheap and good enough generator. For TCP flows on the Internet, 1 ms resolution would be much better than 4ms or 10ms (HZ=250 or HZ=100 respectively) For TCP flows in the DC, Google has used usec resolution for more than two years with great success [1] Receive size autotuning (DRS) is indeed more precise and converges faster to optimal window size. This patch converts tp->tcp_mstamp to a plain u64 value storing a 1 usec TCP clock. This choice will allow us to upstream the 1 usec TS option as discussed in IETF 97. [1] https://www.ietf.org/proceedings/97/slides/slides-97-tcpm-tcp-options-for-low-latency-00.pdf Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 62 +------------------------------------------------- include/linux/tcp.h | 22 +++++++++--------- 2 files changed, 12 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bfc7892f6c33..7c0cb2ce8b01 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -506,66 +506,6 @@ typedef unsigned int sk_buff_data_t; typedef unsigned char *sk_buff_data_t; #endif -/** - * struct skb_mstamp - multi resolution time stamps - * @stamp_us: timestamp in us resolution - * @stamp_jiffies: timestamp in jiffies - */ -struct skb_mstamp { - union { - u64 v64; - struct { - u32 stamp_us; - u32 stamp_jiffies; - }; - }; -}; - -/** - * skb_mstamp_get - get current timestamp - * @cl: place to store timestamps - */ -static inline void skb_mstamp_get(struct skb_mstamp *cl) -{ - u64 val = local_clock(); - - do_div(val, NSEC_PER_USEC); - cl->stamp_us = (u32)val; - cl->stamp_jiffies = (u32)jiffies; -} - -/** - * skb_mstamp_delta - compute the difference in usec between two skb_mstamp - * @t1: pointer to newest sample - * @t0: pointer to oldest sample - */ -static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, - const struct skb_mstamp *t0) -{ - s32 delta_us = t1->stamp_us - t0->stamp_us; - u32 delta_jiffies = t1->stamp_jiffies - t0->stamp_jiffies; - - /* If delta_us is negative, this might be because interval is too big, - * or local_clock() drift is too big : fallback using jiffies. 
- */ - if (delta_us <= 0 || - delta_jiffies >= (INT_MAX / (USEC_PER_SEC / HZ))) - - delta_us = jiffies_to_usecs(delta_jiffies); - - return delta_us; -} - -static inline bool skb_mstamp_after(const struct skb_mstamp *t1, - const struct skb_mstamp *t0) -{ - s32 diff = t1->stamp_jiffies - t0->stamp_jiffies; - - if (!diff) - diff = t1->stamp_us - t0->stamp_us; - return diff > 0; -} - /** * struct sk_buff - socket buffer * @next: Next buffer in list @@ -646,7 +586,7 @@ struct sk_buff { union { ktime_t tstamp; - struct skb_mstamp skb_mstamp; + u64 skb_mstamp; }; }; struct rb_node rbnode; /* used in netem & tcp stack */ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 22854f028434..542ca1ae02c4 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -123,7 +123,7 @@ struct tcp_request_sock_ops; struct tcp_request_sock { struct inet_request_sock req; const struct tcp_request_sock_ops *af_specific; - struct skb_mstamp snt_synack; /* first SYNACK sent time */ + u64 snt_synack; /* first SYNACK sent time */ bool tfo_listener; u32 txhash; u32 rcv_isn; @@ -211,7 +211,7 @@ struct tcp_sock { /* Information of the most recently (s)acked skb */ struct tcp_rack { - struct skb_mstamp mstamp; /* (Re)sent time of the skb */ + u64 mstamp; /* (Re)sent time of the skb */ u32 rtt_us; /* Associated RTT */ u32 end_seq; /* Ending TCP sequence of the skb */ u8 advanced; /* mstamp advanced since last lost marking */ @@ -240,7 +240,7 @@ struct tcp_sock { u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ /* RTT measurement */ - struct skb_mstamp tcp_mstamp; /* most recent packet received/sent */ + u64 tcp_mstamp; /* most recent packet received/sent */ u32 srtt_us; /* smoothed round trip time << 3 in usecs */ u32 mdev_us; /* medium deviation */ u32 mdev_max_us; /* maximal mdev for the last rtt period */ @@ -280,8 +280,8 @@ struct tcp_sock { u32 delivered; /* Total data packets delivered incl. rexmits */ u32 lost; /* Total data packets lost incl. 
rexmits */ u32 app_limited; /* limited until "delivered" reaches this val */ - struct skb_mstamp first_tx_mstamp; /* start of window send phase */ - struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */ + u64 first_tx_mstamp; /* start of window send phase */ + u64 delivered_mstamp; /* time we reached "delivered" */ u32 rate_delivered; /* saved rate sample: packets delivered */ u32 rate_interval_us; /* saved rate sample: time elapsed */ @@ -335,16 +335,16 @@ struct tcp_sock { /* Receiver side RTT estimation */ struct { - u32 rtt_us; - u32 seq; - struct skb_mstamp time; + u32 rtt_us; + u32 seq; + u64 time; } rcv_rtt_est; /* Receiver queue space */ struct { - int space; - u32 seq; - struct skb_mstamp time; + int space; + u32 seq; + u64 time; } rcvq_space; /* TCP-specific MTU probe information. */ -- cgit v1.2.3 From 197a5212c3dd70be267b5cd930be0fb68bb53018 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 17 May 2017 12:14:37 +0800 Subject: ptr_ring: add ptr_ring_unconsume Applications that consume a batch of entries in one go can benefit from ability to return some of them back into the ring. Add an API for that - assuming there's space. If there's no space naturally can't do this and have to drop entries, but this implies ring is full so we'd likely drop some anyway. Signed-off-by: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 6b2e0dd88569..796b90f6d4e9 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -403,6 +403,61 @@ static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) return 0; } +/* + * Return entries into ring. Destroy entries that don't fit. + * + * Note: this is expected to be a rare slow path operation. 
+ * + * Note: producer lock is nested within consumer lock, so if you + * resize you must make sure all uses nest correctly. + * In particular if you consume ring in interrupt or BH context, you must + * disable interrupts/BH when doing so. + */ +static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n, + void (*destroy)(void *)) +{ + unsigned long flags; + int head; + + spin_lock_irqsave(&r->consumer_lock, flags); + spin_lock(&r->producer_lock); + + if (!r->size) + goto done; + + /* + * Clean out buffered entries (for simplicity). This way following code + * can test entries for NULL and if not assume they are valid. + */ + head = r->consumer_head - 1; + while (likely(head >= r->consumer_tail)) + r->queue[head--] = NULL; + r->consumer_tail = r->consumer_head; + + /* + * Go over entries in batch, start moving head back and copy entries. + * Stop when we run into previously unconsumed entries. + */ + while (n) { + head = r->consumer_head - 1; + if (head < 0) + head = r->size - 1; + if (r->queue[head]) { + /* This batch entry will have to be destroyed. */ + goto done; + } + r->queue[head] = batch[--n]; + r->consumer_tail = r->consumer_head = head; + } + +done: + /* Destroy all entries left in the batch. */ + while (n) + destroy(batch[--n]); + spin_unlock(&r->producer_lock); + spin_unlock_irqrestore(&r->consumer_lock, flags); +} + static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, int size, gfp_t gfp, void (*destroy)(void *)) -- cgit v1.2.3 From 3acb696015a222f4b25c1b5dce4e36b2d4980da6 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 17 May 2017 12:14:38 +0800 Subject: skb_array: introduce skb_array_unconsume Signed-off-by: Jason Wang Signed-off-by: David S. 
Miller --- include/linux/skb_array.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index f4dfade428f0..79850b638bf2 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -156,6 +156,12 @@ static void __skb_array_destroy_skb(void *ptr) kfree_skb(ptr); } +static inline void skb_array_unconsume(struct skb_array *a, + struct sk_buff **skbs, int n) +{ + ptr_ring_unconsume(&a->ring, (void **)skbs, n, __skb_array_destroy_skb); +} + static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) { return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb); -- cgit v1.2.3 From 728fc8d5532b956f9c4b48dff0577fb722251343 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 17 May 2017 12:14:39 +0800 Subject: ptr_ring: introduce batch dequeuing This patch introduce a batched version of consuming, consumer can dequeue more than one pointers from the ring at a time. We don't care about the reorder of reading here so no need for compiler barrier. Signed-off-by: Jason Wang Signed-off-by: David S. 
Miller --- include/linux/ptr_ring.h | 65 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 796b90f6d4e9..d8c97ec8a8e6 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -278,6 +278,22 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r) return ptr; } +static inline int __ptr_ring_consume_batched(struct ptr_ring *r, + void **array, int n) +{ + void *ptr; + int i; + + for (i = 0; i < n; i++) { + ptr = __ptr_ring_consume(r); + if (!ptr) + break; + array[i] = ptr; + } + + return i; +} + /* * Note: resize (below) nests producer lock within consumer lock, so if you * call this in interrupt or BH context, you must disable interrupts/BH when @@ -328,6 +344,55 @@ static inline void *ptr_ring_consume_bh(struct ptr_ring *r) return ptr; } +static inline int ptr_ring_consume_batched(struct ptr_ring *r, + void **array, int n) +{ + int ret; + + spin_lock(&r->consumer_lock); + ret = __ptr_ring_consume_batched(r, array, n); + spin_unlock(&r->consumer_lock); + + return ret; +} + +static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r, + void **array, int n) +{ + int ret; + + spin_lock_irq(&r->consumer_lock); + ret = __ptr_ring_consume_batched(r, array, n); + spin_unlock_irq(&r->consumer_lock); + + return ret; +} + +static inline int ptr_ring_consume_batched_any(struct ptr_ring *r, + void **array, int n) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&r->consumer_lock, flags); + ret = __ptr_ring_consume_batched(r, array, n); + spin_unlock_irqrestore(&r->consumer_lock, flags); + + return ret; +} + +static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r, + void **array, int n) +{ + int ret; + + spin_lock_bh(&r->consumer_lock); + ret = __ptr_ring_consume_batched(r, array, n); + spin_unlock_bh(&r->consumer_lock); + + return ret; +} + /* Cast to structure type and call a function without 
discarding from FIFO. * Function must return a value. * Callers must take consumer_lock. -- cgit v1.2.3 From 3528c1a52e7af001e0e387fcb6bac2bdb3775d3e Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 17 May 2017 12:14:40 +0800 Subject: skb_array: introduce batch dequeuing Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/skb_array.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index 79850b638bf2..35226cd4efb0 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -97,21 +97,46 @@ static inline struct sk_buff *skb_array_consume(struct skb_array *a) return ptr_ring_consume(&a->ring); } +static inline int skb_array_consume_batched(struct skb_array *a, + struct sk_buff **array, int n) +{ + return ptr_ring_consume_batched(&a->ring, (void **)array, n); +} + static inline struct sk_buff *skb_array_consume_irq(struct skb_array *a) { return ptr_ring_consume_irq(&a->ring); } +static inline int skb_array_consume_batched_irq(struct skb_array *a, + struct sk_buff **array, int n) +{ + return ptr_ring_consume_batched_irq(&a->ring, (void **)array, n); +} + static inline struct sk_buff *skb_array_consume_any(struct skb_array *a) { return ptr_ring_consume_any(&a->ring); } +static inline int skb_array_consume_batched_any(struct skb_array *a, + struct sk_buff **array, int n) +{ + return ptr_ring_consume_batched_any(&a->ring, (void **)array, n); +} + + static inline struct sk_buff *skb_array_consume_bh(struct skb_array *a) { return ptr_ring_consume_bh(&a->ring); } +static inline int skb_array_consume_batched_bh(struct skb_array *a, + struct sk_buff **array, int n) +{ + return ptr_ring_consume_batched_bh(&a->ring, (void **)array, n); +} + static inline int __skb_array_len_with_tag(struct sk_buff *skb) { if (likely(skb)) { -- cgit v1.2.3 From 83339c6b159ea6429a1db40b0d9d1083ab574733 Mon Sep 17 00:00:00 2001 From: Jason Wang 
Date: Wed, 17 May 2017 12:14:41 +0800 Subject: tun: export skb_array This patch exports skb_array through tun_get_skb_array(). Caller can then manipulate skb array directly. Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/if_tun.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index ed6da2e6df90..bf9bdf42d577 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -19,6 +19,7 @@ #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) struct socket *tun_get_socket(struct file *); +struct skb_array *tun_get_skb_array(struct file *file); #else #include #include @@ -28,5 +29,9 @@ static inline struct socket *tun_get_socket(struct file *f) { return ERR_PTR(-EINVAL); } +static inline struct skb_array *tun_get_skb_array(struct file *f) +{ + return ERR_PTR(-EINVAL); +} #endif /* CONFIG_TUN */ #endif /* __IF_TUN_H */ -- cgit v1.2.3 From 49f96fd0cb3808e5ff96573f28b3dceb16eb6998 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 17 May 2017 12:14:42 +0800 Subject: tap: export skb_array This patch exports skb_array through tap_get_skb_array(). Caller can then manipulate skb array directly. Signed-off-by: Jason Wang Signed-off-by: David S. 
Miller --- include/linux/if_tap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 3482c3c2037d..4837157da0dc 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -3,6 +3,7 @@ #if IS_ENABLED(CONFIG_TAP) struct socket *tap_get_socket(struct file *); +struct skb_array *tap_get_skb_array(struct file *file); #else #include #include @@ -12,6 +13,10 @@ static inline struct socket *tap_get_socket(struct file *f) { return ERR_PTR(-EINVAL); } +static inline struct skb_array *tap_get_skb_array(struct file *f) +{ + return ERR_PTR(-EINVAL); +} #endif /* CONFIG_TAP */ #include -- cgit v1.2.3 From ea5244e2af3b4813bf3d90ba6a6481d1a3c33d15 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 2 May 2017 17:15:42 +0930 Subject: serial: 8250: Add flag so drivers can avoid THRE probe The probing of THRE irq behaviour assumes the other end will be reading bytes out of the buffer in order to probe the port at driver init. In some cases the other end cannot be relied upon to read these bytes, so provide a flag for them to skip this step. Bit 19 was chosen as the flags are a int and the top bits are taken. 
Acked-by: Benjamin Herrenschmidt Signed-off-by: Joel Stanley Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 64d892f1e5cd..1775500294bb 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -195,6 +195,7 @@ struct uart_port { #define UPF_NO_TXEN_TEST ((__force upf_t) (1 << 15)) #define UPF_MAGIC_MULTIPLIER ((__force upf_t) ASYNC_MAGIC_MULTIPLIER /* 16 */ ) +#define UPF_NO_THRE_TEST ((__force upf_t) (1 << 19)) /* Port has hardware-assisted h/w flow control */ #define UPF_AUTO_CTS ((__force upf_t) (1 << 20)) #define UPF_AUTO_RTS ((__force upf_t) (1 << 21)) -- cgit v1.2.3 From 0cd2950357e31a96be03b531b4b11fe1df812c9f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 17 May 2017 13:30:44 +0300 Subject: net: make struct net_device::tx_queue_len unsigned int 4 billion packet queue is something unthinkable so use 32-bit value for now. Space savings on x86_64: add/remove: 0/0 grow/shrink: 3/70 up/down: 16/-131 (-115) function old new delta change_tx_queue_len 94 108 +14 qdisc_create 1176 1177 +1 alloc_netdev_mqs 1124 1125 +1 xenvif_alloc 533 532 -1 x25_asy_setup 167 166 -1 ... tun_queue_resize 945 940 -5 pfifo_fast_enqueue 167 162 -5 qfq_init_qdisc 168 158 -10 tap_queue_resize 810 799 -11 transmit 719 698 -21 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3f39d27decf4..0150b2dd3031 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1824,7 +1824,7 @@ struct net_device { #ifdef CONFIG_NET_SCHED DECLARE_HASHTABLE (qdisc_hash, 4); #endif - unsigned long tx_queue_len; + unsigned int tx_queue_len; spinlock_t tx_global_lock; int watchdog_timeo; -- cgit v1.2.3 From f4660cc994e12bae60d6f49895636fba662ce0a1 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 10 May 2017 10:19:18 -0400 Subject: vhost/vsock: use static minor number Vhost-vsock is a software device so there is no probe call that causes the driver to register its misc char device node. This creates a chicken and egg problem: userspace applications must open /dev/vhost-vsock to use the driver but the file doesn't exist until the kernel module has been loaded. Use the devname modalias mechanism so that /dev/vhost-vsock is created at boot. The vhost_vsock kernel module is automatically loaded when the first application opens /dev/vhost-vsock. Note that the "reserved for local use" range in Documentation/admin-guide/devices.txt is incorrect. The userio driver already occupies part of that range. I've updated the documentation accordingly.
Cc: device@lanana.org Signed-off-by: Stefan Hajnoczi Signed-off-by: Greg Kroah-Hartman --- include/linux/miscdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 762b5fec3383..58751eae5f77 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -54,6 +54,7 @@ #define VHOST_NET_MINOR 238 #define UHID_MINOR 239 #define USERIO_MINOR 240 +#define VHOST_VSOCK_MINOR 241 #define MISC_DYNAMIC_MINOR 255 struct device; -- cgit v1.2.3 From 34cfb106d1f8a746fcccbe61c852f705dcdceaa2 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Thu, 18 May 2017 10:07:06 -0500 Subject: misc: sram-exec: Use aligned fncpy instead of memcpy Currently the sram-exec functionality, which allows allocation of executable memory and provides an API to move code to it, is only selected in configs for the ARM architecture. Based on commit 5756e9dd0de6 ("ARM: 6640/1: Thumb-2: Symbol manipulation macros for function body copying") simply copying a C function pointer address using memcpy without consideration of alignment and Thumb is unsafe on ARM platforms. The aforementioned patch introduces the fncpy macro which is a safe way to copy executable code on ARM platforms, so let's make use of that here rather than the unsafe plain memcpy that was previously used by sram_exec_copy. Now sram_exec_copy will move the code to "dst" and return an address that is guaranteed to be safely callable. In the future, architectures hoping to make use of the sram-exec functionality must define an fncpy macro just as ARM has done to guarantee or check for safe copying to executable memory before allowing the arch to select CONFIG_SRAM_EXEC. 
Acked-by: Tony Lindgren Acked-by: Russell King Reviewed-by: Alexandre Belloni Signed-off-by: Dave Gerlach Signed-off-by: Greg Kroah-Hartman --- include/linux/sram.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sram.h b/include/linux/sram.h index c97dcbe8ce25..4fb405fb0480 100644 --- a/include/linux/sram.h +++ b/include/linux/sram.h @@ -16,12 +16,12 @@ struct gen_pool; #ifdef CONFIG_SRAM_EXEC -int sram_exec_copy(struct gen_pool *pool, void *dst, void *src, size_t size); +void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src, size_t size); #else -static inline int sram_exec_copy(struct gen_pool *pool, void *dst, void *src, - size_t size) +static inline void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src, + size_t size) { - return -ENODEV; + return NULL; } #endif /* CONFIG_SRAM_EXEC */ #endif /* __LINUX_SRAM_H__ */ -- cgit v1.2.3 From af777cd1b83e95138e7285fde87c795ef0ae7c4d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 13 May 2017 04:51:40 -0700 Subject: doc: ReSTify credentials.txt This updates the credentials API documentation to ReST markup and moves it under the security subsection of kernel API documentation. Cc: David Howells Signed-off-by: Kees Cook Signed-off-by: Jonathan Corbet --- include/linux/cred.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index b03e7d049a64..c728d515e5e2 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -1,4 +1,4 @@ -/* Credentials management - see Documentation/security/credentials.txt +/* Credentials management - see Documentation/security/credentials.rst * * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) -- cgit v1.2.3 From f00f85a8b2e0ac344f8dbaa3441b31bc283ce400 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 13 May 2017 04:51:42 -0700 Subject: doc: security: minor cleanups to build kernel-doc These fixes were needed to parse lsm_hooks.h kernel-doc. More work is needed, but this is the first step. Acked-by: James Morris Acked-by: Casey Schaufler Signed-off-by: Kees Cook Signed-off-by: Jonathan Corbet --- include/linux/lsm_hooks.h | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 080f34e66017..a1eeaf603d2f 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -29,6 +29,8 @@ #include /** + * union security_list_options - Linux Security Module hook function list + * * Security hooks for program execution operations. * * @bprm_set_creds: @@ -193,8 +195,8 @@ * @value will be set to the allocated attribute value. * @len will be set to the length of the value. * Returns 0 if @name and @value have been successfully set, - * -EOPNOTSUPP if no security attribute is needed, or - * -ENOMEM on memory allocation failure. + * -EOPNOTSUPP if no security attribute is needed, or + * -ENOMEM on memory allocation failure. * @inode_create: * Check permission to create a regular file. * @dir contains inode structure of the parent of the new file. @@ -510,8 +512,7 @@ * process @tsk. Note that this hook is sometimes called from interrupt. * Note that the fown_struct, @fown, is never outside the context of a * struct file, so the file structure (and associated security information) - * can always be obtained: - * container_of(fown, struct file, f_owner) + * can always be obtained: container_of(fown, struct file, f_owner) * @tsk contains the structure of task receiving signal. * @fown contains the file owner information. * @sig is the signal that will be sent. 
When 0, kernel sends SIGIO. @@ -521,7 +522,7 @@ * to receive an open file descriptor via socket IPC. * @file contains the file structure being received. * Return 0 if permission is granted. - * @file_open + * @file_open: * Save open-time permission checking state for later use upon * file_permission, and recheck access if anything has changed * since inode_permission. @@ -1143,7 +1144,7 @@ * @sma contains the semaphore structure. May be NULL. * @cmd contains the operation to be performed. * Return 0 if permission is granted. - * @sem_semop + * @sem_semop: * Check permissions before performing operations on members of the * semaphore set @sma. If the @alter flag is nonzero, the semaphore set * may be modified. @@ -1153,20 +1154,20 @@ * @alter contains the flag indicating whether changes are to be made. * Return 0 if permission is granted. * - * @binder_set_context_mgr + * @binder_set_context_mgr: * Check whether @mgr is allowed to be the binder context manager. * @mgr contains the task_struct for the task being registered. * Return 0 if permission is granted. - * @binder_transaction + * @binder_transaction: * Check whether @from is allowed to invoke a binder transaction call * to @to. * @from contains the task_struct for the sending task. * @to contains the task_struct for the receiving task. - * @binder_transfer_binder + * @binder_transfer_binder: * Check whether @from is allowed to transfer a binder reference to @to. * @from contains the task_struct for the sending task. * @to contains the task_struct for the receiving task. - * @binder_transfer_file + * @binder_transfer_file: * Check whether @from is allowed to transfer @file to @to. * @from contains the task_struct for the sending task. * @file contains the struct file being transferred. @@ -1214,7 +1215,7 @@ * @cred contains the credentials to use. * @ns contains the user namespace we want the capability in * @cap contains the capability . 
- * @audit: Whether to write an audit message or not + * @audit contains whether to write an audit message or not * Return 0 if the capability is granted for @tsk. * @syslog: * Check permission before accessing the kernel message ring or changing @@ -1336,9 +1337,7 @@ * @inode we wish to get the security context of. * @ctx is a pointer in which to place the allocated security context. * @ctxlen points to the place to put the length of @ctx. - * This is the main security structure. */ - union security_list_options { int (*binder_set_context_mgr)(struct task_struct *mgr); int (*binder_transaction)(struct task_struct *from, -- cgit v1.2.3 From b68101a1e8f0263dbc7b8375d2a7c57c6216fb76 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 13 May 2017 04:51:50 -0700 Subject: doc: ReSTify keys.txt This creates a new section in the security development index for kernel keys, and adjusts for ReST markup. Cc: David Howells Signed-off-by: Kees Cook Signed-off-by: Jonathan Corbet --- include/linux/key.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index 0c9b93b0d1f7..24dfe6c1f8cb 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. * * - * See Documentation/security/keys.txt for information on keys/keyrings. + * See Documentation/security/keys/core.rst for information on keys/keyrings. */ #ifndef _LINUX_KEY_H -- cgit v1.2.3 From 7b6859fbdcc4a590c8ef03bcc00d770b42d41c42 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Thu, 18 May 2017 19:41:04 +0300 Subject: qed: Utilize FW 8.20.0.0 This pushes qed [and as result, all qed* drivers] into using 8.20.0.0 firmware. The changes are mostly contained in qed with minor changes to qedi due to some HSI changes. 
Content-wise, the firmware contains fixes to various issues exposed since the release of the previous firmware, including: - Corrects iSCSI fast retransmit when data digest is enabled. - Stop draining packets when receiving several consecutive PFCs. - Prevent possible assertion when consecutively opening/closing many connections. - Prevent possible assertion due to too long BDQ fetch time. In addition, the new firmware would allow us to later add iWARP support in qed and qedr. Changes from previous version ----------------------------- - V2: Fix warning in qed_debug.c Signed-off-by: Chad Dupuis Signed-off-by: Ram Amrani Signed-off-by: Tomer Tayar Signed-off-by: Manish Rangankar Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 209 ++++++++++++++++++++++++++------------- include/linux/qed/eth_common.h | 3 +- include/linux/qed/fcoe_common.h | 1 - include/linux/qed/iscsi_common.h | 91 ++++++++--------- include/linux/qed/rdma_common.h | 2 +- include/linux/qed/roce_common.h | 2 + include/linux/qed/tcp_common.h | 5 +- 7 files changed, 188 insertions(+), 125 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index fbab6e0514f0..a567cbf8c5b4 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -96,12 +96,12 @@ #define CORE_SPQE_PAGE_SIZE_BYTES 4096 -#define MAX_NUM_LL2_RX_QUEUES 32 -#define MAX_NUM_LL2_TX_STATS_COUNTERS 32 +#define MAX_NUM_LL2_RX_QUEUES 48 +#define MAX_NUM_LL2_TX_STATS_COUNTERS 48 #define FW_MAJOR_VERSION 8 -#define FW_MINOR_VERSION 15 -#define FW_REVISION_VERSION 3 +#define FW_MINOR_VERSION 20 +#define FW_REVISION_VERSION 0 #define FW_ENGINEERING_VERSION 0 /***********************/ @@ -181,6 +181,14 @@ #define CDU_VF_FL_SEG_TYPE_OFFSET_REG_TYPE_SHIFT (12) #define CDU_VF_FL_SEG_TYPE_OFFSET_REG_OFFSET_MASK (0xfff) + +#define CDU_CONTEXT_VALIDATION_CFG_ENABLE_SHIFT (0) +#define 
CDU_CONTEXT_VALIDATION_CFG_VALIDATION_TYPE_SHIFT (1) +#define CDU_CONTEXT_VALIDATION_CFG_USE_TYPE (2) +#define CDU_CONTEXT_VALIDATION_CFG_USE_REGION (3) +#define CDU_CONTEXT_VALIDATION_CFG_USE_CID (4) +#define CDU_CONTEXT_VALIDATION_CFG_USE_ACTIVE (5) + /*****************/ /* DQ CONSTANTS */ /*****************/ @@ -457,7 +465,6 @@ #define PXP_BAR_DQ 1 /* PTT and GTT */ -#define PXP_NUM_PF_WINDOWS 12 #define PXP_PER_PF_ENTRY_SIZE 8 #define PXP_NUM_GLOBAL_WINDOWS 243 #define PXP_GLOBAL_ENTRY_SIZE 4 @@ -482,6 +489,7 @@ #define PXP_PF_ME_OPAQUE_ADDR 0x1f8 #define PXP_PF_ME_CONCRETE_ADDR 0x1fc +#define PXP_NUM_PF_WINDOWS 12 #define PXP_EXTERNAL_BAR_PF_WINDOW_START 0x1000 #define PXP_EXTERNAL_BAR_PF_WINDOW_NUM PXP_NUM_PF_WINDOWS #define PXP_EXTERNAL_BAR_PF_WINDOW_SINGLE_SIZE 0x1000 @@ -618,16 +626,21 @@ /*****************/ /* PRM CONSTANTS */ /*****************/ -#define PRM_DMA_PAD_BYTES_NUM 2 -/******************/ -/* SDMs CONSTANTS */ -/******************/ -#define SDM_OP_GEN_TRIG_NONE 0 -#define SDM_OP_GEN_TRIG_WAKE_THREAD 1 -#define SDM_OP_GEN_TRIG_AGG_INT 2 -#define SDM_OP_GEN_TRIG_LOADER 4 -#define SDM_OP_GEN_TRIG_INDICATE_ERROR 6 -#define SDM_OP_GEN_TRIG_RELEASE_THREAD 7 +#define PRM_DMA_PAD_BYTES_NUM 2 +/*****************/ +/* SDMs CONSTANTS */ +/*****************/ + +#define SDM_OP_GEN_TRIG_NONE 0 +#define SDM_OP_GEN_TRIG_WAKE_THREAD 1 +#define SDM_OP_GEN_TRIG_AGG_INT 2 +#define SDM_OP_GEN_TRIG_LOADER 4 +#define SDM_OP_GEN_TRIG_INDICATE_ERROR 6 +#define SDM_OP_GEN_TRIG_INC_ORDER_CNT 9 + +/********************/ +/* Completion types */ +/********************/ #define SDM_COMP_TYPE_NONE 0 #define SDM_COMP_TYPE_WAKE_THREAD 1 @@ -638,10 +651,11 @@ #define SDM_COMP_TYPE_INDICATE_ERROR 6 #define SDM_COMP_TYPE_RELEASE_THREAD 7 #define SDM_COMP_TYPE_RAM 8 +#define SDM_COMP_TYPE_INC_ORDER_CNT 9 -/******************/ -/* PBF CONSTANTS */ -/******************/ +/*****************/ +/* PBF Constants */ +/*****************/ /* Number of PBF command queue lines. 
Each line is 32B. */ #define PBF_MAX_CMD_LINES 3328 @@ -861,7 +875,7 @@ enum db_dest { /* Enum of doorbell DPM types */ enum db_dpm_type { DPM_LEGACY, - DPM_ROCE, + DPM_RDMA, DPM_L2_INLINE, DPM_L2_BD, MAX_DB_DPM_TYPE @@ -884,8 +898,8 @@ struct db_l2_dpm_data { #define DB_L2_DPM_DATA_RESERVED0_SHIFT 27 #define DB_L2_DPM_DATA_SGE_NUM_MASK 0x7 #define DB_L2_DPM_DATA_SGE_NUM_SHIFT 28 -#define DB_L2_DPM_DATA_RESERVED1_MASK 0x1 -#define DB_L2_DPM_DATA_RESERVED1_SHIFT 31 +#define DB_L2_DPM_DATA_GFS_SRC_EN_MASK 0x1 +#define DB_L2_DPM_DATA_GFS_SRC_EN_SHIFT 31 }; /* Structure for SGE in a DPM doorbell of type DPM_L2_BD */ @@ -931,31 +945,33 @@ struct db_pwm_addr { }; /* Parameters to RoCE firmware, passed in EDPM doorbell */ -struct db_roce_dpm_params { +struct db_rdma_dpm_params { __le32 params; -#define DB_ROCE_DPM_PARAMS_SIZE_MASK 0x3F -#define DB_ROCE_DPM_PARAMS_SIZE_SHIFT 0 -#define DB_ROCE_DPM_PARAMS_DPM_TYPE_MASK 0x3 -#define DB_ROCE_DPM_PARAMS_DPM_TYPE_SHIFT 6 -#define DB_ROCE_DPM_PARAMS_OPCODE_MASK 0xFF -#define DB_ROCE_DPM_PARAMS_OPCODE_SHIFT 8 -#define DB_ROCE_DPM_PARAMS_WQE_SIZE_MASK 0x7FF -#define DB_ROCE_DPM_PARAMS_WQE_SIZE_SHIFT 16 -#define DB_ROCE_DPM_PARAMS_RESERVED0_MASK 0x1 -#define DB_ROCE_DPM_PARAMS_RESERVED0_SHIFT 27 -#define DB_ROCE_DPM_PARAMS_COMPLETION_FLG_MASK 0x1 -#define DB_ROCE_DPM_PARAMS_COMPLETION_FLG_SHIFT 28 -#define DB_ROCE_DPM_PARAMS_S_FLG_MASK 0x1 -#define DB_ROCE_DPM_PARAMS_S_FLG_SHIFT 29 -#define DB_ROCE_DPM_PARAMS_RESERVED1_MASK 0x3 -#define DB_ROCE_DPM_PARAMS_RESERVED1_SHIFT 30 +#define DB_RDMA_DPM_PARAMS_SIZE_MASK 0x3F +#define DB_RDMA_DPM_PARAMS_SIZE_SHIFT 0 +#define DB_RDMA_DPM_PARAMS_DPM_TYPE_MASK 0x3 +#define DB_RDMA_DPM_PARAMS_DPM_TYPE_SHIFT 6 +#define DB_RDMA_DPM_PARAMS_OPCODE_MASK 0xFF +#define DB_RDMA_DPM_PARAMS_OPCODE_SHIFT 8 +#define DB_RDMA_DPM_PARAMS_WQE_SIZE_MASK 0x7FF +#define DB_RDMA_DPM_PARAMS_WQE_SIZE_SHIFT 16 +#define DB_RDMA_DPM_PARAMS_RESERVED0_MASK 0x1 +#define DB_RDMA_DPM_PARAMS_RESERVED0_SHIFT 27 +#define 
DB_RDMA_DPM_PARAMS_COMPLETION_FLG_MASK 0x1 +#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_SHIFT 28 +#define DB_RDMA_DPM_PARAMS_S_FLG_MASK 0x1 +#define DB_RDMA_DPM_PARAMS_S_FLG_SHIFT 29 +#define DB_RDMA_DPM_PARAMS_RESERVED1_MASK 0x1 +#define DB_RDMA_DPM_PARAMS_RESERVED1_SHIFT 30 +#define DB_RDMA_DPM_PARAMS_CONN_TYPE_IS_IWARP_MASK 0x1 +#define DB_RDMA_DPM_PARAMS_CONN_TYPE_IS_IWARP_SHIFT 31 }; /* Structure for doorbell data, in ROCE DPM mode, for 1st db in a DPM burst */ -struct db_roce_dpm_data { +struct db_rdma_dpm_data { __le16 icid; __le16 prod_val; - struct db_roce_dpm_params params; + struct db_rdma_dpm_params params; }; /* Igu interrupt command */ @@ -1026,6 +1042,42 @@ struct parsing_and_err_flags { #define PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_SHIFT 15 }; +struct parsing_err_flags { + __le16 flags; +#define PARSING_ERR_FLAGS_MAC_ERROR_MASK 0x1 +#define PARSING_ERR_FLAGS_MAC_ERROR_SHIFT 0 +#define PARSING_ERR_FLAGS_TRUNC_ERROR_MASK 0x1 +#define PARSING_ERR_FLAGS_TRUNC_ERROR_SHIFT 1 +#define PARSING_ERR_FLAGS_PKT_TOO_SMALL_MASK 0x1 +#define PARSING_ERR_FLAGS_PKT_TOO_SMALL_SHIFT 2 +#define PARSING_ERR_FLAGS_ANY_HDR_MISSING_TAG_MASK 0x1 +#define PARSING_ERR_FLAGS_ANY_HDR_MISSING_TAG_SHIFT 3 +#define PARSING_ERR_FLAGS_ANY_HDR_IP_VER_MISMTCH_MASK 0x1 +#define PARSING_ERR_FLAGS_ANY_HDR_IP_VER_MISMTCH_SHIFT 4 +#define PARSING_ERR_FLAGS_ANY_HDR_IP_V4_HDR_LEN_TOO_SMALL_MASK 0x1 +#define PARSING_ERR_FLAGS_ANY_HDR_IP_V4_HDR_LEN_TOO_SMALL_SHIFT 5 +#define PARSING_ERR_FLAGS_ANY_HDR_IP_BAD_TOTAL_LEN_MASK 0x1 +#define PARSING_ERR_FLAGS_ANY_HDR_IP_BAD_TOTAL_LEN_SHIFT 6 +#define PARSING_ERR_FLAGS_IP_V4_CHKSM_ERROR_MASK 0x1 +#define PARSING_ERR_FLAGS_IP_V4_CHKSM_ERROR_SHIFT 7 +#define PARSING_ERR_FLAGS_ANY_HDR_L4_IP_LEN_MISMTCH_MASK 0x1 +#define PARSING_ERR_FLAGS_ANY_HDR_L4_IP_LEN_MISMTCH_SHIFT 8 +#define PARSING_ERR_FLAGS_ZERO_UDP_IP_V6_CHKSM_MASK 0x1 +#define PARSING_ERR_FLAGS_ZERO_UDP_IP_V6_CHKSM_SHIFT 9 +#define PARSING_ERR_FLAGS_INNER_L4_CHKSM_ERROR_MASK 0x1 +#define 
PARSING_ERR_FLAGS_INNER_L4_CHKSM_ERROR_SHIFT 10 +#define PARSING_ERR_FLAGS_ANY_HDR_ZERO_TTL_OR_HOP_LIM_MASK 0x1 +#define PARSING_ERR_FLAGS_ANY_HDR_ZERO_TTL_OR_HOP_LIM_SHIFT 11 +#define PARSING_ERR_FLAGS_NON_8021Q_TAG_EXISTS_IN_BOTH_HDRS_MASK 0x1 +#define PARSING_ERR_FLAGS_NON_8021Q_TAG_EXISTS_IN_BOTH_HDRS_SHIFT 12 +#define PARSING_ERR_FLAGS_GENEVE_OPTION_OVERSIZED_MASK 0x1 +#define PARSING_ERR_FLAGS_GENEVE_OPTION_OVERSIZED_SHIFT 13 +#define PARSING_ERR_FLAGS_TUNNEL_IP_V4_CHKSM_ERROR_MASK 0x1 +#define PARSING_ERR_FLAGS_TUNNEL_IP_V4_CHKSM_ERROR_SHIFT 14 +#define PARSING_ERR_FLAGS_TUNNEL_L4_CHKSM_ERROR_MASK 0x1 +#define PARSING_ERR_FLAGS_TUNNEL_L4_CHKSM_ERROR_SHIFT 15 +}; + struct pb_context { __le32 crc[4]; }; @@ -1288,39 +1340,56 @@ struct tdif_task_context { struct timers_context { __le32 logical_client_0; -#define TIMERS_CONTEXT_EXPIRATIONTIMELC0_MASK 0xFFFFFFF -#define TIMERS_CONTEXT_EXPIRATIONTIMELC0_SHIFT 0 -#define TIMERS_CONTEXT_VALIDLC0_MASK 0x1 -#define TIMERS_CONTEXT_VALIDLC0_SHIFT 28 -#define TIMERS_CONTEXT_ACTIVELC0_MASK 0x1 -#define TIMERS_CONTEXT_ACTIVELC0_SHIFT 29 -#define TIMERS_CONTEXT_RESERVED0_MASK 0x3 -#define TIMERS_CONTEXT_RESERVED0_SHIFT 30 +#define TIMERS_CONTEXT_EXPIRATIONTIMELC0_MASK 0x7FFFFFF +#define TIMERS_CONTEXT_EXPIRATIONTIMELC0_SHIFT 0 +#define TIMERS_CONTEXT_RESERVED0_MASK 0x1 +#define TIMERS_CONTEXT_RESERVED0_SHIFT 27 +#define TIMERS_CONTEXT_VALIDLC0_MASK 0x1 +#define TIMERS_CONTEXT_VALIDLC0_SHIFT 28 +#define TIMERS_CONTEXT_ACTIVELC0_MASK 0x1 +#define TIMERS_CONTEXT_ACTIVELC0_SHIFT 29 +#define TIMERS_CONTEXT_RESERVED1_MASK 0x3 +#define TIMERS_CONTEXT_RESERVED1_SHIFT 30 __le32 logical_client_1; -#define TIMERS_CONTEXT_EXPIRATIONTIMELC1_MASK 0xFFFFFFF -#define TIMERS_CONTEXT_EXPIRATIONTIMELC1_SHIFT 0 -#define TIMERS_CONTEXT_VALIDLC1_MASK 0x1 -#define TIMERS_CONTEXT_VALIDLC1_SHIFT 28 -#define TIMERS_CONTEXT_ACTIVELC1_MASK 0x1 -#define TIMERS_CONTEXT_ACTIVELC1_SHIFT 29 -#define TIMERS_CONTEXT_RESERVED1_MASK 0x3 -#define 
TIMERS_CONTEXT_RESERVED1_SHIFT 30 +#define TIMERS_CONTEXT_EXPIRATIONTIMELC1_MASK 0x7FFFFFF +#define TIMERS_CONTEXT_EXPIRATIONTIMELC1_SHIFT 0 +#define TIMERS_CONTEXT_RESERVED2_MASK 0x1 +#define TIMERS_CONTEXT_RESERVED2_SHIFT 27 +#define TIMERS_CONTEXT_VALIDLC1_MASK 0x1 +#define TIMERS_CONTEXT_VALIDLC1_SHIFT 28 +#define TIMERS_CONTEXT_ACTIVELC1_MASK 0x1 +#define TIMERS_CONTEXT_ACTIVELC1_SHIFT 29 +#define TIMERS_CONTEXT_RESERVED3_MASK 0x3 +#define TIMERS_CONTEXT_RESERVED3_SHIFT 30 __le32 logical_client_2; -#define TIMERS_CONTEXT_EXPIRATIONTIMELC2_MASK 0xFFFFFFF -#define TIMERS_CONTEXT_EXPIRATIONTIMELC2_SHIFT 0 -#define TIMERS_CONTEXT_VALIDLC2_MASK 0x1 -#define TIMERS_CONTEXT_VALIDLC2_SHIFT 28 -#define TIMERS_CONTEXT_ACTIVELC2_MASK 0x1 -#define TIMERS_CONTEXT_ACTIVELC2_SHIFT 29 -#define TIMERS_CONTEXT_RESERVED2_MASK 0x3 -#define TIMERS_CONTEXT_RESERVED2_SHIFT 30 +#define TIMERS_CONTEXT_EXPIRATIONTIMELC2_MASK 0x7FFFFFF +#define TIMERS_CONTEXT_EXPIRATIONTIMELC2_SHIFT 0 +#define TIMERS_CONTEXT_RESERVED4_MASK 0x1 +#define TIMERS_CONTEXT_RESERVED4_SHIFT 27 +#define TIMERS_CONTEXT_VALIDLC2_MASK 0x1 +#define TIMERS_CONTEXT_VALIDLC2_SHIFT 28 +#define TIMERS_CONTEXT_ACTIVELC2_MASK 0x1 +#define TIMERS_CONTEXT_ACTIVELC2_SHIFT 29 +#define TIMERS_CONTEXT_RESERVED5_MASK 0x3 +#define TIMERS_CONTEXT_RESERVED5_SHIFT 30 __le32 host_expiration_fields; -#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALUE_MASK 0xFFFFFFF -#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALUE_SHIFT 0 -#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALID_MASK 0x1 -#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALID_SHIFT 28 -#define TIMERS_CONTEXT_RESERVED3_MASK 0x7 -#define TIMERS_CONTEXT_RESERVED3_SHIFT 29 +#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALUE_MASK 0x7FFFFFF +#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALUE_SHIFT 0 +#define TIMERS_CONTEXT_RESERVED6_MASK 0x1 +#define TIMERS_CONTEXT_RESERVED6_SHIFT 27 +#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALID_MASK 0x1 +#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALID_SHIFT 28 +#define 
TIMERS_CONTEXT_RESERVED7_MASK 0x7 +#define TIMERS_CONTEXT_RESERVED7_SHIFT 29 }; + +enum tunnel_next_protocol { + e_unknown = 0, + e_l2 = 1, + e_ipv4 = 2, + e_ipv6 = 3, + MAX_TUNNEL_NEXT_PROTOCOL +}; + #endif /* __COMMON_HSI__ */ #endif diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h index 34d93eb5bfba..cb06e6e368e1 100644 --- a/include/linux/qed/eth_common.h +++ b/include/linux/qed/eth_common.h @@ -75,7 +75,8 @@ (ETH_NUM_STATISTIC_COUNTERS - 3 * MAX_NUM_VFS / 4) /* Maximum number of buffers, used for RX packet placement */ -#define ETH_RX_MAX_BUFF_PER_PKT 5 +#define ETH_RX_MAX_BUFF_PER_PKT 5 +#define ETH_RX_BD_THRESHOLD 12 /* num of MAC/VLAN filters */ #define ETH_NUM_MAC_FILTERS 512 diff --git a/include/linux/qed/fcoe_common.h b/include/linux/qed/fcoe_common.h index 947a635d04bb..12fc9e788eea 100644 --- a/include/linux/qed/fcoe_common.h +++ b/include/linux/qed/fcoe_common.h @@ -13,7 +13,6 @@ /*********************/ #define FC_ABTS_REPLY_MAX_PAYLOAD_LEN 12 -#define FCOE_MAX_SIZE_FCP_DATA_SUPER (8600) struct fcoe_abts_pkt { __le32 abts_rsp_fc_payload_lo; diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h index 69949f8e354b..85e086cba639 100644 --- a/include/linux/qed/iscsi_common.h +++ b/include/linux/qed/iscsi_common.h @@ -75,25 +75,13 @@ #define ISCSI_TARGET_MODE 1 /* iSCSI request op codes */ -#define ISCSI_OPCODE_NOP_OUT_NO_IMM (0) -#define ISCSI_OPCODE_NOP_OUT ( \ - ISCSI_OPCODE_NOP_OUT_NO_IMM | 0x40) -#define ISCSI_OPCODE_SCSI_CMD_NO_IMM (1) -#define ISCSI_OPCODE_SCSI_CMD ( \ - ISCSI_OPCODE_SCSI_CMD_NO_IMM | 0x40) -#define ISCSI_OPCODE_TMF_REQUEST_NO_IMM (2) -#define ISCSI_OPCODE_TMF_REQUEST ( \ - ISCSI_OPCODE_TMF_REQUEST_NO_IMM | 0x40) -#define ISCSI_OPCODE_LOGIN_REQUEST_NO_IMM (3) -#define ISCSI_OPCODE_LOGIN_REQUEST ( \ - ISCSI_OPCODE_LOGIN_REQUEST_NO_IMM | 0x40) -#define ISCSI_OPCODE_TEXT_REQUEST_NO_IMM (4) -#define ISCSI_OPCODE_TEXT_REQUEST ( \ - ISCSI_OPCODE_TEXT_REQUEST_NO_IMM | 0x40) -#define 
ISCSI_OPCODE_DATA_OUT (5) -#define ISCSI_OPCODE_LOGOUT_REQUEST_NO_IMM (6) -#define ISCSI_OPCODE_LOGOUT_REQUEST ( \ - ISCSI_OPCODE_LOGOUT_REQUEST_NO_IMM | 0x40) +#define ISCSI_OPCODE_NOP_OUT (0) +#define ISCSI_OPCODE_SCSI_CMD (1) +#define ISCSI_OPCODE_TMF_REQUEST (2) +#define ISCSI_OPCODE_LOGIN_REQUEST (3) +#define ISCSI_OPCODE_TEXT_REQUEST (4) +#define ISCSI_OPCODE_DATA_OUT (5) +#define ISCSI_OPCODE_LOGOUT_REQUEST (6) /* iSCSI response/messages op codes */ #define ISCSI_OPCODE_NOP_IN (0x20) @@ -172,17 +160,23 @@ struct iscsi_async_msg_hdr { struct iscsi_cmd_hdr { __le16 reserved1; u8 flags_attr; -#define ISCSI_CMD_HDR_ATTR_MASK 0x7 -#define ISCSI_CMD_HDR_ATTR_SHIFT 0 -#define ISCSI_CMD_HDR_RSRV_MASK 0x3 -#define ISCSI_CMD_HDR_RSRV_SHIFT 3 -#define ISCSI_CMD_HDR_WRITE_MASK 0x1 -#define ISCSI_CMD_HDR_WRITE_SHIFT 5 -#define ISCSI_CMD_HDR_READ_MASK 0x1 -#define ISCSI_CMD_HDR_READ_SHIFT 6 -#define ISCSI_CMD_HDR_FINAL_MASK 0x1 -#define ISCSI_CMD_HDR_FINAL_SHIFT 7 - u8 opcode; +#define ISCSI_CMD_HDR_ATTR_MASK 0x7 +#define ISCSI_CMD_HDR_ATTR_SHIFT 0 +#define ISCSI_CMD_HDR_RSRV_MASK 0x3 +#define ISCSI_CMD_HDR_RSRV_SHIFT 3 +#define ISCSI_CMD_HDR_WRITE_MASK 0x1 +#define ISCSI_CMD_HDR_WRITE_SHIFT 5 +#define ISCSI_CMD_HDR_READ_MASK 0x1 +#define ISCSI_CMD_HDR_READ_SHIFT 6 +#define ISCSI_CMD_HDR_FINAL_MASK 0x1 +#define ISCSI_CMD_HDR_FINAL_SHIFT 7 + u8 hdr_first_byte; +#define ISCSI_CMD_HDR_OPCODE_MASK 0x3F +#define ISCSI_CMD_HDR_OPCODE_SHIFT 0 +#define ISCSI_CMD_HDR_IMM_MASK 0x1 +#define ISCSI_CMD_HDR_IMM_SHIFT 6 +#define ISCSI_CMD_HDR_RSRV1_MASK 0x1 +#define ISCSI_CMD_HDR_RSRV1_SHIFT 7 __le32 hdr_second_dword; #define ISCSI_CMD_HDR_DATA_SEG_LEN_MASK 0xFFFFFF #define ISCSI_CMD_HDR_DATA_SEG_LEN_SHIFT 0 @@ -790,9 +784,9 @@ enum iscsi_error_types { ISCSI_CONN_ERROR_LOCAL_COMPLETION_ERROR, ISCSI_CONN_ERROR_DATA_OVERRUN, ISCSI_CONN_ERROR_OUT_OF_SGES_ERROR, - ISCSI_CONN_ERROR_TCP_SEG_PROC_URG_ERROR, - ISCSI_CONN_ERROR_TCP_SEG_PROC_IP_OPTIONS_ERROR, - 
ISCSI_CONN_ERROR_TCP_SEG_PROC_CONNECT_INVALID_WS_OPTION, + ISCSI_CONN_ERROR_IP_OPTIONS_ERROR, + ISCSI_CONN_ERROR_PRS_ERRORS, + ISCSI_CONN_ERROR_CONNECT_INVALID_TCP_OPTION, ISCSI_CONN_ERROR_TCP_IP_FRAGMENT_ERROR, ISCSI_CONN_ERROR_PROTOCOL_ERR_AHS_LEN, ISCSI_CONN_ERROR_PROTOCOL_ERR_AHS_TYPE, @@ -1304,22 +1298,6 @@ struct ystorm_iscsi_stats_drv { struct regpair iscsi_tx_total_pdu_cnt; }; -struct iscsi_db_data { - u8 params; -#define ISCSI_DB_DATA_DEST_MASK 0x3 -#define ISCSI_DB_DATA_DEST_SHIFT 0 -#define ISCSI_DB_DATA_AGG_CMD_MASK 0x3 -#define ISCSI_DB_DATA_AGG_CMD_SHIFT 2 -#define ISCSI_DB_DATA_BYPASS_EN_MASK 0x1 -#define ISCSI_DB_DATA_BYPASS_EN_SHIFT 4 -#define ISCSI_DB_DATA_RESERVED_MASK 0x1 -#define ISCSI_DB_DATA_RESERVED_SHIFT 5 -#define ISCSI_DB_DATA_AGG_VAL_SEL_MASK 0x3 -#define ISCSI_DB_DATA_AGG_VAL_SEL_SHIFT 6 - u8 agg_flags; - __le16 sq_prod; -}; - struct tstorm_iscsi_task_ag_ctx { u8 byte0; u8 byte1; @@ -1398,5 +1376,20 @@ struct tstorm_iscsi_task_ag_ctx { __le32 reg1; __le32 reg2; }; +struct iscsi_db_data { + u8 params; +#define ISCSI_DB_DATA_DEST_MASK 0x3 +#define ISCSI_DB_DATA_DEST_SHIFT 0 +#define ISCSI_DB_DATA_AGG_CMD_MASK 0x3 +#define ISCSI_DB_DATA_AGG_CMD_SHIFT 2 +#define ISCSI_DB_DATA_BYPASS_EN_MASK 0x1 +#define ISCSI_DB_DATA_BYPASS_EN_SHIFT 4 +#define ISCSI_DB_DATA_RESERVED_MASK 0x1 +#define ISCSI_DB_DATA_RESERVED_SHIFT 5 +#define ISCSI_DB_DATA_AGG_VAL_SEL_MASK 0x3 +#define ISCSI_DB_DATA_AGG_VAL_SEL_SHIFT 6 + u8 agg_flags; + __le16 sq_prod; +}; #endif /* __ISCSI_COMMON__ */ diff --git a/include/linux/qed/rdma_common.h b/include/linux/qed/rdma_common.h index 72c770f9f666..a9b3050f469c 100644 --- a/include/linux/qed/rdma_common.h +++ b/include/linux/qed/rdma_common.h @@ -42,7 +42,7 @@ #define RDMA_MAX_SGE_PER_SQ_WQE (4) #define RDMA_MAX_SGE_PER_RQ_WQE (4) -#define RDMA_MAX_DATA_SIZE_IN_WQE (0x7FFFFFFF) +#define RDMA_MAX_DATA_SIZE_IN_WQE (0x80000000) #define RDMA_REQ_RD_ATOMIC_ELM_SIZE (0x50) #define RDMA_RESP_RD_ATOMIC_ELM_SIZE (0x20) diff --git 
a/include/linux/qed/roce_common.h b/include/linux/qed/roce_common.h index 866f063026de..fe6a33e45977 100644 --- a/include/linux/qed/roce_common.h +++ b/include/linux/qed/roce_common.h @@ -37,6 +37,8 @@ #define ROCE_REQ_MAX_SINGLE_SQ_WQE_SIZE (288) #define ROCE_MAX_QPS (32 * 1024) +#define ROCE_DCQCN_NP_MAX_QPS (64) +#define ROCE_DCQCN_RP_MAX_QPS (64) enum roce_async_events_type { ROCE_ASYNC_EVENT_NONE = 0, diff --git a/include/linux/qed/tcp_common.h b/include/linux/qed/tcp_common.h index a5e843268f0e..dbf7a43c3e1f 100644 --- a/include/linux/qed/tcp_common.h +++ b/include/linux/qed/tcp_common.h @@ -111,7 +111,6 @@ struct tcp_offload_params { __le32 snd_wnd; __le32 rcv_wnd; __le32 snd_wl1; - __le32 ts_time; __le32 ts_recent; __le32 ts_recent_age; __le32 total_rt; @@ -122,7 +121,7 @@ struct tcp_offload_params { u8 ka_probe_cnt; u8 rt_cnt; __le16 rtt_var; - __le16 reserved2; + __le16 fw_internal; __le32 ka_timeout; __le32 ka_interval; __le32 max_rt_time; @@ -130,7 +129,7 @@ struct tcp_offload_params { u8 snd_wnd_scale; u8 ack_frequency; __le16 da_timeout_value; - __le32 ts_ticks_per_second; + __le32 reserved3[2]; }; struct tcp_offload_params_opt2 { -- cgit v1.2.3 From 1dbe0ccb0631c4ed399261934fe16f07407b078d Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Thu, 18 May 2017 15:16:49 +0800 Subject: regulator: axp20x-regulator: add support for AXP803 AXP803 PMIC also have a series of regulators (DCDCs and LDOs) controllable via I2C/RSB bus. Add support for them. 
Signed-off-by: Icenowy Zheng Acked-by: Chen-Yu Tsai Signed-off-by: Mark Brown --- include/linux/mfd/axp20x.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index cde56cfe8446..965b027e31b3 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -119,6 +119,17 @@ enum axp20x_variants { #define AXP806_BUS_ADDR_EXT 0xfe #define AXP806_REG_ADDR_EXT 0xff +#define AXP803_POLYPHASE_CTRL 0x14 +#define AXP803_FLDO1_V_OUT 0x1c +#define AXP803_FLDO2_V_OUT 0x1d +#define AXP803_DCDC1_V_OUT 0x20 +#define AXP803_DCDC2_V_OUT 0x21 +#define AXP803_DCDC3_V_OUT 0x22 +#define AXP803_DCDC4_V_OUT 0x23 +#define AXP803_DCDC5_V_OUT 0x24 +#define AXP803_DCDC6_V_OUT 0x25 +#define AXP803_DCDC_FREQ_CTRL 0x3b + /* Interrupt */ #define AXP152_IRQ1_EN 0x40 #define AXP152_IRQ2_EN 0x41 @@ -350,6 +361,32 @@ enum { AXP809_REG_ID_MAX, }; +enum { + AXP803_DCDC1 = 0, + AXP803_DCDC2, + AXP803_DCDC3, + AXP803_DCDC4, + AXP803_DCDC5, + AXP803_DCDC6, + AXP803_DC1SW, + AXP803_ALDO1, + AXP803_ALDO2, + AXP803_ALDO3, + AXP803_DLDO1, + AXP803_DLDO2, + AXP803_DLDO3, + AXP803_DLDO4, + AXP803_ELDO1, + AXP803_ELDO2, + AXP803_ELDO3, + AXP803_FLDO1, + AXP803_FLDO2, + AXP803_RTC_LDO, + AXP803_LDO_IO0, + AXP803_LDO_IO1, + AXP803_REG_ID_MAX, +}; + /* IRQs */ enum { AXP152_IRQ_LDO0IN_CONNECT = 1, -- cgit v1.2.3 From 032838f9cb4014af8a974374db9e2ce6f3aa8d3b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 8 May 2017 12:33:48 -0700 Subject: drm/pl111: Register the clock divider and use it. This is required for the panel to work on bcm911360, where CLCDCLK is the fixed 200Mhz AXI41 clock. The rate set is still passed up to the CLCDCLK, for platforms that have a settable rate on that one. v2: Set SET_RATE_PARENT (caught by Linus Walleij), depend on COMMON_CLK. v3: Mark the clk_ops static (caught by Stephen). 
Signed-off-by: Eric Anholt Link: http://patchwork.freedesktop.org/patch/msgid/20170508193348.30236-1-eric@anholt.net Reviewed-by: Linus Walleij Reviewed-by: Stephen Boyd --- include/linux/amba/clcd-regs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/amba/clcd-regs.h b/include/linux/amba/clcd-regs.h index 69c0e2143003..516a6fda83c5 100644 --- a/include/linux/amba/clcd-regs.h +++ b/include/linux/amba/clcd-regs.h @@ -39,12 +39,17 @@ #define CLCD_PALL 0x00000200 #define CLCD_PALETTE 0x00000200 +#define TIM2_PCD_LO_MASK GENMASK(4, 0) +#define TIM2_PCD_LO_BITS 5 #define TIM2_CLKSEL (1 << 5) #define TIM2_IVS (1 << 11) #define TIM2_IHS (1 << 12) #define TIM2_IPC (1 << 13) #define TIM2_IOE (1 << 14) #define TIM2_BCD (1 << 26) +#define TIM2_PCD_HI_MASK GENMASK(31, 27) +#define TIM2_PCD_HI_BITS 5 +#define TIM2_PCD_HI_SHIFT 27 #define CNTL_LCDEN (1 << 0) #define CNTL_LCDBPP1 (0 << 1) -- cgit v1.2.3 From 9617813dba5b6c112922c60cd2bc57c6e11ae907 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 18 May 2017 15:44:37 +0200 Subject: skbuff: add stub to help computing crc32c on SCTP packets sctp_compute_checksum requires crc32c symbol (provided by libcrc32c), so it can't be used in net core. Like it has been done previously with other symbols (e.g. ipv6_dst_lookup), introduce a stub struct skb_checksum_ops to allow computation of crc32c checksum in net core after sctp.ko (and thus libcrc32c) has been loaded. Signed-off-by: Davide Caratti Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7c0cb2ce8b01..b1f46a0d18e2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3076,6 +3076,8 @@ struct skb_checksum_ops { __wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len); }; +extern const struct skb_checksum_ops *crc32c_csum_stub __read_mostly; + __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum, const struct skb_checksum_ops *ops); __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, -- cgit v1.2.3 From b72b5bf6a8fc9065f270ae135bbd47abb9d96790 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 18 May 2017 15:44:38 +0200 Subject: net: introduce skb_crc32c_csum_help skb_crc32c_csum_help is like skb_checksum_help, but it is designed for checksumming SCTP packets using crc32c (see RFC3309), provided that libcrc32c.ko has been loaded before. In case libcrc32c is not loaded, invoking skb_crc32c_csum_help on a skb results in one of the following printouts: warn_crc32c_csum_update: attempt to compute crc32c without libcrc32c.ko warn_crc32c_csum_combine: attempt to compute crc32c without libcrc32c.ko Signed-off-by: Davide Caratti Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 + include/linux/skbuff.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0150b2dd3031..abbc72e09f11 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3931,6 +3931,7 @@ void netdev_rss_key_fill(void *buffer, size_t len); int dev_get_nest_level(struct net_device *dev); int skb_checksum_help(struct sk_buff *skb); +int skb_crc32c_csum_help(struct sk_buff *skb); struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path); struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b1f46a0d18e2..62d62964c743 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -193,7 +193,8 @@ * accordingly. Note the there is no indication in the skbuff that the * CHECKSUM_PARTIAL refers to an SCTP checksum, a driver that supports * both IP checksum offload and SCTP CRC offload must verify which offload - * is configured for a packet presumably by inspecting packet headers. + * is configured for a packet presumably by inspecting packet headers; in + * case, skb_crc32c_csum_help is provided to compute CRC on SCTP packets. * * NETIF_F_FCOE_CRC - This feature indicates that a device is capable of * offloading the FCOE CRC in a packet. To perform this offload the stack -- cgit v1.2.3 From 219f1d79871257e9603f504dce0fe8ebf47aad08 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 18 May 2017 15:44:39 +0200 Subject: sk_buff: remove support for csum_bad in sk_buff This bit was introduced with commit 5a21232983aa ("net: Support for csum_bad in skbuff") to reduce the stack workload when processing RX packets carrying a wrong Internet Checksum. Up to now, only one driver and GRO core are setting it. Suggested-by: Tom Herbert Signed-off-by: Davide Caratti Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 +--- include/linux/skbuff.h | 23 ++--------------------- 2 files changed, 3 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index abbc72e09f11..c1611ace5336 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2573,9 +2573,7 @@ static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ __ret = __skb_gro_checksum_validate_complete(skb, \ compute_pseudo(skb, proto)); \ - if (__ret) \ - __skb_mark_checksum_bad(skb); \ - else \ + if (!__ret) \ skb_gro_incr_csum_unnecessary(skb); \ __ret; \ }) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 62d62964c743..c38f890d425e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -685,7 +685,7 @@ struct sk_buff { __u8 csum_valid:1; __u8 csum_complete_sw:1; __u8 csum_level:2; - __u8 csum_bad:1; + __u8 __csum_bad_unused:1; /* one bit hole */ __u8 dst_pending_confirm:1; #ifdef CONFIG_IPV6_NDISC_NODETYPE @@ -3336,21 +3336,6 @@ static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb) } } -static inline void __skb_mark_checksum_bad(struct sk_buff *skb) -{ - /* Mark current checksum as bad (typically called from GRO - * path). In the case that ip_summed is CHECKSUM_NONE - * this must be the first checksum encountered in the packet. - * When ip_summed is CHECKSUM_UNNECESSARY, this is the first - * checksum after the last one validated. For UDP, a zero - * checksum can not be marked as bad. - */ - - if (skb->ip_summed == CHECKSUM_NONE || - skb->ip_summed == CHECKSUM_UNNECESSARY) - skb->csum_bad = 1; -} - /* Check if we need to perform checksum complete validation. 
* * Returns true if checksum complete is needed, false otherwise @@ -3404,9 +3389,6 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb, skb->csum_valid = 1; return 0; } - } else if (skb->csum_bad) { - /* ip_summed == CHECKSUM_NONE in this case */ - return (__force __sum16)1; } skb->csum = psum; @@ -3466,8 +3448,7 @@ static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto) static inline bool __skb_checksum_convert_check(struct sk_buff *skb) { - return (skb->ip_summed == CHECKSUM_NONE && - skb->csum_valid && !skb->csum_bad); + return (skb->ip_summed == CHECKSUM_NONE && skb->csum_valid); } static inline void __skb_checksum_convert(struct sk_buff *skb, -- cgit v1.2.3 From dba003067a43a9699bef0c4bdbe320ece5a109b8 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 18 May 2017 15:44:40 +0200 Subject: net: use skb->csum_not_inet to identify packets needing crc32c skb->csum_not_inet carries the indication on which algorithm is needed to compute checksum on skb in the transmit path, when skb->ip_summed is equal to CHECKSUM_PARTIAL. If skb carries a SCTP packet and crc32c hasn't been yet written in L4 header, skb->csum_not_inet is assigned to 1; otherwise, assume Internet Checksum is needed and thus set skb->csum_not_inet to 0. Suggested-by: Tom Herbert Signed-off-by: Davide Caratti Acked-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c38f890d425e..a43d2086bb7f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -189,12 +189,13 @@ * * NETIF_F_SCTP_CRC - This feature indicates that a device is capable of * offloading the SCTP CRC in a packet. To perform this offload the stack - * will set ip_summed to CHECKSUM_PARTIAL and set csum_start and csum_offset - * accordingly. 
Note the there is no indication in the skbuff that the - * CHECKSUM_PARTIAL refers to an SCTP checksum, a driver that supports - * both IP checksum offload and SCTP CRC offload must verify which offload - * is configured for a packet presumably by inspecting packet headers; in - * case, skb_crc32c_csum_help is provided to compute CRC on SCTP packets. + * will set set csum_start and csum_offset accordingly, set ip_summed to + * CHECKSUM_PARTIAL and set csum_not_inet to 1, to provide an indication in + * the skbuff that the CHECKSUM_PARTIAL refers to CRC32c. + * A driver that supports both IP checksum offload and SCTP CRC32c offload + * must verify which offload is configured for a packet by testing the + * value of skb->csum_not_inet; skb_crc32c_csum_help is provided to resolve + * CHECKSUM_PARTIAL on skbs where csum_not_inet is set to 1. * * NETIF_F_FCOE_CRC - This feature indicates that a device is capable of * offloading the FCOE CRC in a packet. To perform this offload the stack @@ -557,6 +558,7 @@ typedef unsigned char *sk_buff_data_t; * @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS + * @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL * @dst_pending_confirm: need to confirm neighbour * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking @@ -685,7 +687,7 @@ struct sk_buff { __u8 csum_valid:1; __u8 csum_complete_sw:1; __u8 csum_level:2; - __u8 __csum_bad_unused:1; /* one bit hole */ + __u8 csum_not_inet:1; __u8 dst_pending_confirm:1; #ifdef CONFIG_IPV6_NDISC_NODETYPE -- cgit v1.2.3 From 43c26a1a45938624fb9301e8bf7dfabbed293619 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 18 May 2017 15:44:41 +0200 Subject: net: more accurate checksumming in validate_xmit_skb() skb_csum_hwoffload_help() uses netdev features and skb->csum_not_inet to determine if skb needs software computation of Internet Checksum or crc32c (or 
nothing, if this computation can be done by the hardware). Use it in place of skb_checksum_help() in validate_xmit_skb() to avoid corruption of non-GSO SCTP packets having skb->ip_summed equal to CHECKSUM_PARTIAL. While at it, remove references to skb_csum_off_chk* functions, since they are no longer present in Linux - see commit cf53b1da73bd ("Revert "net: Add driver helper functions to determine checksum offloadability""). Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ include/linux/skbuff.h | 13 +++++-------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c1611ace5336..f8f7cd52a0a0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3930,6 +3930,9 @@ void netdev_rss_key_fill(void *buffer, size_t len); int dev_get_nest_level(struct net_device *dev); int skb_checksum_help(struct sk_buff *skb); int skb_crc32c_csum_help(struct sk_buff *skb); +int skb_csum_hwoffload_help(struct sk_buff *skb, + const netdev_features_t features); + struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path); struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a43d2086bb7f..43d7ca07b2ff 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -162,14 +162,11 @@ * * NETIF_F_IP_CSUM and NETIF_F_IPV6_CSUM are being deprecated in favor of * NETIF_F_HW_CSUM. New devices should use NETIF_F_HW_CSUM to indicate - * checksum offload capability. If a device has limited checksum capabilities - * (for instance can only perform NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM as - * described above) a helper function can be called to resolve - * CHECKSUM_PARTIAL. The helper functions are skb_csum_off_chk*. 
The helper - * function takes a spec argument that describes the protocol layer that is - * supported for checksum offload and can be called for each packet. If a - * packet does not match the specification for offload, skb_checksum_help - * is called to resolve the checksum. + * checksum offload capability. + * skb_csum_hwoffload_help() can be called to resolve CHECKSUM_PARTIAL based + * on network device checksumming capabilities: if a packet does not match + * them, skb_checksum_help or skb_crc32c_help (depending on the value of + * csum_not_inet, see item D.) is called to resolve the checksum. * * CHECKSUM_NONE: * -- cgit v1.2.3 From b4759dcdcd8466e70f01ff07f33e17cd93131d34 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 18 May 2017 15:44:43 +0200 Subject: sk_buff.h: improve description of CHECKSUM_{COMPLETE, UNNECESSARY} Add FCoE to the list of protocols that can set CHECKSUM_UNNECESSARY; add a note to CHECKSUM_COMPLETE section to specify that it does not apply to SCTP and FCoE protocols. Suggested-by: Tom Herbert Signed-off-by: Davide Caratti Acked-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 43d7ca07b2ff..1713e4b7ea9f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -109,6 +109,7 @@ * may perform further validation in this case. * GRE: only if the checksum is present in the header. * SCTP: indicates the CRC in SCTP header has been validated. + * FCOE: indicates the CRC in FC frame has been validated. * * skb->csum_level indicates the number of consecutive checksums found in * the packet minus one that have been verified as CHECKSUM_UNNECESSARY. @@ -126,8 +127,10 @@ * packet as seen by netif_rx() and fills out in skb->csum. Meaning, the * hardware doesn't need to parse L3/L4 headers to implement this. 
* - * Note: Even if device supports only some protocols, but is able to produce - * skb->csum, it MUST use CHECKSUM_COMPLETE, not CHECKSUM_UNNECESSARY. + * Notes: + * - Even if device supports only some protocols, but is able to produce + * skb->csum, it MUST use CHECKSUM_COMPLETE, not CHECKSUM_UNNECESSARY. + * - CHECKSUM_COMPLETE is not applicable to SCTP and FCoE protocols. * * CHECKSUM_PARTIAL: * -- cgit v1.2.3 From 32d0f7830d9be5b1652a718e050d808b4908155f Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Thu, 18 May 2017 15:13:43 -0700 Subject: phy: Add helper function to check phy interface mode Added helper function that checks phy_mode is RGMII (all variants) 'bool phy_interface_mode_is_rgmii(phy_interface_t mode)' Changed the following function, to use the above. 'bool phy_interface_is_rgmii(struct phy_device *phydev)' Signed-off-by: Iyappan Subramanian Suggested-by: Florian Fainelli Suggested-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 54ef45823fc1..5a808a26e4cf 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -715,6 +715,17 @@ static inline bool phy_is_internal(struct phy_device *phydev) return phydev->is_internal; } +/** + * phy_interface_mode_is_rgmii - Convenience function for testing if a + * PHY interface mode is RGMII (all variants) + * @mode: the phy_interface_t enum + */ +static inline bool phy_interface_mode_is_rgmii(phy_interface_t mode) +{ + return mode >= PHY_INTERFACE_MODE_RGMII && + mode <= PHY_INTERFACE_MODE_RGMII_TXID; +}; + /** * phy_interface_is_rgmii - Convenience function for testing if a PHY interface * is RGMII (all variants) @@ -722,8 +733,7 @@ static inline bool phy_is_internal(struct phy_device *phydev) */ static inline bool phy_interface_is_rgmii(struct phy_device *phydev) { - return 
phydev->interface >= PHY_INTERFACE_MODE_RGMII && - phydev->interface <= PHY_INTERFACE_MODE_RGMII_TXID; + return phy_interface_mode_is_rgmii(phydev->interface); }; /* -- cgit v1.2.3 From ce72a16fa705f960ca2352e95a7c5f4801475e75 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 14 May 2017 20:25:02 -0400 Subject: wait4(2)/waitid(2): separate copying rusage to userland New helpers: kernel_waitid() and kernel_wait4(). sys_waitid(), sys_wait4() and their compat variants switched to those. Copying struct rusage to userland is left to syscall itself. For compat_sys_wait4() that eliminates the use of set_fs() completely. For compat_sys_waitid() it's still needed (for siginfo handling); that will change shortly. Signed-off-by: Al Viro --- include/linux/resource.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/resource.h b/include/linux/resource.h index 5bc3116e649c..277afdad6589 100644 --- a/include/linux/resource.h +++ b/include/linux/resource.h @@ -6,7 +6,7 @@ struct task_struct; -int getrusage(struct task_struct *p, int who, struct rusage __user *ru); +void getrusage(struct task_struct *p, int who, struct rusage *ru); int do_prlimit(struct task_struct *tsk, unsigned int resource, struct rlimit *new_rlim, struct rlimit *old_rlim); -- cgit v1.2.3 From 92ebce5ac55dba258c608248dddf59eca3f7f514 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 14 May 2017 23:54:33 -0400 Subject: osf_wait4: switch to kernel_wait4() ... 
and sanitize copying rusage to userland Signed-off-by: Al Viro --- include/linux/sched/task.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index a978d7189cfd..6b830fd9d809 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -9,6 +9,7 @@ #include struct task_struct; +struct rusage; union thread_union; /* @@ -74,6 +75,7 @@ extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); +extern long kernel_wait4(pid_t, int *, int, struct rusage *); extern void free_task(struct task_struct *tsk); -- cgit v1.2.3 From 90b602f80397657429373ca009f98aec4dd3c553 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Fri, 19 May 2017 17:52:37 +0200 Subject: net: add function to retrieve original skb device using NAPI ID Since commit b68581778cd0 ("net: Make skb->skb_iif always track skb->dev") skbs don't have the original index of the interface which received the packet. This information is now needed for a new control message related to hardware timestamping. Instead of adding a new field to skb, we can find the device by the NAPI ID if it is available, i.e. CONFIG_NET_RX_BUSY_POLL is enabled and the driver is using NAPI. Add dev_get_by_napi_id() and also skb_napi_id() to hide the CONFIG_NET_RX_BUSY_POLL ifdef. CC: Richard Cochran Suggested-by: Willem de Bruijn Acked-by: Willem de Bruijn Signed-off-by: Miroslav Lichvar Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 + include/linux/skbuff.h | 9 +++++++++ 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f8f7cd52a0a0..c50c9218e31e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2456,6 +2456,7 @@ static inline int dev_recursion_level(void) struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); +struct net_device *dev_get_by_napi_id(unsigned int napi_id); int netdev_get_name(struct net *net, char *name, int ifindex); int dev_restart(struct net_device *dev); int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1713e4b7ea9f..8acce7143f6a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -858,6 +858,15 @@ static inline bool skb_pkt_type_ok(u32 ptype) return ptype <= PACKET_OTHERHOST; } +static inline unsigned int skb_napi_id(const struct sk_buff *skb) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + return skb->napi_id; +#else + return 0; +#endif +} + void kfree_skb(struct sk_buff *skb); void kfree_skb_list(struct sk_buff *segs); void skb_tx_error(struct sk_buff *skb); -- cgit v1.2.3 From b50a5c70ffa4fd6b6da324ab54c84adf48fb17d9 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Fri, 19 May 2017 17:52:40 +0200 Subject: net: allow simultaneous SW and HW transmit timestamping Add SOF_TIMESTAMPING_OPT_TX_SWHW option to allow an outgoing packet to be looped to the socket's error queue with a software timestamp even when a hardware transmit timestamp is expected to be provided by the driver. Applications using this option will receive two separate messages from the error queue, one with a software timestamp and the other with a hardware timestamp. 
As the hardware timestamp is saved to the shared skb info, which may happen before the first message with software timestamp is received by the application, the hardware timestamp is copied to the SCM_TIMESTAMPING control message only when the skb has no software timestamp or it is an incoming packet. While changing sw_tx_timestamp(), inline it in skb_tx_timestamp() as there are no other users. CC: Richard Cochran CC: Willem de Bruijn Signed-off-by: Miroslav Lichvar Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8acce7143f6a..45a59c1e0cc7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3259,13 +3259,6 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, void skb_tstamp_tx(struct sk_buff *orig_skb, struct skb_shared_hwtstamps *hwtstamps); -static inline void sw_tx_timestamp(struct sk_buff *skb) -{ - if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP && - !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) - skb_tstamp_tx(skb, NULL); -} - /** * skb_tx_timestamp() - Driver hook for transmit timestamping * @@ -3281,7 +3274,8 @@ static inline void sw_tx_timestamp(struct sk_buff *skb) static inline void skb_tx_timestamp(struct sk_buff *skb) { skb_clone_tx_timestamp(skb); - sw_tx_timestamp(skb); + if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP) + skb_tstamp_tx(skb, NULL); } /** -- cgit v1.2.3 From 91b9ae48aadd7e634161372b0bc3ffc88a050e8b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 21 May 2017 22:30:33 +0200 Subject: HID: i2c-hid: move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. 
Signed-off-by: Wolfram Sang Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/i2c/i2c-hid.h | 42 ----------------------------------- include/linux/platform_data/i2c-hid.h | 42 +++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 42 deletions(-) delete mode 100644 include/linux/i2c/i2c-hid.h create mode 100644 include/linux/platform_data/i2c-hid.h (limited to 'include/linux') diff --git a/include/linux/i2c/i2c-hid.h b/include/linux/i2c/i2c-hid.h deleted file mode 100644 index 1fb088239d12..000000000000 --- a/include/linux/i2c/i2c-hid.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * HID over I2C protocol implementation - * - * Copyright (c) 2012 Benjamin Tissoires - * Copyright (c) 2012 Ecole Nationale de l'Aviation Civile, France - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of this archive for - * more details. - */ - -#ifndef __LINUX_I2C_HID_H -#define __LINUX_I2C_HID_H - -#include - -struct regulator; - -/** - * struct i2chid_platform_data - used by hid over i2c implementation. - * @hid_descriptor_address: i2c register where the HID descriptor is stored. - * @supply: regulator for powering on the device. - * @post_power_delay_ms: delay after powering on before device is usable. - * - * Note that it is the responsibility of the platform driver (or the acpi 5.0 - * driver, or the flattened device tree) to setup the irq related to the gpio in - * the struct i2c_board_info. 
- * The platform driver should also setup the gpio according to the device: - * - * A typical example is the following: - * irq = gpio_to_irq(intr_gpio); - * hkdk4412_i2c_devs5[0].irq = irq; // store the irq in i2c_board_info - * gpio_request(intr_gpio, "elan-irq"); - * s3c_gpio_setpull(intr_gpio, S3C_GPIO_PULL_UP); - */ -struct i2c_hid_platform_data { - u16 hid_descriptor_address; - struct regulator *supply; - int post_power_delay_ms; -}; - -#endif /* __LINUX_I2C_HID_H */ diff --git a/include/linux/platform_data/i2c-hid.h b/include/linux/platform_data/i2c-hid.h new file mode 100644 index 000000000000..1fb088239d12 --- /dev/null +++ b/include/linux/platform_data/i2c-hid.h @@ -0,0 +1,42 @@ +/* + * HID over I2C protocol implementation + * + * Copyright (c) 2012 Benjamin Tissoires + * Copyright (c) 2012 Ecole Nationale de l'Aviation Civile, France + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive for + * more details. + */ + +#ifndef __LINUX_I2C_HID_H +#define __LINUX_I2C_HID_H + +#include + +struct regulator; + +/** + * struct i2chid_platform_data - used by hid over i2c implementation. + * @hid_descriptor_address: i2c register where the HID descriptor is stored. + * @supply: regulator for powering on the device. + * @post_power_delay_ms: delay after powering on before device is usable. + * + * Note that it is the responsibility of the platform driver (or the acpi 5.0 + * driver, or the flattened device tree) to setup the irq related to the gpio in + * the struct i2c_board_info. 
+ * The platform driver should also setup the gpio according to the device: + * + * A typical example is the following: + * irq = gpio_to_irq(intr_gpio); + * hkdk4412_i2c_devs5[0].irq = irq; // store the irq in i2c_board_info + * gpio_request(intr_gpio, "elan-irq"); + * s3c_gpio_setpull(intr_gpio, S3C_GPIO_PULL_UP); + */ +struct i2c_hid_platform_data { + u16 hid_descriptor_address; + struct regulator *supply; + int post_power_delay_ms; +}; + +#endif /* __LINUX_I2C_HID_H */ -- cgit v1.2.3 From 6e7edabfc6a8ac5dce8c55363a7bb1576fc9348f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 11 May 2017 19:11:11 +0200 Subject: HID: Microsoft Win8 Wireless Radio Controls cleanup Use a better URL for the HUTRR40 Radio HID Usages documentation and use the HID_GD_WIRELESS_RADIO_CTLS define rather then hardcoding a check for 0x0001000c. Fixes: 61df56bef9 ("HID: Add mapping for Microsoft Win8 Wireless Radio Controls extensions") Suggested-by: Benjamin Tissoires Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina --- include/linux/hid.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 0b29466bbc21..bebbf4893448 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -183,9 +183,8 @@ struct hid_item { #define HID_GD_KEYPAD 0x00010007 #define HID_GD_MULTIAXIS 0x00010008 /* - * Microsoft Win8 Wireless Radio Controls extensions CA, see (checked 09052017): - * https://docs.microsoft.com/en-us/windows-hardware/drivers/hid/airplane-mode-radio-management - * https://web.archive.org/web/20170509144631/https://docs.microsoft.com/en-us/windows-hardware/drivers/hid/airplane-mode-radio-management + * Microsoft Win8 Wireless Radio Controls extensions CA, see: + * http://www.usb.org/developers/hidpage/HUTRR40RadioHIDUsagesFinal.pdf */ #define HID_GD_WIRELESS_RADIO_CTLS 0x0001000c #define HID_GD_X 0x00010030 -- cgit v1.2.3 From c3ab2b4ec8f7c0700bf10957171c479bf3dbca52 Mon Sep 17 
00:00:00 2001 From: David Ahern Date: Sun, 21 May 2017 10:12:03 -0600 Subject: net: ipv4: Add extack messages for route add failures Add messages for non-obvious errors (e.g, no need to add text for malloc failures or ENODEV failures). This mostly covers the annoying EINVAL errors Some message strings violate the 80-columns but searchable strings need to trump that rule. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netlink.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 5fff5ba5964e..a68aad484c69 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -97,6 +97,11 @@ struct netlink_ext_ack { #define NL_SET_ERR_MSG_MOD(extack, msg) \ NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg) +#define NL_SET_BAD_ATTR(extack, attr) do { \ + if ((extack)) \ + (extack)->bad_attr = (attr); \ +} while (0) + extern void netlink_kernel_release(struct sock *sk); extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups); extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); -- cgit v1.2.3 From 6f9a22bc5775d231ab8fbe2c2f3c88e45e3e7c28 Mon Sep 17 00:00:00 2001 From: Michael Hernandez Date: Thu, 18 May 2017 10:47:47 -0700 Subject: PCI/MSI: Ignore affinity if pre/post vector count is more than min_vecs min_vecs is the minimum amount of vectors needed to operate in MSI-X mode which may just include the vectors that don't need affinity. Disabling affinity settings causes the qla2xxx driver scsi_add_host() to fail when blk_mq is enabled as the blk_mq_pci_map_queues() expects affinity masks on each vector. 
Fixes: dfef358bd1be ("PCI/MSI: Don't apply affinity if there aren't enough vectors left") Signed-off-by: Michael Hernandez Signed-off-by: Himanshu Madhani Signed-off-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig Cc: stable@vger.kernel.org # v4.10+ --- include/linux/interrupt.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a6fba4804672..0991f973f8ca 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -291,7 +291,7 @@ extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); struct cpumask *irq_create_affinity_masks(int nvec, const struct irq_affinity *affd); -int irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd); +int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd); #else /* CONFIG_SMP */ @@ -331,7 +331,7 @@ irq_create_affinity_masks(int nvec, const struct irq_affinity *affd) } static inline int -irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd) +irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd) { return maxvec; } -- cgit v1.2.3 From f81126b0b67c864b0b3d614d8adadc3a37ba5209 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 22 May 2017 16:24:22 -0700 Subject: Input: lm8323 - move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. 
Signed-off-by: Wolfram Sang Signed-off-by: Dmitry Torokhov --- include/linux/i2c/lm8323.h | 46 ------------------------------------ include/linux/platform_data/lm8323.h | 46 ++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 46 deletions(-) delete mode 100644 include/linux/i2c/lm8323.h create mode 100644 include/linux/platform_data/lm8323.h (limited to 'include/linux') diff --git a/include/linux/i2c/lm8323.h b/include/linux/i2c/lm8323.h deleted file mode 100644 index 478d668bc590..000000000000 --- a/include/linux/i2c/lm8323.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * lm8323.h - Configuration for LM8323 keypad driver. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation (version 2 of the License only). - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef __LINUX_LM8323_H -#define __LINUX_LM8323_H - -#include - -/* - * Largest keycode that the chip can send, plus one, - * so keys can be mapped directly at the index of the - * LM8323 keycode instead of subtracting one. - */ -#define LM8323_KEYMAP_SIZE (0x7f + 1) - -#define LM8323_NUM_PWMS 3 - -struct lm8323_platform_data { - int debounce_time; /* Time to watch for key bouncing, in ms. */ - int active_time; /* Idle time until sleep, in ms. */ - - int size_x; - int size_y; - bool repeat; - const unsigned short *keymap; - - const char *pwm_names[LM8323_NUM_PWMS]; - - const char *name; /* Device name. 
*/ -}; - -#endif /* __LINUX_LM8323_H */ diff --git a/include/linux/platform_data/lm8323.h b/include/linux/platform_data/lm8323.h new file mode 100644 index 000000000000..478d668bc590 --- /dev/null +++ b/include/linux/platform_data/lm8323.h @@ -0,0 +1,46 @@ +/* + * lm8323.h - Configuration for LM8323 keypad driver. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License only). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __LINUX_LM8323_H +#define __LINUX_LM8323_H + +#include + +/* + * Largest keycode that the chip can send, plus one, + * so keys can be mapped directly at the index of the + * LM8323 keycode instead of subtracting one. + */ +#define LM8323_KEYMAP_SIZE (0x7f + 1) + +#define LM8323_NUM_PWMS 3 + +struct lm8323_platform_data { + int debounce_time; /* Time to watch for key bouncing, in ms. */ + int active_time; /* Idle time until sleep, in ms. */ + + int size_x; + int size_y; + bool repeat; + const unsigned short *keymap; + + const char *pwm_names[LM8323_NUM_PWMS]; + + const char *name; /* Device name. */ +}; + +#endif /* __LINUX_LM8323_H */ -- cgit v1.2.3 From 8cd9ab9e19b8cf23a7cae503af81ae70154e7956 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 22 May 2017 16:26:56 -0700 Subject: Input: mcs - move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. 
Signed-off-by: Wolfram Sang Signed-off-by: Dmitry Torokhov --- include/linux/i2c/mcs.h | 35 ----------------------------------- include/linux/platform_data/mcs.h | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 35 deletions(-) delete mode 100644 include/linux/i2c/mcs.h create mode 100644 include/linux/platform_data/mcs.h (limited to 'include/linux') diff --git a/include/linux/i2c/mcs.h b/include/linux/i2c/mcs.h deleted file mode 100644 index 61bb18a4fd3c..000000000000 --- a/include/linux/i2c/mcs.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (C) 2009 - 2010 Samsung Electronics Co.Ltd - * Author: Joonyoung Shim - * Author: HeungJun Kim - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - */ - -#ifndef __LINUX_MCS_H -#define __LINUX_MCS_H - -#define MCS_KEY_MAP(v, c) ((((v) & 0xff) << 16) | ((c) & 0xffff)) -#define MCS_KEY_VAL(v) (((v) >> 16) & 0xff) -#define MCS_KEY_CODE(v) ((v) & 0xffff) - -struct mcs_platform_data { - void (*poweron)(bool); - void (*cfg_pin)(void); - - /* touchscreen */ - unsigned int x_size; - unsigned int y_size; - - /* touchkey */ - const u32 *keymap; - unsigned int keymap_size; - unsigned int key_maxval; - bool no_autorepeat; -}; - -#endif /* __LINUX_MCS_H */ diff --git a/include/linux/platform_data/mcs.h b/include/linux/platform_data/mcs.h new file mode 100644 index 000000000000..61bb18a4fd3c --- /dev/null +++ b/include/linux/platform_data/mcs.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2009 - 2010 Samsung Electronics Co.Ltd + * Author: Joonyoung Shim + * Author: HeungJun Kim + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * 
option) any later version. + * + */ + +#ifndef __LINUX_MCS_H +#define __LINUX_MCS_H + +#define MCS_KEY_MAP(v, c) ((((v) & 0xff) << 16) | ((c) & 0xffff)) +#define MCS_KEY_VAL(v) (((v) >> 16) & 0xff) +#define MCS_KEY_CODE(v) ((v) & 0xffff) + +struct mcs_platform_data { + void (*poweron)(bool); + void (*cfg_pin)(void); + + /* touchscreen */ + unsigned int x_size; + unsigned int y_size; + + /* touchkey */ + const u32 *keymap; + unsigned int keymap_size; + unsigned int key_maxval; + bool no_autorepeat; +}; + +#endif /* __LINUX_MCS_H */ -- cgit v1.2.3 From 0d846a4cbbcbf81e527542d15e165b8f774bace5 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 22 May 2017 16:28:42 -0700 Subject: Input: mms114 - move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. Signed-off-by: Wolfram Sang Signed-off-by: Dmitry Torokhov --- include/linux/i2c/mms114.h | 24 ------------------------ include/linux/platform_data/mms114.h | 24 ++++++++++++++++++++++++ 2 files changed, 24 insertions(+), 24 deletions(-) delete mode 100644 include/linux/i2c/mms114.h create mode 100644 include/linux/platform_data/mms114.h (limited to 'include/linux') diff --git a/include/linux/i2c/mms114.h b/include/linux/i2c/mms114.h deleted file mode 100644 index 5722ebfb2738..000000000000 --- a/include/linux/i2c/mms114.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (C) 2012 Samsung Electronics Co.Ltd - * Author: Joonyoung Shim - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundationr - */ - -#ifndef __LINUX_MMS114_H -#define __LINUX_MMS114_H - -struct mms114_platform_data { - unsigned int x_size; - unsigned int y_size; - unsigned int contact_threshold; - unsigned int moving_threshold; - bool x_invert; - bool y_invert; - - void (*cfg_pin)(bool); -}; - -#endif /* __LINUX_MMS114_H */ diff --git 
a/include/linux/platform_data/mms114.h b/include/linux/platform_data/mms114.h new file mode 100644 index 000000000000..5722ebfb2738 --- /dev/null +++ b/include/linux/platform_data/mms114.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2012 Samsung Electronics Co.Ltd + * Author: Joonyoung Shim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundationr + */ + +#ifndef __LINUX_MMS114_H +#define __LINUX_MMS114_H + +struct mms114_platform_data { + unsigned int x_size; + unsigned int y_size; + unsigned int contact_threshold; + unsigned int moving_threshold; + bool x_invert; + bool y_invert; + + void (*cfg_pin)(bool); +}; + +#endif /* __LINUX_MMS114_H */ -- cgit v1.2.3 From 8fd708157a592a376c4d0b3b2ba23b9e9f79caa5 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 22 May 2017 16:30:04 -0700 Subject: Input: tsc2007 - move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. Signed-off-by: Wolfram Sang Acked-by: Lee Jones Signed-off-by: Dmitry Torokhov --- include/linux/i2c/tsc2007.h | 22 ---------------------- include/linux/platform_data/tsc2007.h | 22 ++++++++++++++++++++++ 2 files changed, 22 insertions(+), 22 deletions(-) delete mode 100644 include/linux/i2c/tsc2007.h create mode 100644 include/linux/platform_data/tsc2007.h (limited to 'include/linux') diff --git a/include/linux/i2c/tsc2007.h b/include/linux/i2c/tsc2007.h deleted file mode 100644 index 4f35b6ad3889..000000000000 --- a/include/linux/i2c/tsc2007.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef __LINUX_I2C_TSC2007_H -#define __LINUX_I2C_TSC2007_H - -/* linux/i2c/tsc2007.h */ - -struct tsc2007_platform_data { - u16 model; /* 2007. */ - u16 x_plate_ohms; /* must be non-zero value */ - u16 max_rt; /* max. 
resistance above which samples are ignored */ - unsigned long poll_period; /* time (in ms) between samples */ - int fuzzx; /* fuzz factor for X, Y and pressure axes */ - int fuzzy; - int fuzzz; - - int (*get_pendown_state)(struct device *); - /* If needed, clear 2nd level interrupt source */ - void (*clear_penirq)(void); - int (*init_platform_hw)(void); - void (*exit_platform_hw)(void); -}; - -#endif diff --git a/include/linux/platform_data/tsc2007.h b/include/linux/platform_data/tsc2007.h new file mode 100644 index 000000000000..c2d3aa1dadd4 --- /dev/null +++ b/include/linux/platform_data/tsc2007.h @@ -0,0 +1,22 @@ +#ifndef __LINUX_I2C_TSC2007_H +#define __LINUX_I2C_TSC2007_H + +/* linux/platform_data/tsc2007.h */ + +struct tsc2007_platform_data { + u16 model; /* 2007. */ + u16 x_plate_ohms; /* must be non-zero value */ + u16 max_rt; /* max. resistance above which samples are ignored */ + unsigned long poll_period; /* time (in ms) between samples */ + int fuzzx; /* fuzz factor for X, Y and pressure axes */ + int fuzzy; + int fuzzz; + + int (*get_pendown_state)(struct device *); + /* If needed, clear 2nd level interrupt source */ + void (*clear_penirq)(void); + int (*init_platform_hw)(void); + void (*exit_platform_hw)(void); +}; + +#endif -- cgit v1.2.3 From d795cb51dfee2a859b5585101a4e3ce5bc9bff75 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 15 May 2017 11:24:27 +0200 Subject: pinctrl: mcp23s08: drop pullup config from pdata mcp23s08 support configuration of the pullups using the pinconf framework. This removes the custom pullup configuration from platform data, which has no upstream users. 
Signed-off-by: Sebastian Reichel Signed-off-by: Linus Walleij --- include/linux/spi/mcp23s08.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h index aa07d7b32568..080ecc6bb270 100644 --- a/include/linux/spi/mcp23s08.h +++ b/include/linux/spi/mcp23s08.h @@ -3,7 +3,6 @@ struct mcp23s08_chip_info { bool is_present; /* true if populated */ - unsigned pullups; /* BIT(x) means enable pullup x */ }; struct mcp23s08_platform_data { -- cgit v1.2.3 From 5b1a7e803a9fd960b6d75a1d970519c57cfe2618 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 15 May 2017 11:24:35 +0200 Subject: pinctrl: mcp23s08: generalize irq property handling This moves irq property handling from spi/i2c specific code into the generic mcp23s08_probe_one. This is possible because the device properties are named equally. As a side-effect this drops support for setting the properties via pdata, which has no mainline users. If boardcode wants to enable the chip as interrupt controller it can attach the device properties instead. Signed-off-by: Sebastian Reichel Signed-off-by: Linus Walleij --- include/linux/spi/mcp23s08.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h index 080ecc6bb270..4af82ee63329 100644 --- a/include/linux/spi/mcp23s08.h +++ b/include/linux/spi/mcp23s08.h @@ -21,22 +21,4 @@ struct mcp23s08_platform_data { * base to base+15 (or base+31 for s17 variant). */ unsigned base; - /* Marks the device as a interrupt controller. - * NOTE: The interrupt functionality is only supported for i2c - * versions of the chips. The spi chips can also do the interrupts, - * but this is not supported by the linux driver yet. - */ - bool irq_controller; - - /* Sets the mirror flag in the IOCON register. 
Devices - * with two interrupt outputs (these are the devices ending with 17 and - * those that have 16 IOs) have two IO banks: IO 0-7 form bank 1 and - * IO 8-15 are bank 2. These chips have two different interrupt outputs: - * One for bank 1 and another for bank 2. If irq-mirror is set, both - * interrupts are generated regardless of the bank that an input change - * occurred on. If it is not set, the interrupt are only generated for - * the bank they belong to. - * On devices with only one interrupt output this property is useless. - */ - bool mirror; }; -- cgit v1.2.3 From ce9bd0a0ff106b478012dc2e4c2b10bb0138dd7a Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 15 May 2017 11:24:36 +0200 Subject: pinctrl: mcp23s08: simplify spi_present_mask handling Signed-off-by: Sebastian Reichel Signed-off-by: Linus Walleij --- include/linux/spi/mcp23s08.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h index 4af82ee63329..211f3c0ef49c 100644 --- a/include/linux/spi/mcp23s08.h +++ b/include/linux/spi/mcp23s08.h @@ -1,10 +1,6 @@ /* FIXME driver should be able to handle IRQs... */ -struct mcp23s08_chip_info { - bool is_present; /* true if populated */ -}; - struct mcp23s08_platform_data { /* For mcp23s08, up to 4 slaves (numbered 0..3) can share one SPI * chipselect, each providing 1 gpio_chip instance with 8 gpios. @@ -12,7 +8,7 @@ struct mcp23s08_platform_data { * chipselect, each providing 1 gpio_chip (port A + port B) with * 16 gpios. */ - struct mcp23s08_chip_info chip[8]; + u32 spi_present_mask; /* "base" is the number of the first GPIO. 
Dynamic assignment is * not currently supported, and even if there are gaps in chip -- cgit v1.2.3 From d8f4494e70ae5fef159719bfbb6abedc53619bf1 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 15 May 2017 11:24:37 +0200 Subject: pinctrl: mcp23s08: drop comment about missing irq support The driver supports using mcp23xxx as interrupt controller, so let's drop all comments stating otherwise. Signed-off-by: Sebastian Reichel Signed-off-by: Linus Walleij --- include/linux/spi/mcp23s08.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h index 211f3c0ef49c..4354beefd584 100644 --- a/include/linux/spi/mcp23s08.h +++ b/include/linux/spi/mcp23s08.h @@ -1,6 +1,3 @@ - -/* FIXME driver should be able to handle IRQs... */ - struct mcp23s08_platform_data { /* For mcp23s08, up to 4 slaves (numbered 0..3) can share one SPI * chipselect, each providing 1 gpio_chip instance with 8 gpios. -- cgit v1.2.3 From 7f38c5b99779554d6c6e5043cfda848b967f2ca9 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 15 May 2017 11:24:38 +0200 Subject: pinctrl: mcp23s08: fix comment for mcp23s08_platform_data.base The comment does not match the driver, which actually supports automatic assignment. Fix this by updating the comment. Signed-off-by: Sebastian Reichel Signed-off-by: Linus Walleij --- include/linux/spi/mcp23s08.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h index 4354beefd584..82d96a346e6f 100644 --- a/include/linux/spi/mcp23s08.h +++ b/include/linux/spi/mcp23s08.h @@ -7,11 +7,11 @@ struct mcp23s08_platform_data { */ u32 spi_present_mask; - /* "base" is the number of the first GPIO. Dynamic assignment is - * not currently supported, and even if there are gaps in chip - * addressing the GPIO numbers are sequential .. 
so for example - * if only slaves 0 and 3 are present, their GPIOs range from - * base to base+15 (or base+31 for s17 variant). + /* "base" is the number of the first GPIO or -1 for dynamic + * assignment. If there are gaps in chip addressing the GPIO + * numbers are sequential .. so for example if only slaves 0 + * and 3 are present, their GPIOs range from base to base+15 + * (or base+31 for s17 variant). */ unsigned base; }; -- cgit v1.2.3 From 6c8557bdb28df3ae97476c5e2aed6373cd235aab Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 19 May 2017 12:58:25 +0200 Subject: smp, cpumask: Use non-atomic cpumask_{set,clear}_cpu() The cpumasks in smp_call_function_many() are private and not subject to concurrency, atomic bitops are pointless and expensive. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/cpumask.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index a21b1fb9a968..4bf4479a3a80 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -293,6 +293,12 @@ static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } +static inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) +{ + __set_bit(cpumask_check(cpu), cpumask_bits(dstp)); +} + + /** * cpumask_clear_cpu - clear a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) @@ -303,6 +309,11 @@ static inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp) clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); } +static inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) +{ + __clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); +} + /** * cpumask_test_cpu - test for a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) -- cgit v1.2.3 From d714893e61cd8c6e5c7e095f7dd615aa434bca95 Mon Sep 
17 00:00:00 2001 From: Byungchul Park Date: Fri, 12 May 2017 09:36:56 +0900 Subject: llist: Provide a safe version for llist_for_each() Sometimes we have to dereference next field of llist node before entering loop because the node might be deleted or the next field might be modified within the loop. So this adds the safe version of llist_for_each(), that is, llist_for_each_safe(). Signed-off-by: Byungchul Park Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Huang, Ying Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1494549416-10539-1-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- include/linux/llist.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/llist.h b/include/linux/llist.h index 171baa90f6f6..d11738110a7a 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -109,6 +109,25 @@ static inline void init_llist_head(struct llist_head *list) #define llist_for_each(pos, node) \ for ((pos) = (node); pos; (pos) = (pos)->next) +/** + * llist_for_each_safe - iterate over some deleted entries of a lock-less list + * safe against removal of list entry + * @pos: the &struct llist_node to use as a loop cursor + * @n: another &struct llist_node to use as temporary storage + * @node: the first entry of deleted list entries + * + * In general, some entries of the lock-less list can be traversed + * safely only after being deleted from list, so start with an entry + * instead of list head. + * + * If being used on entries deleted from lock-less list directly, the + * traverse order is from the newest to the oldest added entry. If + * you want to traverse from the oldest to the newest, you must + * reverse the order by yourself before traversing.
+ */ +#define llist_for_each_safe(pos, n, node) \ + for ((pos) = (node); (pos) && ((n) = (pos)->next, true); (pos) = (n)) + /** * llist_for_each_entry - iterate over some deleted entries of lock-less list of given type * @pos: the type * to use as a loop cursor. -- cgit v1.2.3 From 69a78ff226fe0241ab6cb9dd961667be477e3cf7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 16 May 2017 20:42:47 +0200 Subject: init: Introduce SYSTEM_SCHEDULING state might_sleep() debugging and smp_processor_id() debugging should be active right after the scheduler starts working. The init task can invoke smp_processor_id() from preemptible context as it is pinned on the boot cpu until sched_smp_init() removes the pinning and lets it schedule on all non isolated cpus. Add a new state which allows to enable those checks earlier and add it to the xen do_poweroff() function. No functional change. Tested-by: Mark Rutland Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Boris Ostrovsky Acked-by: Mark Rutland Cc: Greg Kroah-Hartman Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170516184736.196214622@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 13bc08aba704..1c91f26e2996 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -490,9 +490,13 @@ extern int root_mountflags; extern bool early_boot_irqs_disabled; -/* Values used for system_state */ +/* + * Values used for system_state. Ordering of the states must not be changed + * as code checks for <, <=, >, >= STATE. 
+ */ extern enum system_states { SYSTEM_BOOTING, + SYSTEM_SCHEDULING, SYSTEM_RUNNING, SYSTEM_HALT, SYSTEM_POWER_OFF, -- cgit v1.2.3 From 0a848d638a25b4f2767b260ed83c271854e93cce Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 21 May 2017 23:57:25 +0200 Subject: gpio: max732x: move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. Signed-off-by: Wolfram Sang Signed-off-by: Linus Walleij --- include/linux/i2c/max732x.h | 22 ---------------------- include/linux/platform_data/max732x.h | 22 ++++++++++++++++++++++ 2 files changed, 22 insertions(+), 22 deletions(-) delete mode 100644 include/linux/i2c/max732x.h create mode 100644 include/linux/platform_data/max732x.h (limited to 'include/linux') diff --git a/include/linux/i2c/max732x.h b/include/linux/i2c/max732x.h deleted file mode 100644 index c04bac8bf2fe..000000000000 --- a/include/linux/i2c/max732x.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef __LINUX_I2C_MAX732X_H -#define __LINUX_I2C_MAX732X_H - -/* platform data for the MAX732x 8/16-bit I/O expander driver */ - -struct max732x_platform_data { - /* number of the first GPIO */ - unsigned gpio_base; - - /* interrupt base */ - int irq_base; - - void *context; /* param to setup/teardown */ - - int (*setup)(struct i2c_client *client, - unsigned gpio, unsigned ngpio, - void *context); - int (*teardown)(struct i2c_client *client, - unsigned gpio, unsigned ngpio, - void *context); -}; -#endif /* __LINUX_I2C_MAX732X_H */ diff --git a/include/linux/platform_data/max732x.h b/include/linux/platform_data/max732x.h new file mode 100644 index 000000000000..c04bac8bf2fe --- /dev/null +++ b/include/linux/platform_data/max732x.h @@ -0,0 +1,22 @@ +#ifndef __LINUX_I2C_MAX732X_H +#define __LINUX_I2C_MAX732X_H + +/* platform data for the MAX732x 8/16-bit I/O expander driver */ + +struct max732x_platform_data { + /* number of the first GPIO */ + unsigned gpio_base; + + /* interrupt base */ + int irq_base; + 
+ void *context; /* param to setup/teardown */ + + int (*setup)(struct i2c_client *client, + unsigned gpio, unsigned ngpio, + void *context); + int (*teardown)(struct i2c_client *client, + unsigned gpio, unsigned ngpio, + void *context); +}; +#endif /* __LINUX_I2C_MAX732X_H */ -- cgit v1.2.3 From b6480faeee234829b315168aebcb281ecf95f178 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 21 May 2017 23:57:26 +0200 Subject: gpio: pcf857x: move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. Signed-off-by: Wolfram Sang Signed-off-by: Linus Walleij --- include/linux/i2c/pcf857x.h | 44 ----------------------------------- include/linux/platform_data/pcf857x.h | 44 +++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 44 deletions(-) delete mode 100644 include/linux/i2c/pcf857x.h create mode 100644 include/linux/platform_data/pcf857x.h (limited to 'include/linux') diff --git a/include/linux/i2c/pcf857x.h b/include/linux/i2c/pcf857x.h deleted file mode 100644 index 0767a2a6b2f1..000000000000 --- a/include/linux/i2c/pcf857x.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef __LINUX_PCF857X_H -#define __LINUX_PCF857X_H - -/** - * struct pcf857x_platform_data - data to set up pcf857x driver - * @gpio_base: number of the chip's first GPIO - * @n_latch: optional bit-inverse of initial register value; if - * you leave this initialized to zero the driver will act - * like the chip was just reset - * @setup: optional callback issued once the GPIOs are valid - * @teardown: optional callback issued before the GPIOs are invalidated - * @context: optional parameter passed to setup() and teardown() - * - * In addition to the I2C_BOARD_INFO() state appropriate to each chip, - * the i2c_board_info used with the pcf875x driver must provide its - * platform_data (pointer to one of these structures) with at least - * the gpio_base value initialized. 
- * - * The @setup callback may be used with the kind of board-specific glue - * which hands the (now-valid) GPIOs to other drivers, or which puts - * devices in their initial states using these GPIOs. - * - * These GPIO chips are only "quasi-bidirectional"; read the chip specs - * to understand the behavior. They don't have separate registers to - * record which pins are used for input or output, record which output - * values are driven, or provide access to input values. That must be - * inferred by reading the chip's value and knowing the last value written - * to it. If you leave n_latch initialized to zero, that last written - * value is presumed to be all ones (as if the chip were just reset). - */ -struct pcf857x_platform_data { - unsigned gpio_base; - unsigned n_latch; - - int (*setup)(struct i2c_client *client, - int gpio, unsigned ngpio, - void *context); - int (*teardown)(struct i2c_client *client, - int gpio, unsigned ngpio, - void *context); - void *context; -}; - -#endif /* __LINUX_PCF857X_H */ diff --git a/include/linux/platform_data/pcf857x.h b/include/linux/platform_data/pcf857x.h new file mode 100644 index 000000000000..0767a2a6b2f1 --- /dev/null +++ b/include/linux/platform_data/pcf857x.h @@ -0,0 +1,44 @@ +#ifndef __LINUX_PCF857X_H +#define __LINUX_PCF857X_H + +/** + * struct pcf857x_platform_data - data to set up pcf857x driver + * @gpio_base: number of the chip's first GPIO + * @n_latch: optional bit-inverse of initial register value; if + * you leave this initialized to zero the driver will act + * like the chip was just reset + * @setup: optional callback issued once the GPIOs are valid + * @teardown: optional callback issued before the GPIOs are invalidated + * @context: optional parameter passed to setup() and teardown() + * + * In addition to the I2C_BOARD_INFO() state appropriate to each chip, + * the i2c_board_info used with the pcf875x driver must provide its + * platform_data (pointer to one of these structures) with at least + * the 
gpio_base value initialized. + * + * The @setup callback may be used with the kind of board-specific glue + * which hands the (now-valid) GPIOs to other drivers, or which puts + * devices in their initial states using these GPIOs. + * + * These GPIO chips are only "quasi-bidirectional"; read the chip specs + * to understand the behavior. They don't have separate registers to + * record which pins are used for input or output, record which output + * values are driven, or provide access to input values. That must be + * inferred by reading the chip's value and knowing the last value written + * to it. If you leave n_latch initialized to zero, that last written + * value is presumed to be all ones (as if the chip were just reset). + */ +struct pcf857x_platform_data { + unsigned gpio_base; + unsigned n_latch; + + int (*setup)(struct i2c_client *client, + int gpio, unsigned ngpio, + void *context); + int (*teardown)(struct i2c_client *client, + int gpio, unsigned ngpio, + void *context); + void *context; +}; + +#endif /* __LINUX_PCF857X_H */ -- cgit v1.2.3 From d291f1a6523292d916fe1659c67f6db061fbd1b5 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Fri, 19 May 2017 15:48:52 +0300 Subject: IB/core: Enforce PKey security on QPs Add new LSM hooks to allocate and free security contexts and check for permission to access a PKey. Allocate and free a security context when creating and destroying a QP. This context is used for controlling access to PKeys. When a request is made to modify a QP that changes the port, PKey index, or alternate path, check that the QP has permission for the PKey in the PKey table index on the subnet prefix of the port. If the QP is shared make sure all handles to the QP also have access. Store which port and PKey index a QP is using. After the reset to init transition the user can modify the port, PKey index and alternate path independently. So port and PKey settings changes can be a merge of the previous settings and the new ones. 
In order to maintain access control if there are PKey table or subnet prefix changes, keep a list of all QPs that are using each PKey index on each port. If a change occurs all QPs using that device and port must have access enforced for the new cache settings. These changes add a transaction to the QP modify process. Association with the old port and PKey index must be maintained if the modify fails, and must be removed if it succeeds. Association with the new port and PKey index must be established prior to the modify and removed if the modify fails. 1. When a QP is modified to a particular Port, PKey index or alternate path insert that QP into the appropriate lists. 2. Check permission to access the new settings. 3. If step 2 grants access attempt to modify the QP. 4a. If steps 2 and 3 succeed remove any prior associations. 4b. If either fails remove the new setting associations. If a PKey table or subnet prefix changes walk the list of QPs and check that they have permission. If not send the QP to the error state and raise a fatal error event. If it's a shared QP make sure all the QPs that share the real_qp have permission as well. If the QP that owns a security structure is denied access the security structure is marked as such and the QP is added to an error_list. Once moving the QP to error is complete the security structure mark is cleared. Maintaining the lists correctly turns QP destroy into a transaction. The hardware driver for the device frees the ib_qp structure, so while the destroy is in progress the ib_qp pointer in the ib_qp_security struct is undefined. When the destroy process begins the ib_qp_security structure is marked as destroying. This prevents any action from being taken on the QP pointer. After the QP is destroyed successfully it could still be listed on an error_list; wait for it to be processed by that flow before cleaning up the structure.
If the destroy fails the QPs port and PKey settings are reinserted into the appropriate lists, the destroying flag is cleared, and access control is enforced, in case there were any cache changes during the destroy flow. To keep the security changes isolated a new file is used to hold security related functionality. Signed-off-by: Daniel Jurgens Acked-by: Doug Ledford [PM: merge fixup in ib_verbs.h and uverbs_cmd.c] Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 27 +++++++++++++++++++++++++++ include/linux/security.h | 21 +++++++++++++++++++++ 2 files changed, 48 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 080f34e66017..6d9f41fffda7 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -8,6 +8,7 @@ * Copyright (C) 2001 Silicon Graphics, Inc. (Trust Technology Group) * Copyright (C) 2015 Intel Corporation. * Copyright (C) 2015 Casey Schaufler + * Copyright (C) 2016 Mellanox Techonologies * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -911,6 +912,21 @@ * associated with the TUN device's security structure. * @security pointer to the TUN devices's security structure. * + * Security hooks for Infiniband + * + * @ib_pkey_access: + * Check permission to access a pkey when modifing a QP. + * @subnet_prefix the subnet prefix of the port being used. + * @pkey the pkey to be accessed. + * @sec pointer to a security structure. + * @ib_alloc_security: + * Allocate a security structure for Infiniband objects. + * @sec pointer to a security structure pointer. + * Returns 0 on success, non-zero on failure + * @ib_free_security: + * Deallocate an Infiniband security structure. + * @sec contains the security structure to be freed. + * * Security hooks for XFRM operations. 
* * @xfrm_policy_alloc_security: @@ -1620,6 +1636,12 @@ union security_list_options { int (*tun_dev_open)(void *security); #endif /* CONFIG_SECURITY_NETWORK */ +#ifdef CONFIG_SECURITY_INFINIBAND + int (*ib_pkey_access)(void *sec, u64 subnet_prefix, u16 pkey); + int (*ib_alloc_security)(void **sec); + void (*ib_free_security)(void *sec); +#endif /* CONFIG_SECURITY_INFINIBAND */ + #ifdef CONFIG_SECURITY_NETWORK_XFRM int (*xfrm_policy_alloc_security)(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *sec_ctx, @@ -1851,6 +1873,11 @@ struct security_hook_heads { struct list_head tun_dev_attach; struct list_head tun_dev_open; #endif /* CONFIG_SECURITY_NETWORK */ +#ifdef CONFIG_SECURITY_INFINIBAND + struct list_head ib_pkey_access; + struct list_head ib_alloc_security; + struct list_head ib_free_security; +#endif /* CONFIG_SECURITY_INFINIBAND */ #ifdef CONFIG_SECURITY_NETWORK_XFRM struct list_head xfrm_policy_alloc_security; struct list_head xfrm_policy_clone_security; diff --git a/include/linux/security.h b/include/linux/security.h index af675b576645..8c73ee073bab 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -6,6 +6,7 @@ * Copyright (C) 2001 Networks Associates Technology, Inc * Copyright (C) 2001 James Morris * Copyright (C) 2001 Silicon Graphics, Inc. 
(Trust Technology Group) + * Copyright (C) 2016 Mellanox Techonologies * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -1406,6 +1407,26 @@ static inline int security_tun_dev_open(void *security) } #endif /* CONFIG_SECURITY_NETWORK */ +#ifdef CONFIG_SECURITY_INFINIBAND +int security_ib_pkey_access(void *sec, u64 subnet_prefix, u16 pkey); +int security_ib_alloc_security(void **sec); +void security_ib_free_security(void *sec); +#else /* CONFIG_SECURITY_INFINIBAND */ +static inline int security_ib_pkey_access(void *sec, u64 subnet_prefix, u16 pkey) +{ + return 0; +} + +static inline int security_ib_alloc_security(void **sec) +{ + return 0; +} + +static inline void security_ib_free_security(void *sec) +{ +} +#endif /* CONFIG_SECURITY_INFINIBAND */ + #ifdef CONFIG_SECURITY_NETWORK_XFRM int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, -- cgit v1.2.3 From 8f408ab64be6319cb7736cbc6982838dcc362306 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Fri, 19 May 2017 15:48:53 +0300 Subject: selinux lsm IB/core: Implement LSM notification system Add a generic notification mechanism in the LSM. Interested consumers can register a callback with the LSM and security modules can produce events. Because access to Infiniband QPs is enforced in the setup phase of a connection security should be enforced again if the policy changes. Register infiniband devices for policy change notification and check all QPs on that device when the notification is received. Add a call to the notification mechanism from SELinux when the AVC cache changes or setenforce is cleared.
Signed-off-by: Daniel Jurgens Acked-by: James Morris Acked-by: Doug Ledford Signed-off-by: Paul Moore --- include/linux/security.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 8c73ee073bab..f96e333f6042 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -69,6 +69,10 @@ struct audit_krule; struct user_namespace; struct timezone; +enum lsm_event { + LSM_POLICY_CHANGE, +}; + /* These functions are in security/commoncap.c */ extern int cap_capable(const struct cred *cred, struct user_namespace *ns, int cap, int audit); @@ -164,6 +168,10 @@ struct security_mnt_opts { int num_mnt_opts; }; +int call_lsm_notifier(enum lsm_event event, void *data); +int register_lsm_notifier(struct notifier_block *nb); +int unregister_lsm_notifier(struct notifier_block *nb); + static inline void security_init_mnt_opts(struct security_mnt_opts *opts) { opts->mnt_opts = NULL; @@ -382,6 +390,21 @@ int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); struct security_mnt_opts { }; +static inline int call_lsm_notifier(enum lsm_event event, void *data) +{ + return 0; +} + +static inline int register_lsm_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int unregister_lsm_notifier(struct notifier_block *nb) +{ + return 0; +} + static inline void security_init_mnt_opts(struct security_mnt_opts *opts) { } -- cgit v1.2.3 From 47a2b338fe63200d716d2e24131cdb49f17c77da Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Fri, 19 May 2017 15:48:54 +0300 Subject: IB/core: Enforce security on management datagrams Allocate and free a security context when creating and destroying a MAD agent. This context is used for controlling access to PKeys and sending and receiving SMPs. When sending or receiving a MAD check that the agent has permission to access the PKey for the Subnet Prefix of the port. 
During MAD and snoop agent registration for SMI QPs check that the calling process has permission to manage the subnet and register a callback with the LSM to be notified of policy changes. When notification of a policy change occurs recheck permission and set a flag indicating sending and receiving SMPs is allowed. When sending and receiving MADs check that the agent has access to the SMI if it's on an SMI QP. Because security policy can change it's possible permission was allowed when creating the agent, but no longer is. Signed-off-by: Daniel Jurgens Acked-by: Doug Ledford [PM: remove the LSM hook init code] Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 8 ++++++++ include/linux/security.h | 6 ++++++ 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 6d9f41fffda7..68d91e423bca 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -919,6 +919,11 @@ * @subnet_prefix the subnet prefix of the port being used. * @pkey the pkey to be accessed. * @sec pointer to a security structure. + * @ib_endport_manage_subnet: + * Check permissions to send and receive SMPs on a end port. + * @dev_name the IB device name (i.e. mlx4_0). + * @port_num the port number. + * @sec pointer to a security structure. * @ib_alloc_security: * Allocate a security structure for Infiniband objects. * @sec pointer to a security structure pointer.
@@ -1638,6 +1643,8 @@ union security_list_options { #ifdef CONFIG_SECURITY_INFINIBAND int (*ib_pkey_access)(void *sec, u64 subnet_prefix, u16 pkey); + int (*ib_endport_manage_subnet)(void *sec, const char *dev_name, + u8 port_num); int (*ib_alloc_security)(void **sec); void (*ib_free_security)(void *sec); #endif /* CONFIG_SECURITY_INFINIBAND */ @@ -1875,6 +1882,7 @@ struct security_hook_heads { #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_INFINIBAND struct list_head ib_pkey_access; + struct list_head ib_endport_manage_subnet; struct list_head ib_alloc_security; struct list_head ib_free_security; #endif /* CONFIG_SECURITY_INFINIBAND */ diff --git a/include/linux/security.h b/include/linux/security.h index f96e333f6042..549cb828a888 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1432,6 +1432,7 @@ static inline int security_tun_dev_open(void *security) #ifdef CONFIG_SECURITY_INFINIBAND int security_ib_pkey_access(void *sec, u64 subnet_prefix, u16 pkey); +int security_ib_endport_manage_subnet(void *sec, const char *name, u8 port_num); int security_ib_alloc_security(void **sec); void security_ib_free_security(void *sec); #else /* CONFIG_SECURITY_INFINIBAND */ @@ -1440,6 +1441,11 @@ static inline int security_ib_pkey_access(void *sec, u64 subnet_prefix, u16 pkey return 0; } +static inline int security_ib_endport_manage_subnet(void *sec, const char *dev_name, u8 port_num) +{ + return 0; +} + static inline int security_ib_alloc_security(void **sec) { return 0; -- cgit v1.2.3 From cfc4d882d41780d93471066d57d4630995427b29 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Fri, 19 May 2017 15:48:57 +0300 Subject: selinux: Implement Infiniband PKey "Access" access vector Add a type and access vector for PKeys. Implement the ib_pkey_access hook to check that the caller has permission to access the PKey on the given subnet prefix. Add an interface to get the PKey SID. 
Walk the PKey ocontexts to find an entry for the given subnet prefix and pkey. Signed-off-by: Daniel Jurgens Reviewed-by: James Morris Acked-by: Doug Ledford Signed-off-by: Paul Moore --- include/linux/lsm_audit.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index e58e577117b6..0df5639a4ff4 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -45,6 +45,11 @@ struct lsm_ioctlop_audit { u16 cmd; }; +struct lsm_ibpkey_audit { + u64 subnet_prefix; + u16 pkey; +}; + /* Auxiliary data to use in generating the audit record. */ struct common_audit_data { char type; @@ -60,6 +65,7 @@ struct common_audit_data { #define LSM_AUDIT_DATA_DENTRY 10 #define LSM_AUDIT_DATA_IOCTL_OP 11 #define LSM_AUDIT_DATA_FILE 12 +#define LSM_AUDIT_DATA_IBPKEY 13 union { struct path path; struct dentry *dentry; @@ -77,6 +83,7 @@ struct common_audit_data { char *kmod_name; struct lsm_ioctlop_audit *op; struct file *file; + struct lsm_ibpkey_audit *ibpkey; } u; /* this union contains LSM specific data */ union { -- cgit v1.2.3 From ab861dfca1652aa09b26b7aa2899feb29b33dfd9 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Fri, 19 May 2017 15:48:58 +0300 Subject: selinux: Add IB Port SMP access vector Add a type for Infiniband ports and an access vector for subnet management packets. Implement the ib_port_smp hook to check that the caller has permission to send and receive SMPs on the end port specified by the device name and port. Add interface to query the SID for a IB port, which walks the IB_PORT ocontexts to find an entry for the given name and port. 
Signed-off-by: Daniel Jurgens Reviewed-by: James Morris Acked-by: Doug Ledford Signed-off-by: Paul Moore --- include/linux/lsm_audit.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 0df5639a4ff4..22b5d4e687ce 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -21,6 +21,7 @@ #include #include #include +#include struct lsm_network_audit { int netif; @@ -50,6 +51,11 @@ struct lsm_ibpkey_audit { u16 pkey; }; +struct lsm_ibendport_audit { + char dev_name[IB_DEVICE_NAME_MAX]; + u8 port; +}; + /* Auxiliary data to use in generating the audit record. */ struct common_audit_data { char type; @@ -66,6 +72,7 @@ struct common_audit_data { #define LSM_AUDIT_DATA_IOCTL_OP 11 #define LSM_AUDIT_DATA_FILE 12 #define LSM_AUDIT_DATA_IBPKEY 13 +#define LSM_AUDIT_DATA_IBENDPORT 14 union { struct path path; struct dentry *dentry; @@ -84,6 +91,7 @@ struct common_audit_data { struct lsm_ioctlop_audit *op; struct file *file; struct lsm_ibpkey_audit *ibpkey; + struct lsm_ibendport_audit *ibendport; } u; /* this union contains LSM specific data */ union { -- cgit v1.2.3 From 4166a56aa8d5babe979d8e0834a741c9f015ad14 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 20 May 2017 23:42:50 +0200 Subject: ARM/dmaengine: pl08x: pass reasonable memcpy settings We cannot use bits from configuration registers as API between platforms and driver like this, abstract it out to two enums and mimic the stuff passed as device tree data. This is done to make it possible for the driver to generate the ccfg word on-the-fly so we can support more PL08x derivatives. 
Acked-by: Olof Johansson Acked-by: Arnd Bergmann Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul --- include/linux/amba/pl08x.h | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h index 5308eae9ce35..79d1bcee738d 100644 --- a/include/linux/amba/pl08x.h +++ b/include/linux/amba/pl08x.h @@ -47,8 +47,6 @@ enum { * devices with static assignments * @muxval: a number usually used to poke into some mux regiser to * mux in the signal to this channel - * @cctl_memcpy: options for the channel control register for memcpy - * *** not used for slave channels *** * @addr: source/target address in physical memory for this DMA channel, * can be the address of a FIFO register for burst requests for example. * This can be left undefined if the PrimeCell API is used for configuring @@ -63,12 +61,28 @@ struct pl08x_channel_data { int min_signal; int max_signal; u32 muxval; - u32 cctl_memcpy; dma_addr_t addr; bool single; u8 periph_buses; }; +enum pl08x_burst_size { + PL08X_BURST_SZ_1, + PL08X_BURST_SZ_4, + PL08X_BURST_SZ_8, + PL08X_BURST_SZ_16, + PL08X_BURST_SZ_32, + PL08X_BURST_SZ_64, + PL08X_BURST_SZ_128, + PL08X_BURST_SZ_256, +}; + +enum pl08x_bus_width { + PL08X_BUS_WIDTH_8_BITS, + PL08X_BUS_WIDTH_16_BITS, + PL08X_BUS_WIDTH_32_BITS, +}; + /** * struct pl08x_platform_data - the platform configuration for the PL08x * PrimeCells. @@ -76,6 +90,11 @@ struct pl08x_channel_data { * platform, all inclusive, including multiplexed channels. The available * physical channels will be multiplexed around these signals as they are * requested, just enumerate all possible channels. 
+ * @num_slave_channels: number of elements in the slave channel array + * @memcpy_burst_size: the appropriate burst size for memcpy operations + * @memcpy_bus_width: memory bus width + * @memcpy_prot_buff: whether memcpy DMA is bufferable + * @memcpy_prot_cache: whether memcpy DMA is cacheable * @get_xfer_signal: request a physical signal to be used for a DMA transfer * immediately: if there is some multiplexing or similar blocking the use * of the channel the transfer can be denied by returning less than zero, @@ -90,7 +109,10 @@ struct pl08x_channel_data { struct pl08x_platform_data { struct pl08x_channel_data *slave_channels; unsigned int num_slave_channels; - struct pl08x_channel_data memcpy_channel; + enum pl08x_burst_size memcpy_burst_size; + enum pl08x_bus_width memcpy_bus_width; + bool memcpy_prot_buff; + bool memcpy_prot_cache; int (*get_xfer_signal)(const struct pl08x_channel_data *); void (*put_xfer_signal)(const struct pl08x_channel_data *, int); u8 lli_buses; -- cgit v1.2.3 From 1e1cfc7213a37131a53e7dfada75dce77b8e043d Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 20 May 2017 23:42:53 +0200 Subject: dmaengine: pl08x: Add support for Faraday Technology FTDMAC020 After reading the specs for the Faraday Technology FTDMAC020 found in the Gemini platform, it becomes pretty evident that this is just another PL08x derivative, and should be handled as such by simply extending the existing PL08x driver to handle the quirks in this hardware. This patch makes memcpy work and has been tested on the Gemini and also regression-tested on the Nomadik NHK15 using dmatest with 10 threads per channel without a hitch for hours. I have not implemented slave DMA in those codepaths, because this device (Gemini) does not use slave DMA, and it seems like devices using FTDMAC020 for device DMA have a slightly different register layout so some real hardware is needed to proceed with this. I left some FIXME etc in the code for this.
I had to do some refactorings of some helper functions, but I have not split those into separate patches because these refactorings do not make much sense without the increased complexity of handling the FTDMAC020. The DMA test would hang the platform on me on the Gemini after a few thousand iterations, however after turning off the caches the problem immediately disappeared and I could run the DMA engine with 10 threads per physical channel for days in a row without a crash. I think there is no problem with the DMA driver: instead it is something fishy in the FA526 cache handling code that gets pretty heavily exercised by the DMA engine and we need to go and fix that instead. Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul --- include/linux/amba/pl080.h | 83 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/amba/pl080.h b/include/linux/amba/pl080.h index 580b5323a717..10124c9f9db5 100644 --- a/include/linux/amba/pl080.h +++ b/include/linux/amba/pl080.h @@ -44,7 +44,14 @@ #define PL080_SYNC (0x34) -/* Per channel configuration registers */ +/* The Faraday Technology FTDMAC020 variant registers */ +#define FTDMAC020_CH_BUSY (0x20) +/* Identical to PL080_CONFIG */ +#define FTDMAC020_CSR (0x24) +/* Identical to PL080_SYNC */ +#define FTDMAC020_SYNC (0x2C) +#define FTDMAC020_REVISION (0x30) +#define FTDMAC020_FEATURE (0x34) /* Per channel configuration registers */ #define PL080_Cx_BASE(x) ((0x100 + (x * 0x20))) @@ -55,6 +62,13 @@ #define PL080_CH_CONFIG (0x10) #define PL080S_CH_CONTROL2 (0x10) #define PL080S_CH_CONFIG (0x14) +/* The Faraday FTDMAC020 derivative shuffles the registers around */ +#define FTDMAC020_CH_CSR (0x00) +#define FTDMAC020_CH_CFG (0x04) +#define FTDMAC020_CH_SRC_ADDR (0x08) +#define FTDMAC020_CH_DST_ADDR (0x0C) +#define FTDMAC020_CH_LLP (0x10) +#define FTDMAC020_CH_SIZE (0x14) #define PL080_LLI_ADDR_MASK (0x3fffffff << 2) #define
PL080_LLI_ADDR_SHIFT (2) @@ -119,6 +133,73 @@ #define PL080_FLOW_PER2MEM_PER (0x6) #define PL080_FLOW_SRC2DST_SRC (0x7) +#define FTDMAC020_CH_CSR_TC_MSK BIT(31) +/* Later versions have a threshold in bits 24..26, */ +#define FTDMAC020_CH_CSR_FIFOTH_MSK (0x7 << 24) +#define FTDMAC020_CH_CSR_FIFOTH_SHIFT (24) +#define FTDMAC020_CH_CSR_CHPR1_MSK (0x3 << 22) +#define FTDMAC020_CH_CSR_PROT3 BIT(21) +#define FTDMAC020_CH_CSR_PROT2 BIT(20) +#define FTDMAC020_CH_CSR_PROT1 BIT(19) +#define FTDMAC020_CH_CSR_SRC_SIZE_MSK (0x7 << 16) +#define FTDMAC020_CH_CSR_SRC_SIZE_SHIFT (16) +#define FTDMAC020_CH_CSR_ABT BIT(15) +#define FTDMAC020_CH_CSR_SRC_WIDTH_MSK (0x7 << 11) +#define FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT (11) +#define FTDMAC020_CH_CSR_DST_WIDTH_MSK (0x7 << 8) +#define FTDMAC020_CH_CSR_DST_WIDTH_SHIFT (8) +#define FTDMAC020_CH_CSR_MODE BIT(7) +/* 00 = increase, 01 = decrease, 10 = fix */ +#define FTDMAC020_CH_CSR_SRCAD_CTL_MSK (0x3 << 5) +#define FTDMAC020_CH_CSR_SRCAD_CTL_SHIFT (5) +#define FTDMAC020_CH_CSR_DSTAD_CTL_MSK (0x3 << 3) +#define FTDMAC020_CH_CSR_DSTAD_CTL_SHIFT (3) +#define FTDMAC020_CH_CSR_SRC_SEL BIT(2) +#define FTDMAC020_CH_CSR_DST_SEL BIT(1) +#define FTDMAC020_CH_CSR_EN BIT(0) + +/* FIFO threshold setting */ +#define FTDMAC020_CH_CSR_FIFOTH_1 (0x0) +#define FTDMAC020_CH_CSR_FIFOTH_2 (0x1) +#define FTDMAC020_CH_CSR_FIFOTH_4 (0x2) +#define FTDMAC020_CH_CSR_FIFOTH_8 (0x3) +#define FTDMAC020_CH_CSR_FIFOTH_16 (0x4) +/* The FTDMAC020 supports 64bit wide transfers */ +#define FTDMAC020_WIDTH_64BIT (0x3) +/* Address can be increased, decreased or fixed */ +#define FTDMAC020_CH_CSR_SRCAD_CTL_INC (0x0) +#define FTDMAC020_CH_CSR_SRCAD_CTL_DEC (0x1) +#define FTDMAC020_CH_CSR_SRCAD_CTL_FIXED (0x2) + +#define FTDMAC020_CH_CFG_LLP_CNT_MASK (0xf << 16) +#define FTDMAC020_CH_CFG_LLP_CNT_SHIFT (16) +#define FTDMAC020_CH_CFG_BUSY BIT(8) +#define FTDMAC020_CH_CFG_INT_ABT_MASK BIT(2) +#define FTDMAC020_CH_CFG_INT_ERR_MASK BIT(1) +#define FTDMAC020_CH_CFG_INT_TC_MASK BIT(0) + 
+/* Inside the LLIs, the applicable CSR fields are mapped differently */ +#define FTDMAC020_LLI_TC_MSK BIT(28) +#define FTDMAC020_LLI_SRC_WIDTH_MSK (0x7 << 25) +#define FTDMAC020_LLI_SRC_WIDTH_SHIFT (25) +#define FTDMAC020_LLI_DST_WIDTH_MSK (0x7 << 22) +#define FTDMAC020_LLI_DST_WIDTH_SHIFT (22) +#define FTDMAC020_LLI_SRCAD_CTL_MSK (0x3 << 20) +#define FTDMAC020_LLI_SRCAD_CTL_SHIFT (20) +#define FTDMAC020_LLI_DSTAD_CTL_MSK (0x3 << 18) +#define FTDMAC020_LLI_DSTAD_CTL_SHIFT (18) +#define FTDMAC020_LLI_SRC_SEL BIT(17) +#define FTDMAC020_LLI_DST_SEL BIT(16) +#define FTDMAC020_LLI_TRANSFER_SIZE_MASK (0xfff << 0) +#define FTDMAC020_LLI_TRANSFER_SIZE_SHIFT (0) + +#define FTDMAC020_CFG_LLP_CNT_MASK (0x0f << 16) +#define FTDMAC020_CFG_LLP_CNT_SHIFT (16) +#define FTDMAC020_CFG_BUSY BIT(8) +#define FTDMAC020_CFG_INT_ABT_MSK BIT(2) +#define FTDMAC020_CFG_INT_ERR_MSK BIT(1) +#define FTDMAC020_CFG_INT_TC_MSK BIT(0) + /* DMA linked list chain structure */ struct pl080_lli { -- cgit v1.2.3 From fcc785417fba2dc81d2f6ba888caaff463f4f441 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 20 May 2017 23:42:54 +0200 Subject: dmaengine: pl08x: use GENMASK() to create bitmasks This switches the arbitrary shifting of hex constants in the pl080 header to use GENMASK(). 
Suggested-by: Vinod Koul Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul --- include/linux/amba/pl080.h | 50 +++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/amba/pl080.h b/include/linux/amba/pl080.h index 10124c9f9db5..ab036b6b1804 100644 --- a/include/linux/amba/pl080.h +++ b/include/linux/amba/pl080.h @@ -70,12 +70,12 @@ #define FTDMAC020_CH_LLP (0x10) #define FTDMAC020_CH_SIZE (0x14) -#define PL080_LLI_ADDR_MASK (0x3fffffff << 2) +#define PL080_LLI_ADDR_MASK GENMASK(31, 2) #define PL080_LLI_ADDR_SHIFT (2) #define PL080_LLI_LM_AHB2 BIT(0) #define PL080_CONTROL_TC_IRQ_EN BIT(31) -#define PL080_CONTROL_PROT_MASK (0x7 << 28) +#define PL080_CONTROL_PROT_MASK GENMASK(30, 28) #define PL080_CONTROL_PROT_SHIFT (28) #define PL080_CONTROL_PROT_CACHE BIT(30) #define PL080_CONTROL_PROT_BUFF BIT(29) @@ -84,16 +84,16 @@ #define PL080_CONTROL_SRC_INCR BIT(26) #define PL080_CONTROL_DST_AHB2 BIT(25) #define PL080_CONTROL_SRC_AHB2 BIT(24) -#define PL080_CONTROL_DWIDTH_MASK (0x7 << 21) +#define PL080_CONTROL_DWIDTH_MASK GENMASK(23, 21) #define PL080_CONTROL_DWIDTH_SHIFT (21) -#define PL080_CONTROL_SWIDTH_MASK (0x7 << 18) +#define PL080_CONTROL_SWIDTH_MASK GENMASK(20, 18) #define PL080_CONTROL_SWIDTH_SHIFT (18) -#define PL080_CONTROL_DB_SIZE_MASK (0x7 << 15) +#define PL080_CONTROL_DB_SIZE_MASK GENMASK(17, 15) #define PL080_CONTROL_DB_SIZE_SHIFT (15) -#define PL080_CONTROL_SB_SIZE_MASK (0x7 << 12) +#define PL080_CONTROL_SB_SIZE_MASK GENMASK(14, 12) #define PL080_CONTROL_SB_SIZE_SHIFT (12) -#define PL080_CONTROL_TRANSFER_SIZE_MASK (0xfff << 0) -#define PL080S_CONTROL_TRANSFER_SIZE_MASK (0x1ffffff << 0) +#define PL080_CONTROL_TRANSFER_SIZE_MASK GENMASK(11, 0) +#define PL080S_CONTROL_TRANSFER_SIZE_MASK GENMASK(24, 0) #define PL080_CONTROL_TRANSFER_SIZE_SHIFT (0) #define PL080_BSIZE_1 (0x0) @@ -116,11 +116,11 @@ #define PL080_CONFIG_LOCK BIT(16) #define PL080_CONFIG_TC_IRQ_MASK 
BIT(15) #define PL080_CONFIG_ERR_IRQ_MASK BIT(14) -#define PL080_CONFIG_FLOW_CONTROL_MASK (0x7 << 11) +#define PL080_CONFIG_FLOW_CONTROL_MASK GENMASK(13, 11) #define PL080_CONFIG_FLOW_CONTROL_SHIFT (11) -#define PL080_CONFIG_DST_SEL_MASK (0xf << 6) +#define PL080_CONFIG_DST_SEL_MASK GENMASK(9, 6) #define PL080_CONFIG_DST_SEL_SHIFT (6) -#define PL080_CONFIG_SRC_SEL_MASK (0xf << 1) +#define PL080_CONFIG_SRC_SEL_MASK GENMASK(4, 1) #define PL080_CONFIG_SRC_SEL_SHIFT (1) #define PL080_CONFIG_ENABLE BIT(0) @@ -135,24 +135,24 @@ #define FTDMAC020_CH_CSR_TC_MSK BIT(31) /* Later versions have a threshold in bits 24..26, */ -#define FTDMAC020_CH_CSR_FIFOTH_MSK (0x7 << 24) +#define FTDMAC020_CH_CSR_FIFOTH_MSK GENMASK(26, 24) #define FTDMAC020_CH_CSR_FIFOTH_SHIFT (24) -#define FTDMAC020_CH_CSR_CHPR1_MSK (0x3 << 22) +#define FTDMAC020_CH_CSR_CHPR1_MSK GENMASK(23, 22) #define FTDMAC020_CH_CSR_PROT3 BIT(21) #define FTDMAC020_CH_CSR_PROT2 BIT(20) #define FTDMAC020_CH_CSR_PROT1 BIT(19) -#define FTDMAC020_CH_CSR_SRC_SIZE_MSK (0x7 << 16) +#define FTDMAC020_CH_CSR_SRC_SIZE_MSK GENMASK(18, 16) #define FTDMAC020_CH_CSR_SRC_SIZE_SHIFT (16) #define FTDMAC020_CH_CSR_ABT BIT(15) -#define FTDMAC020_CH_CSR_SRC_WIDTH_MSK (0x7 << 11) +#define FTDMAC020_CH_CSR_SRC_WIDTH_MSK GENMASK(13, 11) #define FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT (11) -#define FTDMAC020_CH_CSR_DST_WIDTH_MSK (0x7 << 8) +#define FTDMAC020_CH_CSR_DST_WIDTH_MSK GENMASK(10, 8) #define FTDMAC020_CH_CSR_DST_WIDTH_SHIFT (8) #define FTDMAC020_CH_CSR_MODE BIT(7) /* 00 = increase, 01 = decrease, 10 = fix */ -#define FTDMAC020_CH_CSR_SRCAD_CTL_MSK (0x3 << 5) +#define FTDMAC020_CH_CSR_SRCAD_CTL_MSK GENMASK(6, 5) #define FTDMAC020_CH_CSR_SRCAD_CTL_SHIFT (5) -#define FTDMAC020_CH_CSR_DSTAD_CTL_MSK (0x3 << 3) +#define FTDMAC020_CH_CSR_DSTAD_CTL_MSK GENMASK(4, 3) #define FTDMAC020_CH_CSR_DSTAD_CTL_SHIFT (3) #define FTDMAC020_CH_CSR_SRC_SEL BIT(2) #define FTDMAC020_CH_CSR_DST_SEL BIT(1) @@ -171,7 +171,7 @@ #define FTDMAC020_CH_CSR_SRCAD_CTL_DEC 
(0x1) #define FTDMAC020_CH_CSR_SRCAD_CTL_FIXED (0x2) -#define FTDMAC020_CH_CFG_LLP_CNT_MASK (0xf << 16) +#define FTDMAC020_CH_CFG_LLP_CNT_MASK GENMASK(19, 16) #define FTDMAC020_CH_CFG_LLP_CNT_SHIFT (16) #define FTDMAC020_CH_CFG_BUSY BIT(8) #define FTDMAC020_CH_CFG_INT_ABT_MASK BIT(2) @@ -180,20 +180,20 @@ /* Inside the LLIs, the applicable CSR fields are mapped differently */ #define FTDMAC020_LLI_TC_MSK BIT(28) -#define FTDMAC020_LLI_SRC_WIDTH_MSK (0x7 << 25) +#define FTDMAC020_LLI_SRC_WIDTH_MSK GENMASK(27, 25) #define FTDMAC020_LLI_SRC_WIDTH_SHIFT (25) -#define FTDMAC020_LLI_DST_WIDTH_MSK (0x7 << 22) +#define FTDMAC020_LLI_DST_WIDTH_MSK GENMASK(24, 22) #define FTDMAC020_LLI_DST_WIDTH_SHIFT (22) -#define FTDMAC020_LLI_SRCAD_CTL_MSK (0x3 << 20) +#define FTDMAC020_LLI_SRCAD_CTL_MSK GENMASK(21, 20) #define FTDMAC020_LLI_SRCAD_CTL_SHIFT (20) -#define FTDMAC020_LLI_DSTAD_CTL_MSK (0x3 << 18) +#define FTDMAC020_LLI_DSTAD_CTL_MSK GENMASK(19, 18) #define FTDMAC020_LLI_DSTAD_CTL_SHIFT (18) #define FTDMAC020_LLI_SRC_SEL BIT(17) #define FTDMAC020_LLI_DST_SEL BIT(16) -#define FTDMAC020_LLI_TRANSFER_SIZE_MASK (0xfff << 0) +#define FTDMAC020_LLI_TRANSFER_SIZE_MASK GENMASK(11, 0) #define FTDMAC020_LLI_TRANSFER_SIZE_SHIFT (0) -#define FTDMAC020_CFG_LLP_CNT_MASK (0x0f << 16) +#define FTDMAC020_CFG_LLP_CNT_MASK GENMASK(19, 16) #define FTDMAC020_CFG_LLP_CNT_SHIFT (16) #define FTDMAC020_CFG_BUSY BIT(8) #define FTDMAC020_CFG_INT_ABT_MSK BIT(2) -- cgit v1.2.3 From e73ad5ff2f76da25390e9607cb549691639330c3 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 22 May 2017 15:30:03 -0700 Subject: mm, x86/mm: Make the batched unmap TLB flush API more generic try_to_unmap_flush() used to open-code a rather x86-centric flush sequence: local_flush_tlb() + flush_tlb_others(). Rearrange the code so that the arch (only x86 for now) provides arch_tlbbatch_add_mm() and arch_tlbbatch_flush() and the core code calls those functions instead. 
I'll want this for x86 because, to enable address space ids, I can't support the flush_tlb_others() mode used by the existing try_to_unmap_flush() implementation with good performance. I can support the new API fairly easily, though. I imagine that other architectures may be in a similar position. Architectures with strong remote flush primitives (arm64?) may have even worse performance problems with flush_tlb_others() the way that try_to_unmap_flush() uses it. Signed-off-by: Andy Lutomirski Acked-by: Kees Cook Cc: Andrew Morton Cc: Borislav Petkov Cc: Dave Hansen Cc: Linus Torvalds Cc: Mel Gorman Cc: Michal Hocko Cc: Nadav Amit Cc: Nadav Amit Cc: Peter Zijlstra Cc: Rik van Riel Cc: Sasha Levin Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/19f25a8581f9fb77876b7ff3b001f89835e34ea3.1495492063.git.luto@kernel.org Signed-off-by: Ingo Molnar --- include/linux/mm_types_task.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index 136dfdf63ba1..fc412fbd80bd 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -14,6 +14,10 @@ #include +#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH +#include +#endif + #define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) #define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \ IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) @@ -67,12 +71,15 @@ struct page_frag { struct tlbflush_unmap_batch { #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH /* - * Each bit set is a CPU that potentially has a TLB entry for one of - * the PFNs being flushed. See set_tlb_ubc_flush_pending().
+ * The arch code makes the following promise: generic code can modify a + * PTE, then call arch_tlbbatch_add_mm() (which internally provides all + * needed barriers), then call arch_tlbbatch_flush(), and the entries + * will be flushed on all CPUs by the time that arch_tlbbatch_flush() + * returns. */ - struct cpumask cpumask; + struct arch_tlbflush_unmap_batch arch; - /* True if any bit in cpumask is set */ + /* True if a flush is needed. */ bool flush_required; /* -- cgit v1.2.3 From 71ebc9a3795818eab52e81bbcbdfae130ee35d9e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 16 May 2017 12:10:42 +0100 Subject: dma-buf/sync-file: Defer creation of sync_file->name Constructing the name takes the majority of the time for allocating a sync_file to wrap a fence, and the name is very rarely used (only via the sync_file status user interface). To reduce the impact on the common path (that of creating sync_file to pass around), defer the construction of the name until it is first used. v2: Update kerneldoc (kbuild test robot) v3: sync_debug.c was peeking at the name v4: Comment upon the potential race between two users of sync_file_get_name() and claim that such a race is below the level of notice. However, to prevent any future nuisance, use a global spinlock to serialize the assignment of the name. v5: Completely avoid the read/write race by only storing the name passed in from the user inside sync_file->user_name and passing in a buffer to dynamically construct the name otherwise. 
Signed-off-by: Chris Wilson Cc: Sumit Semwal Cc: Gustavo Padovan Cc: Daniel Vetter Cc: David Herrmann Reviewed-by: Daniel Vetter Signed-off-by: Gustavo Padovan Link: http://patchwork.freedesktop.org/patch/msgid/20170516111042.24719-1-chris@chris-wilson.co.uk --- include/linux/sync_file.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h index d37beefdfbd5..5726107963b2 100644 --- a/include/linux/sync_file.h +++ b/include/linux/sync_file.h @@ -23,7 +23,6 @@ /** * struct sync_file - sync file to export to the userspace * @file: file representing this fence - * @name: name of sync_file. Useful for debugging * @sync_file_list: membership in global file list * @wq: wait queue for fence signaling * @fence: fence with the fences in the sync_file @@ -31,7 +30,14 @@ */ struct sync_file { struct file *file; - char name[32]; + /** + * @user_name: + * + * Name of the sync file provided by userspace, for merged fences. + * Otherwise generated through driver callbacks (in which case the + * entire array is 0). + */ + char user_name[32]; #ifdef CONFIG_DEBUG_FS struct list_head sync_file_list; #endif @@ -46,5 +52,6 @@ struct sync_file { struct sync_file *sync_file_create(struct dma_fence *fence); struct dma_fence *sync_file_get_fence(int fd); +char *sync_file_get_name(struct sync_file *sync_file, char *buf, int len); #endif /* _LINUX_SYNC_H */ -- cgit v1.2.3 From 9d7650c25498e4f51213fe48eddde5778434f375 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Tue, 23 May 2017 09:41:19 +0300 Subject: qed: Align DP_ERR style with other DP macros Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_if.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index c70ac13a97e6..ff590cb37a00 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -700,11 +700,13 @@ struct qed_common_ops { (((value) >> (name ## _SHIFT)) & name ## _MASK) /* Debug print definitions */ -#define DP_ERR(cdev, fmt, ...) \ - pr_err("[%s:%d(%s)]" fmt, \ - __func__, __LINE__, \ - DP_NAME(cdev) ? DP_NAME(cdev) : "", \ - ## __VA_ARGS__) \ +#define DP_ERR(cdev, fmt, ...) \ + do { \ + pr_err("[%s:%d(%s)]" fmt, \ + __func__, __LINE__, \ + DP_NAME(cdev) ? DP_NAME(cdev) : "", \ + ## __VA_ARGS__); \ + } while (0) #define DP_NOTICE(cdev, fmt, ...) \ do { \ -- cgit v1.2.3 From ae33666ab89675968d77753d18452b1ef654c43a Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Tue, 23 May 2017 09:41:26 +0300 Subject: qed: Provide MBI information in dev_info Pass additional information about package installed on persistent memory so that protocol drivers would be able to log it. Signed-off-by: Tomer Tayar Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_if.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index ff590cb37a00..b00e6753b4f4 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -328,6 +328,14 @@ struct qed_dev_info { /* MFW version */ u32 mfw_rev; +#define QED_MFW_VERSION_0_MASK 0x000000FF +#define QED_MFW_VERSION_0_OFFSET 0 +#define QED_MFW_VERSION_1_MASK 0x0000FF00 +#define QED_MFW_VERSION_1_OFFSET 8 +#define QED_MFW_VERSION_2_MASK 0x00FF0000 +#define QED_MFW_VERSION_2_OFFSET 16 +#define QED_MFW_VERSION_3_MASK 0xFF000000 +#define QED_MFW_VERSION_3_OFFSET 24 u32 flash_size; u8 mf_mode; @@ -337,6 +345,15 @@ struct qed_dev_info { bool wol_support; + /* MBI version */ + u32 mbi_version; +#define QED_MBI_VERSION_0_MASK 0x000000FF +#define QED_MBI_VERSION_0_OFFSET 0 +#define QED_MBI_VERSION_1_MASK 0x0000FF00 +#define QED_MBI_VERSION_1_OFFSET 8 +#define QED_MBI_VERSION_2_MASK 0x00FF0000 +#define QED_MBI_VERSION_2_OFFSET 16 + enum qed_dev_type dev_type; /* Output parameters for qede */ -- cgit v1.2.3 From 712c3cbf193fcadf0ba67da61432beb1a71e400b Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Tue, 23 May 2017 09:41:28 +0300 Subject: qed: Replace set_id() api with set_name() Current API between qed and protocol modules allows passing an additional private string - but it doesn't get utilized by qed anywhere. Clarify the API by removing it and renaming it 'set_name'. CC: Manish Rangankar Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_if.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index b00e6753b4f4..73c46d6d5727 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -520,9 +520,7 @@ struct qed_common_ops { int (*set_power_state)(struct qed_dev *cdev, pci_power_t state); - void (*set_id)(struct qed_dev *cdev, - char name[], - char ver_str[]); + void (*set_name) (struct qed_dev *cdev, char name[]); /* Client drivers need to make this call before slowpath_start. * PF params required for the call before slowpath_start is -- cgit v1.2.3 From be035303182a1260803a1871065d7b1e67c9ebe9 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Tue, 23 May 2017 17:46:56 +0530 Subject: regulator: tps65917: Add support for SMPS12 Add support for SMPS12 dual phase regulator. Signed-off-by: Keerthy Acked-by: Lee Jones Signed-off-by: Mark Brown --- include/linux/mfd/palmas.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index 5c9a1d44c125..6dec43826303 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -250,6 +250,7 @@ enum tps65917_regulators { TPS65917_REG_SMPS3, TPS65917_REG_SMPS4, TPS65917_REG_SMPS5, + TPS65917_REG_SMPS12, /* LDO regulators */ TPS65917_REG_LDO1, TPS65917_REG_LDO2, @@ -317,6 +318,7 @@ enum tps65917_external_requestor_id { TPS65917_EXTERNAL_REQSTR_ID_SMPS3, TPS65917_EXTERNAL_REQSTR_ID_SMPS4, TPS65917_EXTERNAL_REQSTR_ID_SMPS5, + TPS65917_EXTERNAL_REQSTR_ID_SMPS12, TPS65917_EXTERNAL_REQSTR_ID_LDO1, TPS65917_EXTERNAL_REQSTR_ID_LDO2, TPS65917_EXTERNAL_REQSTR_ID_LDO3, -- cgit v1.2.3 From 610387d162eb1beb6eb2009af5175dc6b44b8da6 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 21 May 2017 22:42:32 +0200 Subject: misc: apds990x: move header file out of I2C realm include/linux/i2c is not for client devices.
Move the header file to a more appropriate location. Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- include/linux/i2c/apds990x.h | 79 ---------------------------------- include/linux/platform_data/apds990x.h | 79 ++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 79 deletions(-) delete mode 100644 include/linux/i2c/apds990x.h create mode 100644 include/linux/platform_data/apds990x.h (limited to 'include/linux') diff --git a/include/linux/i2c/apds990x.h b/include/linux/i2c/apds990x.h deleted file mode 100644 index d186fcc5d257..000000000000 --- a/include/linux/i2c/apds990x.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * This file is part of the APDS990x sensor driver. - * Chip is combined proximity and ambient light sensor. - * - * Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). - * - * Contact: Samu Onkalo - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA - * - */ - -#ifndef __APDS990X_H__ -#define __APDS990X_H__ - - -#define APDS_IRLED_CURR_12mA 0x3 -#define APDS_IRLED_CURR_25mA 0x2 -#define APDS_IRLED_CURR_50mA 0x1 -#define APDS_IRLED_CURR_100mA 0x0 - -/** - * struct apds990x_chip_factors - defines effect of the cover window - * @ga: Total glass attenuation - * @cf1: clear channel factor 1 for raw to lux conversion - * @irf1: IR channel factor 1 for raw to lux conversion - * @cf2: clear channel factor 2 for raw to lux conversion - * @irf2: IR channel factor 2 for raw to lux conversion - * @df: device factor for conversion formulas - * - * Structure for tuning ALS calculation to match with environment. - * Values depend on the material above the sensor and the sensor - * itself. If the GA is zero, driver will use uncovered sensor default values - * format: decimal value * APDS_PARAM_SCALE except df which is plain integer. - */ -#define APDS_PARAM_SCALE 4096 -struct apds990x_chip_factors { - int ga; - int cf1; - int irf1; - int cf2; - int irf2; - int df; -}; - -/** - * struct apds990x_platform_data - platform data for apsd990x.c driver - * @cf: chip factor data - * @pddrive: IR-led driving current - * @ppcount: number of IR pulses used for proximity estimation - * @setup_resources: interrupt line setup call back function - * @release_resources: interrupt line release call back function - * - * Proximity detection result depends heavily on correct ppcount, pdrive - * and cover window. 
- * - */ - -struct apds990x_platform_data { - struct apds990x_chip_factors cf; - u8 pdrive; - u8 ppcount; - int (*setup_resources)(void); - int (*release_resources)(void); -}; - -#endif diff --git a/include/linux/platform_data/apds990x.h b/include/linux/platform_data/apds990x.h new file mode 100644 index 000000000000..d186fcc5d257 --- /dev/null +++ b/include/linux/platform_data/apds990x.h @@ -0,0 +1,79 @@ +/* + * This file is part of the APDS990x sensor driver. + * Chip is combined proximity and ambient light sensor. + * + * Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). + * + * Contact: Samu Onkalo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef __APDS990X_H__ +#define __APDS990X_H__ + + +#define APDS_IRLED_CURR_12mA 0x3 +#define APDS_IRLED_CURR_25mA 0x2 +#define APDS_IRLED_CURR_50mA 0x1 +#define APDS_IRLED_CURR_100mA 0x0 + +/** + * struct apds990x_chip_factors - defines effect of the cover window + * @ga: Total glass attenuation + * @cf1: clear channel factor 1 for raw to lux conversion + * @irf1: IR channel factor 1 for raw to lux conversion + * @cf2: clear channel factor 2 for raw to lux conversion + * @irf2: IR channel factor 2 for raw to lux conversion + * @df: device factor for conversion formulas + * + * Structure for tuning ALS calculation to match with environment. 
+ * Values depend on the material above the sensor and the sensor + * itself. If the GA is zero, driver will use uncovered sensor default values + * format: decimal value * APDS_PARAM_SCALE except df which is plain integer. + */ +#define APDS_PARAM_SCALE 4096 +struct apds990x_chip_factors { + int ga; + int cf1; + int irf1; + int cf2; + int irf2; + int df; +}; + +/** + * struct apds990x_platform_data - platform data for apsd990x.c driver + * @cf: chip factor data + * @pddrive: IR-led driving current + * @ppcount: number of IR pulses used for proximity estimation + * @setup_resources: interrupt line setup call back function + * @release_resources: interrupt line release call back function + * + * Proximity detection result depends heavily on correct ppcount, pdrive + * and cover window. + * + */ + +struct apds990x_platform_data { + struct apds990x_chip_factors cf; + u8 pdrive; + u8 ppcount; + int (*setup_resources)(void); + int (*release_resources)(void); +}; + +#endif -- cgit v1.2.3 From 7ae5f10a9fc1ae2f001ab3be5be84a5d0a89f918 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 21 May 2017 22:42:33 +0200 Subject: misc: bh1770glc: move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- include/linux/i2c/bh1770glc.h | 53 --------------------------------- include/linux/platform_data/bh1770glc.h | 53 +++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 53 deletions(-) delete mode 100644 include/linux/i2c/bh1770glc.h create mode 100644 include/linux/platform_data/bh1770glc.h (limited to 'include/linux') diff --git a/include/linux/i2c/bh1770glc.h b/include/linux/i2c/bh1770glc.h deleted file mode 100644 index 8b5e2df36c72..000000000000 --- a/include/linux/i2c/bh1770glc.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * This file is part of the ROHM BH1770GLC / OSRAM SFH7770 sensor driver. 
- * Chip is combined proximity and ambient light sensor. - * - * Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). - * - * Contact: Samu Onkalo - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA - * - */ - -#ifndef __BH1770_H__ -#define __BH1770_H__ - -/** - * struct bh1770_platform_data - platform data for bh1770glc driver - * @led_def_curr: IR led driving current. - * @glass_attenuation: Attenuation factor for covering window. - * @setup_resources: Call back for interrupt line setup function - * @release_resources: Call back for interrupte line release function - * - * Example of glass attenuation: 16384 * 385 / 100 means attenuation factor - * of 3.85. i.e. 
light_above_sensor = light_above_cover_window / 3.85 - */ - -struct bh1770_platform_data { -#define BH1770_LED_5mA 0 -#define BH1770_LED_10mA 1 -#define BH1770_LED_20mA 2 -#define BH1770_LED_50mA 3 -#define BH1770_LED_100mA 4 -#define BH1770_LED_150mA 5 -#define BH1770_LED_200mA 6 - __u8 led_def_curr; -#define BH1770_NEUTRAL_GA 16384 /* 16384 / 16384 = 1 */ - __u32 glass_attenuation; - int (*setup_resources)(void); - int (*release_resources)(void); -}; -#endif diff --git a/include/linux/platform_data/bh1770glc.h b/include/linux/platform_data/bh1770glc.h new file mode 100644 index 000000000000..8b5e2df36c72 --- /dev/null +++ b/include/linux/platform_data/bh1770glc.h @@ -0,0 +1,53 @@ +/* + * This file is part of the ROHM BH1770GLC / OSRAM SFH7770 sensor driver. + * Chip is combined proximity and ambient light sensor. + * + * Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). + * + * Contact: Samu Onkalo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#ifndef __BH1770_H__ +#define __BH1770_H__ + +/** + * struct bh1770_platform_data - platform data for bh1770glc driver + * @led_def_curr: IR led driving current. + * @glass_attenuation: Attenuation factor for covering window. 
+ * @setup_resources: Call back for interrupt line setup function + * @release_resources: Call back for interrupte line release function + * + * Example of glass attenuation: 16384 * 385 / 100 means attenuation factor + * of 3.85. i.e. light_above_sensor = light_above_cover_window / 3.85 + */ + +struct bh1770_platform_data { +#define BH1770_LED_5mA 0 +#define BH1770_LED_10mA 1 +#define BH1770_LED_20mA 2 +#define BH1770_LED_50mA 3 +#define BH1770_LED_100mA 4 +#define BH1770_LED_150mA 5 +#define BH1770_LED_200mA 6 + __u8 led_def_curr; +#define BH1770_NEUTRAL_GA 16384 /* 16384 / 16384 = 1 */ + __u32 glass_attenuation; + int (*setup_resources)(void); + int (*release_resources)(void); +}; +#endif -- cgit v1.2.3 From f36776fafbaa0094390dd4e7e3e29805e0b82730 Mon Sep 17 00:00:00 2001 From: Peter Rajnoha Date: Tue, 9 May 2017 15:22:30 +0200 Subject: kobject: support passing in variables for synthetic uevents This patch makes it possible to pass additional arguments in addition to uevent action name when writing /sys/.../uevent attribute. These additional arguments are then inserted into generated synthetic uevent as additional environment variables. Before, we were not able to pass any additional uevent environment variables for synthetic uevents. This made it hard to identify such uevents properly in userspace to make proper distinction between genuine uevents originating from kernel and synthetic uevents triggered from userspace. Also, it was not possible to pass any additional information which would make it possible to optimize and change the way the synthetic uevents are processed back in userspace based on the originating environment of the triggering action in userspace. With the extra additional variables, we are able to pass through this extra information needed and also it makes it possible to synchronize with such synthetic uevents as they can be clearly identified back in userspace. 
The format for writing the uevent attribute is as follows: ACTION [UUID [KEY=VALUE ...]] There's no change in how "ACTION" is recognized - it stays the same ("add", "change", "remove"). The "ACTION" is the only argument required to generate synthetic uevent, the rest of arguments, that this patch adds support for, are optional. The "UUID" is considered as transaction identifier so it's possible to use the same UUID value for one or more synthetic uevents in which case we logically group these uevents together for any userspace listeners. The "UUID" is expected to be in "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" format where "x" is a hex digit. The value appears in uevent as "SYNTH_UUID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" environment variable. The "KEY=VALUE" pairs can contain alphanumeric characters only. It's possible to define zero or more pairs - each pair is then delimited by a space character " ". Each pair appears in synthetic uevents as "SYNTH_ARG_KEY=VALUE" environment variable. That means the KEY name gains "SYNTH_ARG_" prefix to avoid possible collisions with existing variables. To pass the "KEY=VALUE" pairs, it's also required to pass in the "UUID" part for the synthetic uevent first. If "UUID" is not passed in, the generated synthetic uevent gains "SYNTH_UUID=0" environment variable automatically so it's possible to identify this situation in userspace when reading generated uevent and so we can still make a difference between genuine and synthetic uevents.
Signed-off-by: Peter Rajnoha Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index ca85cb80e99a..eeab34b0f589 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -217,11 +217,9 @@ extern struct kobject *firmware_kobj; int kobject_uevent(struct kobject *kobj, enum kobject_action action); int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, char *envp[]); +int kobject_synth_uevent(struct kobject *kobj, const char *buf, size_t count); __printf(2, 3) int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...); -int kobject_action_type(const char *buf, size_t count, - enum kobject_action *type); - #endif /* _KOBJECT_H_ */ -- cgit v1.2.3 From 89cf2a20c3f13dbb4c15a0c6d2e390e700992173 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Sun, 21 May 2017 01:58:07 -0700 Subject: sysfs: remove signedness from sysfs_get_dirent sysfs_get_dirent is usually invoked with a string literal, which have the type char[]. While the toplevel Makefile disables -Wpointer-sign, other Makefiles like arch/x86/boot/compressed/Makefile redefine KBUILD_CFLAGS. 
Fixes the warning: In file included from arch/x86/boot/compressed/kaslr.c:17: In file included from ./include/linux/module.h:17: In file included from ./include/linux/kobject.h:21: ./include/linux/sysfs.h:517:37: warning: passing 'const unsigned char *' to parameter of type 'const char *' converts between pointers to integer types with different sign [-Wpointer-sign] return kernfs_find_and_get(parent, name); ^~~~ ./include/linux/kernfs.h:462:57: note: passing argument to parameter 'name' here kernfs_find_and_get(struct kernfs_node *kn, const char *name) ^ Signed-off-by: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index c6f0f0d0e17e..aa02c328dff5 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -512,7 +512,7 @@ static inline void sysfs_notify_dirent(struct kernfs_node *kn) } static inline struct kernfs_node *sysfs_get_dirent(struct kernfs_node *parent, - const unsigned char *name) + const char *name) { return kernfs_find_and_get(parent, name); } -- cgit v1.2.3 From 8f553c498e1772cccb39a114da4a498d22992758 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 24 May 2017 10:15:12 +0200 Subject: cpu/hotplug: Provide cpus_read|write_[un]lock() The counting 'rwsem' hackery of get|put_online_cpus() is going to be replaced by percpu rwsem. Rename the functions to make it clear that it's locking and not some refcount style interface. These new functions will be used for the preparatory patches which make the code ready for the percpu rwsem conversion. Rename all instances in the cpu hotplug code while at it. Signed-off-by: Thomas Gleixner Tested-by: Paul E. McKenney Acked-by: Paul E. 
McKenney Acked-by: Ingo Molnar Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170524081547.080397752@linutronix.de --- include/linux/cpu.h | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index f92081234afd..055876003914 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -99,26 +99,30 @@ static inline void cpu_maps_update_done(void) extern struct bus_type cpu_subsys; #ifdef CONFIG_HOTPLUG_CPU -/* Stop CPUs going up and down. */ - -extern void cpu_hotplug_begin(void); -extern void cpu_hotplug_done(void); -extern void get_online_cpus(void); -extern void put_online_cpus(void); +extern void cpus_write_lock(void); +extern void cpus_write_unlock(void); +extern void cpus_read_lock(void); +extern void cpus_read_unlock(void); extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); void clear_tasks_mm_cpumask(int cpu); int cpu_down(unsigned int cpu); -#else /* CONFIG_HOTPLUG_CPU */ - -static inline void cpu_hotplug_begin(void) {} -static inline void cpu_hotplug_done(void) {} -#define get_online_cpus() do { } while (0) -#define put_online_cpus() do { } while (0) -#define cpu_hotplug_disable() do { } while (0) -#define cpu_hotplug_enable() do { } while (0) -#endif /* CONFIG_HOTPLUG_CPU */ +#else /* CONFIG_HOTPLUG_CPU */ + +static inline void cpus_write_lock(void) { } +static inline void cpus_write_unlock(void) { } +static inline void cpus_read_lock(void) { } +static inline void cpus_read_unlock(void) { } +static inline void cpu_hotplug_disable(void) { } +static inline void cpu_hotplug_enable(void) { } +#endif /* !CONFIG_HOTPLUG_CPU */ + +/* Wrappers which go away once all code is converted */ +static inline void cpu_hotplug_begin(void) { cpus_write_lock(); } +static inline void cpu_hotplug_done(void) { cpus_write_unlock(); } +static inline void get_online_cpus(void) { 
cpus_read_lock(); } +static inline void put_online_cpus(void) { cpus_read_unlock(); } #ifdef CONFIG_PM_SLEEP_SMP extern int freeze_secondary_cpus(int primary); -- cgit v1.2.3 From ade3f680a76b474d9f5375a9b1d100ee787bf469 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 24 May 2017 10:15:13 +0200 Subject: cpu/hotplug: Provide lockdep_assert_cpus_held() Provide a stub function which can be used in places where existing get_online_cpus() calls are moved to call sites. This stub is going to be filled by the final conversion of the hotplug locking mechanism to a percpu rwsem. Signed-off-by: Thomas Gleixner Tested-by: Paul E. McKenney Acked-by: Paul E. McKenney Acked-by: Ingo Molnar Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170524081547.161282442@linutronix.de --- include/linux/cpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 055876003914..af4d660798e5 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -103,6 +103,7 @@ extern void cpus_write_lock(void); extern void cpus_write_unlock(void); extern void cpus_read_lock(void); extern void cpus_read_unlock(void); +static inline void lockdep_assert_cpus_held(void) { } extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); void clear_tasks_mm_cpumask(int cpu); @@ -114,6 +115,7 @@ static inline void cpus_write_lock(void) { } static inline void cpus_write_unlock(void) { } static inline void cpus_read_lock(void) { } static inline void cpus_read_unlock(void) { } +static inline void lockdep_assert_cpus_held(void) { } static inline void cpu_hotplug_disable(void) { } static inline void cpu_hotplug_enable(void) { } #endif /* !CONFIG_HOTPLUG_CPU */ -- cgit v1.2.3 From 71def423fe3da0d40ad3427a4cd5f9edc53bff67 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 24 May 2017 10:15:14 +0200 Subject: cpu/hotplug: Provide 
cpuhp_setup/remove_state[_nocalls]_cpuslocked() Some call sites of cpuhp_setup/remove_state[_nocalls]() are within a cpus_read locked region. cpuhp_setup/remove_state[_nocalls]() call cpus_read_lock() as well, which is possible in the current implementation but prevents converting the hotplug locking to a percpu rwsem. Provide locked versions of the interfaces to avoid nested calls to cpus_read_lock(). Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Tested-by: Paul E. McKenney Acked-by: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170524081547.239600868@linutronix.de --- include/linux/cpuhotplug.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 0f2a80377520..4fac564dde70 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -153,6 +153,11 @@ int __cpuhp_setup_state(enum cpuhp_state state, const char *name, bool invoke, int (*startup)(unsigned int cpu), int (*teardown)(unsigned int cpu), bool multi_instance); +int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, const char *name, + bool invoke, + int (*startup)(unsigned int cpu), + int (*teardown)(unsigned int cpu), + bool multi_instance); /** * cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks * @state: The state for which the calls are installed @@ -171,6 +176,15 @@ static inline int cpuhp_setup_state(enum cpuhp_state state, return __cpuhp_setup_state(state, name, true, startup, teardown, false); } +static inline int cpuhp_setup_state_cpuslocked(enum cpuhp_state state, + const char *name, + int (*startup)(unsigned int cpu), + int (*teardown)(unsigned int cpu)) +{ + return __cpuhp_setup_state_cpuslocked(state, name, true, startup, + teardown, false); +} + /** * cpuhp_setup_state_nocalls - Setup hotplug state callbacks without calling the * callbacks @@ -191,6 +205,15 @@ static 
inline int cpuhp_setup_state_nocalls(enum cpuhp_state state, false); } +static inline int cpuhp_setup_state_nocalls_cpuslocked(enum cpuhp_state state, + const char *name, + int (*startup)(unsigned int cpu), + int (*teardown)(unsigned int cpu)) +{ + return __cpuhp_setup_state_cpuslocked(state, name, false, startup, + teardown, false); +} + /** * cpuhp_setup_state_multi - Add callbacks for multi state * @state: The state for which the calls are installed @@ -250,6 +273,7 @@ static inline int cpuhp_state_add_instance_nocalls(enum cpuhp_state state, } void __cpuhp_remove_state(enum cpuhp_state state, bool invoke); +void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke); /** * cpuhp_remove_state - Remove hotplug state callbacks and invoke the teardown @@ -273,6 +297,11 @@ static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state) __cpuhp_remove_state(state, false); } +static inline void cpuhp_remove_state_nocalls_cpuslocked(enum cpuhp_state state) +{ + __cpuhp_remove_state_cpuslocked(state, false); +} + /** * cpuhp_remove_multi_state - Remove hotplug multi state callback * @state: The state for which the calls are removed -- cgit v1.2.3 From 9805c6733349ea3ccd22cf75b8ebaabb5290e310 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 24 May 2017 10:15:15 +0200 Subject: cpu/hotplug: Add __cpuhp_state_add_instance_cpuslocked() Add cpuslocked() variants for the multi instance registration so this can be called from a cpus_read_lock() protected region. Signed-off-by: Thomas Gleixner Tested-by: Paul E. 
McKenney Acked-by: Ingo Molnar Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170524081547.321782217@linutronix.de --- include/linux/cpuhotplug.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 4fac564dde70..df3d2719a796 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -240,6 +240,8 @@ static inline int cpuhp_setup_state_multi(enum cpuhp_state state, int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node, bool invoke); +int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state, + struct hlist_node *node, bool invoke); /** * cpuhp_state_add_instance - Add an instance for a state and invoke startup @@ -272,6 +274,13 @@ static inline int cpuhp_state_add_instance_nocalls(enum cpuhp_state state, return __cpuhp_state_add_instance(state, node, false); } +static inline int +cpuhp_state_add_instance_nocalls_cpuslocked(enum cpuhp_state state, + struct hlist_node *node) +{ + return __cpuhp_state_add_instance_cpuslocked(state, node, false); +} + void __cpuhp_remove_state(enum cpuhp_state state, bool invoke); void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke); -- cgit v1.2.3 From fe5595c074005bd94f0c7d1644175941149f6768 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 24 May 2017 10:15:16 +0200 Subject: stop_machine: Provide stop_machine_cpuslocked() Some call sites of stop_machine() are within a get_online_cpus() protected region. stop_machine() calls get_online_cpus() as well, which is possible in the current implementation but prevents converting the hotplug locking to a percpu rwsem. Provide stop_machine_cpuslocked() to avoid nested calls to get_online_cpus(). Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Tested-by: Paul E. McKenney Acked-by: Paul E. 
McKenney Acked-by: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170524081547.400700852@linutronix.de --- include/linux/stop_machine.h | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 3cc9632dcc2a..3d60275e3ba9 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -116,15 +116,29 @@ static inline int try_stop_cpus(const struct cpumask *cpumask, * @fn() runs. * * This can be thought of as a very heavy write lock, equivalent to - * grabbing every spinlock in the kernel. */ + * grabbing every spinlock in the kernel. + * + * Protects against CPU hotplug. + */ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); +/** + * stop_machine_cpuslocked: freeze the machine on all CPUs and run this function + * @fn: the function to run + * @data: the data ptr for the @fn() + * @cpus: the cpus to run the @fn() on (NULL = any online cpu) + * + * Same as above. Must be called from with in a cpus_read_lock() protected + * region. Avoids nested calls to cpus_read_lock(). 
+ */ +int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); + int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); #else /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */ -static inline int stop_machine(cpu_stop_fn_t fn, void *data, - const struct cpumask *cpus) +static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, + const struct cpumask *cpus) { unsigned long flags; int ret; @@ -134,6 +148,12 @@ static inline int stop_machine(cpu_stop_fn_t fn, void *data, return ret; } +static inline int stop_machine(cpu_stop_fn_t fn, void *data, + const struct cpumask *cpus) +{ + return stop_machine_cpuslocked(fn, data, cpus); +} + static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { -- cgit v1.2.3 From 9596695ee1e7eedd743c43811fe68299eb005b5c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 24 May 2017 10:15:17 +0200 Subject: padata: Make padata_alloc() static No users outside of padata.c Signed-off-by: Thomas Gleixner Tested-by: Paul E. 
McKenney Acked-by: Ingo Molnar Cc: Steffen Klassert Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Steven Rostedt Cc: linux-crypto@vger.kernel.org Link: http://lkml.kernel.org/r/20170524081547.491457256@linutronix.de --- include/linux/padata.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/padata.h b/include/linux/padata.h index 0f9e567d5e15..2f9c1f93b1ce 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -166,9 +166,6 @@ struct padata_instance { extern struct padata_instance *padata_alloc_possible( struct workqueue_struct *wq); -extern struct padata_instance *padata_alloc(struct workqueue_struct *wq, - const struct cpumask *pcpumask, - const struct cpumask *cbcpumask); extern void padata_free(struct padata_instance *pinst); extern int padata_do_parallel(struct padata_instance *pinst, struct padata_priv *padata, int cb_cpu); -- cgit v1.2.3 From 0b2c2a71e6f07fb67e6f72817d39910f64d2e258 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 24 May 2017 10:15:32 +0200 Subject: PCI: Replace the racy recursion prevention pci_call_probe() can be called recursively when a physical function is probed and the probing creates virtual functions, which are populated via pci_bus_add_device() which in turn can end up calling pci_call_probe() again. The code has an interesting way to prevent recursing into the workqueue code. That's accomplished by a check whether the current task runs already on the numa node which is associated with the device. While that works to prevent the recursion into the workqueue code, it's racy versus normal execution as there is no guarantee that the node does not vanish after the check. There is another issue with this code. It dereferences cpumask_of_node() unconditionally without checking whether the node is available. Make the detection reliable by: - Mark a probed device as 'is_probed' in pci_call_probe() - Check in pci_call_probe for a virtual function. 
If it's a virtual function and the associated physical function device is marked 'is_probed' then this is a recursive call, so the call can be invoked in the calling context. - Add a check whether the node is online before dereferencing it. Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Acked-by: Bjorn Helgaas Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: linux-pci@vger.kernel.org Cc: Sebastian Siewior Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170524081548.771457199@linutronix.de --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 33c2b0b77429..5026f2ae86db 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -371,6 +371,7 @@ struct pci_dev { unsigned int irq_managed:1; unsigned int has_secondary_link:1; unsigned int non_compliant_bars:1; /* broken BARs; ignore them */ + unsigned int is_probed:1; /* device probing in progress */ pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ -- cgit v1.2.3 From a63fbed776c7124ce9f606234267c3c095b2680e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 24 May 2017 10:15:34 +0200 Subject: perf/tracing/cpuhotplug: Fix locking order perf, tracing, kprobes and jump_labels have a gazillion of ways to create dependency lock chains. Some of those involve nested invocations of get_online_cpus(). The conversion of the hotplug locking to a percpu rwsem requires to avoid such nested calls. sys_perf_event_open() protects most of the syscall logic against cpu hotplug. This causes nested calls and lock inversions versus ftrace and kprobes in various interesting ways. It's impossible to move the hotplug locking to the outer end of all call chains in the involved facilities, so the hotplug protection in sys_perf_event_open() needs to be solved differently. Introduce 'pmus_mutex' which protects a perf private online cpumask. 
This mutex is taken when the mask is updated in the cpu hotplug callbacks and can be taken in sys_perf_event_open() to protect the swhash setup/teardown code and when the final judgement about a valid event has to be made. [ tglx: Produced changelog and fixed the swhash interaction ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Cc: Paul E. McKenney Cc: Sebastian Siewior Cc: Steven Rostedt Cc: Mathieu Desnoyers Cc: Masami Hiramatsu Link: http://lkml.kernel.org/r/20170524081548.930941109@linutronix.de --- include/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 24a635887f28..7d6aa29094b2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -801,6 +801,8 @@ struct perf_cpu_context { struct list_head sched_cb_entry; int sched_cb_usage; + + int online; }; struct perf_output_handle { -- cgit v1.2.3 From fc8dffd379ca5620664336eb895a426b42847558 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 24 May 2017 10:15:40 +0200 Subject: cpu/hotplug: Convert hotplug locking to percpu rwsem There are no more (known) nested calls to get_online_cpus() and all observed lock ordering problems have been addressed. Replace the magic nested 'rwsem' hackery with a percpu-rwsem. Signed-off-by: Thomas Gleixner Tested-by: Paul E. 
McKenney Acked-by: Ingo Molnar Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170524081549.447014063@linutronix.de --- include/linux/cpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index af4d660798e5..ca73bc1563f4 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -103,7 +103,7 @@ extern void cpus_write_lock(void); extern void cpus_write_unlock(void); extern void cpus_read_lock(void); extern void cpus_read_unlock(void); -static inline void lockdep_assert_cpus_held(void) { } +extern void lockdep_assert_cpus_held(void); extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); void clear_tasks_mm_cpumask(int cpu); -- cgit v1.2.3 From 62ec05dd71b19f5be890a1992227cc7b2ac0adc4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 24 May 2017 10:15:41 +0200 Subject: sched: Provide is_percpu_thread() helper Provide a helper function for checking whether current task is a per cpu thread. Signed-off-by: Thomas Gleixner Tested-by: Paul E. McKenney Acked-by: Ingo Molnar Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170524081549.541649540@linutronix.de --- include/linux/sched.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2b69fc650201..3dfa5f99d6ee 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1265,6 +1265,16 @@ extern struct pid *cad_pid; #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) +static inline bool is_percpu_thread(void) +{ +#ifdef CONFIG_SMP + return (current->flags & PF_NO_SETAFFINITY) && + (current->nr_cpus_allowed == 1); +#else + return true; +#endif +} + /* Per-process atomic flags. */ #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. 
*/ #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ -- cgit v1.2.3 From 6c364062bfed3c34490e85bea52ff6e2d4f0f281 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 22 May 2017 15:11:41 +0200 Subject: spi: core: Add support for registering SPI slave controllers Add support for registering SPI slave controllers using the existing SPI master framework: - SPI slave controllers must use spi_alloc_slave() instead of spi_alloc_master(), and should provide an additional callback "slave_abort" to abort an ongoing SPI transfer request, - SPI slave controllers are added to a new "spi_slave" device class, - SPI slave handlers can be bound to the SPI slave device represented by an SPI slave controller using a DT child node named "slave", - Alternatively, (un)binding an SPI slave handler to the SPI slave device represented by an SPI slave controller can be done by (un)registering the slave device through a sysfs virtual file named "slave". From the point of view of an SPI slave protocol handler, an SPI slave controller looks almost like an ordinary SPI master controller. The only exception is that a transfer request will block on the remote SPI master, and may be cancelled using spi_slave_abort(). Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 935bd2854ff1..0a78745e5766 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -29,8 +29,8 @@ struct spi_transfer; struct spi_flash_read_message; /* - * INTERFACES between SPI master-side drivers and SPI infrastructure. - * (There's no SPI slave support for Linux yet...) + * INTERFACES between SPI master-side drivers and SPI slave protocol handlers, + * and SPI infrastructure. 
*/ extern struct bus_type spi_bus_type; @@ -311,6 +311,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @min_speed_hz: Lowest supported transfer speed * @max_speed_hz: Highest supported transfer speed * @flags: other constraints relevant to this driver + * @slave: indicates that this is an SPI slave controller * @max_transfer_size: function that returns the max transfer size for * a &spi_device; may be %NULL, so the default %SIZE_MAX will be used. * @max_message_size: function that returns the max message size for @@ -374,6 +375,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @handle_err: the subsystem calls the driver to handle an error that occurs * in the generic implementation of transfer_one_message(). * @unprepare_message: undo any work done by prepare_message(). + * @slave_abort: abort the ongoing transfer request on an SPI slave controller * @spi_flash_read: to support spi-controller hardwares that provide * accelerated interface to read from flash devices. 
* @spi_flash_can_dma: analogous to can_dma() interface, but for @@ -447,6 +449,9 @@ struct spi_master { #define SPI_MASTER_MUST_TX BIT(4) /* requires tx */ #define SPI_MASTER_GPIO_SS BIT(5) /* GPIO CS must select slave */ + /* flag indicating this is an SPI slave controller */ + bool slave; + /* * on some hardware transfer / message size may be constrained * the limit may depend on device transfer settings @@ -539,6 +544,7 @@ struct spi_master { struct spi_message *message); int (*unprepare_message)(struct spi_master *master, struct spi_message *message); + int (*slave_abort)(struct spi_master *spi); int (*spi_flash_read)(struct spi_device *spi, struct spi_flash_read_message *msg); bool (*spi_flash_can_dma)(struct spi_device *spi, @@ -595,6 +601,11 @@ static inline void spi_master_put(struct spi_master *master) put_device(&master->dev); } +static inline bool spi_controller_is_slave(struct spi_master *ctlr) +{ + return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->slave; +} + /* PM calls that need to be issued by the driver */ extern int spi_master_suspend(struct spi_master *master); extern int spi_master_resume(struct spi_master *master); @@ -605,8 +616,23 @@ extern void spi_finalize_current_message(struct spi_master *master); extern void spi_finalize_current_transfer(struct spi_master *master); /* the spi driver core manages memory for the spi_master classdev */ -extern struct spi_master * -spi_alloc_master(struct device *host, unsigned size); +extern struct spi_master *__spi_alloc_controller(struct device *host, + unsigned int size, bool slave); + +static inline struct spi_master *spi_alloc_master(struct device *host, + unsigned int size) +{ + return __spi_alloc_controller(host, size, false); +} + +static inline struct spi_master *spi_alloc_slave(struct device *host, + unsigned int size) +{ + if (!IS_ENABLED(CONFIG_SPI_SLAVE)) + return NULL; + + return __spi_alloc_controller(host, size, true); +} extern int spi_register_master(struct spi_master *master); extern int 
devm_spi_register_master(struct device *dev, @@ -912,6 +938,7 @@ extern int spi_setup(struct spi_device *spi); extern int spi_async(struct spi_device *spi, struct spi_message *message); extern int spi_async_locked(struct spi_device *spi, struct spi_message *message); +extern int spi_slave_abort(struct spi_device *spi); static inline size_t spi_max_message_size(struct spi_device *spi) -- cgit v1.2.3 From cf9e4784f3bde3e4749163384f27450ddffe746c Mon Sep 17 00:00:00 2001 From: Hisashi Nakamura Date: Mon, 22 May 2017 15:11:43 +0200 Subject: spi: sh-msiof: Add slave mode support Add slave mode support to the MSIOF driver, in both PIO and DMA mode. For now this only supports the transmission of messages with a size that is known in advance. Signed-off-by: Hisashi Nakamura Signed-off-by: Hiromitsu Yamasaki [geert: Timeout handling cleanup, spi core integration, cancellation, rewording] Signed-off-by: Geert Uytterhoeven Acked-by: Rob Herring Signed-off-by: Mark Brown --- include/linux/spi/sh_msiof.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/sh_msiof.h b/include/linux/spi/sh_msiof.h index b087a85f5f72..f74b581f242f 100644 --- a/include/linux/spi/sh_msiof.h +++ b/include/linux/spi/sh_msiof.h @@ -1,10 +1,16 @@ #ifndef __SPI_SH_MSIOF_H__ #define __SPI_SH_MSIOF_H__ +enum { + MSIOF_SPI_MASTER, + MSIOF_SPI_SLAVE, +}; + struct sh_msiof_spi_info { int tx_fifo_override; int rx_fifo_override; u16 num_chipselect; + int mode; unsigned int dma_tx_id; unsigned int dma_rx_id; u32 dtdl; -- cgit v1.2.3 From 1f51445af35e8477027d87ca015a10257b13f5a2 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 26 May 2017 08:37:23 +0200 Subject: bridge: Export VLAN filtering state It's useful for drivers supporting bridge offload to be able to query the bridge's VLAN filtering state. 
Currently, upon enslavement to a bridge master, the offloading driver will only learn about the bridge's VLAN filtering state after the bridge device was already linked with its slave. Being able to query the bridge's VLAN filtering state allows such drivers to forbid enslavement in case resource couldn't be allocated for a VLAN-aware bridge and also choose the correct initialization routine for the enslaved port, which is dependent on the bridge type. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 0c16866a7aac..d6cd103eb165 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -80,4 +80,13 @@ static inline bool br_multicast_has_querier_adjacent(struct net_device *dev, } #endif +#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING) +bool br_vlan_enabled(const struct net_device *dev); +#else +static inline bool br_vlan_enabled(const struct net_device *dev) +{ + return false; +} +#endif + #endif -- cgit v1.2.3 From 9341b988e606f951df57d15569a425c6c74b945e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 26 May 2017 08:37:24 +0200 Subject: bridge: Export multicast enabled state During enslavement to a bridge, after the CHANGEUPPER is sent, the multicast enabled state of the bridge isn't propagated down to the offloading driver unless it's changed. This patch allows such drivers to query the multicast enabled state from the bridge, so that they'll be able to correctly configure their flood tables during port enslavement. In case multicast is disabled, unregistered multicast packets can be treated as broadcast and be flooded through all the bridge ports. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/linux/if_bridge.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index d6cd103eb165..3cd18ac0697f 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -62,6 +62,7 @@ int br_multicast_list_adjacent(struct net_device *dev, struct list_head *br_ip_list); bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto); bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto); +bool br_multicast_enabled(const struct net_device *dev); #else static inline int br_multicast_list_adjacent(struct net_device *dev, struct list_head *br_ip_list) @@ -78,6 +79,10 @@ static inline bool br_multicast_has_querier_adjacent(struct net_device *dev, { return false; } +static inline bool br_multicast_enabled(const struct net_device *dev) +{ + return false; +} #endif #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING) -- cgit v1.2.3 From d3ba5a9a345b1243276f8a982e1bce557c2504fd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 26 May 2017 12:03:11 +0300 Subject: posix-timers: Make posix_clocks immutable There are no more modular users providing a posix clock. The register function is now pointless so the posix clock array can be initialized statically at compile time and the array including the various k_clock structs can be marked 'const'. Inspired by changes in the Grsecurity patch set, but done proper. 
[ tglx: Massaged changelog and fixed the POSIX_TIMER=n case ] Signed-off-by: Christoph Hellwig Signed-off-by: Thomas Gleixner Cc: Mike Travis Cc: Dimitri Sivanich Link: http://lkml.kernel.org/r/20170526090311.3377-3-hch@lst.de --- include/linux/posix-timers.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 8c1e43ab14a9..b313ef2e7385 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -105,10 +105,11 @@ struct k_clock { struct itimerspec64 *cur_setting); }; -extern struct k_clock clock_posix_cpu; -extern struct k_clock clock_posix_dynamic; - -void posix_timers_register_clock(const clockid_t clock_id, struct k_clock *new_clock); +extern const struct k_clock clock_posix_cpu; +extern const struct k_clock clock_posix_dynamic; +extern const struct k_clock clock_process; +extern const struct k_clock clock_thread; +extern const struct k_clock alarm_clock; /* function to call to trigger timer event */ int posix_timer_event(struct k_itimer *timr, int si_private); -- cgit v1.2.3 From a75d30c772078546ac00399a94ecdc82df1a4d72 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 27 May 2017 06:07:19 -0400 Subject: fs/locks: pass kernel struct flock to fcntl_getlk/setlk This will make it easier to implement a sane compat fcntl syscall. 
[ jlayton: fix undeclared identifiers in 32-bit fcntl64 syscall handler ] Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Layton Signed-off-by: Jeff Layton --- include/linux/fs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 803e5a9b2654..aa4affb38c39 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1038,14 +1038,14 @@ static inline struct inode *locks_inode(const struct file *f) } #ifdef CONFIG_FILE_LOCKING -extern int fcntl_getlk(struct file *, unsigned int, struct flock __user *); +extern int fcntl_getlk(struct file *, unsigned int, struct flock *); extern int fcntl_setlk(unsigned int, struct file *, unsigned int, - struct flock __user *); + struct flock *); #if BITS_PER_LONG == 32 -extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 __user *); +extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 *); extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, - struct flock64 __user *); + struct flock64 *); #endif extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); -- cgit v1.2.3 From f822798e3ced63427d57d128ee8d118126455f84 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Fri, 26 May 2017 23:30:52 +0200 Subject: posix-timers: Remove mmtimer leftovers After removing mmtimer, the mmtimer struct can be removed from the k_itimer struct. 
Signed-off-by: Thomas Gleixner Cc: Russ Anderson Cc: Dimitri Sivanich Cc: Mike Travis Cc: Nate Zimmer Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170526130534.GE30788@hpe.com --- include/linux/posix-timers.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index b313ef2e7385..34e893a75771 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -72,12 +72,6 @@ struct k_itimer { ktime_t interval; } real; struct cpu_timer_list cpu; - struct { - unsigned int clock; - unsigned int node; - unsigned long incr; - unsigned long expires; - } mmtimer; struct { struct alarm alarmtimer; ktime_t interval; -- cgit v1.2.3 From 613763a1f056211522bac77ff39f25706e678fdd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 26 May 2017 22:04:29 -0400 Subject: take compat_sys_old_getrlimit() to native syscall ... and sanitize the ifdefs in there Signed-off-by: Al Viro --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 980c3c9b06f8..3cb15ea48aee 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -650,7 +650,7 @@ asmlinkage long sys_olduname(struct oldold_utsname __user *); asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim); -#if defined(COMPAT_RLIM_OLD_INFINITY) || !(defined(CONFIG_IA64)) +#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim); #endif asmlinkage long sys_setrlimit(unsigned int resource, -- cgit v1.2.3 From 3d3ea5af5c0b382bc9d9aed378fd814fb5d4a011 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 27 May 2017 10:14:34 -0400 Subject: rtnl: Add support for netdev event to link messages When netdev events happen, a rtnetlink_event() handler will send messages for every event in its white list.
These messages contain current information about a particular device, but they do not include the information about which event just happened. So, it is impossible to tell what just happened for these events. This patch adds a new extension to RTM_NEWLINK message called IFLA_EVENT that would have an encoding of event that triggered this message. This would allow the message consumer to easily determine if it needs to perform certain actions. Signed-off-by: Vladislav Yasevich Acked-by: David Ahern Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 57e54847b0b9..dea59c8eec54 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -18,7 +18,8 @@ extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags); struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, - unsigned change, gfp_t flags); + unsigned change, u32 event, + gfp_t flags); void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags); -- cgit v1.2.3 From a3995460491d4570af8e99ad34ddf6d1948254d9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 27 May 2017 10:42:25 -0700 Subject: net: phy: Relax error checking on sysfs_create_link() Some Ethernet drivers will attach/connect to a PHY device before calling register_netdevice() which is responsible for calling netdev_register_kobject() which would do the network device's kobject initialization. In such a case, sysfs_create_link() would return -ENOENT because the network device's kobject is not ready yet, and we would fail to connect to the PHY device.
In order to keep things simple and symmetrical, we just take the success path as indicative of the ability to access the network device's kobject, and create the second link if that's the case. Fixes: 5568363f0cb3 ("net: phy: Create sysfs reciprocal links for attached_dev/phydev") Reported-by: Woojung Hung Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 5a808a26e4cf..58f1b45a4c44 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -363,6 +363,7 @@ struct phy_c45_device_ids { * is_pseudo_fixed_link: Set to true if this phy is an Ethernet switch, etc. * has_fixups: Set to true if this phy has fixups/quirks. * suspended: Set to true if this phy has been suspended successfully. + * sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal. * state: state of the PHY for management purposes * dev_flags: Device-specific flags used by the PHY driver. * link_timeout: The number of timer firings to wait before the @@ -399,6 +400,7 @@ struct phy_device { bool is_pseudo_fixed_link; bool has_fixups; bool suspended; + bool sysfs_links; enum phy_state state; -- cgit v1.2.3 From 3e08b2df12983159ea2d9a1aa2b2bc601093b3cb Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:12 +0530 Subject: thermal: cpu_cooling: remove cpufreq_cooling_get_level() There is only one user of cpufreq_cooling_get_level() and that already has pointer to the cpufreq_cdev structure. It can directly call get_level() instead and we can get rid of cpufreq_cooling_get_level().
Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- include/linux/cpu_cooling.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index c156f5082758..96c5e4c2f9c8 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -82,7 +82,6 @@ of_cpufreq_power_cooling_register(struct device_node *np, */ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev); -unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq); #else /* !CONFIG_CPU_THERMAL */ static inline struct thermal_cooling_device * cpufreq_cooling_register(const struct cpumask *clip_cpus) @@ -117,11 +116,6 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) { return; } -static inline -unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq) -{ - return THERMAL_CSTATE_INVALID; -} #endif /* CONFIG_CPU_THERMAL */ #endif /* __CPU_COOLING_H__ */ -- cgit v1.2.3 From 4d753aa7b6279e4b7d338947a434689962f430d1 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:14 +0530 Subject: thermal: cpu_cooling: use cpufreq_policy to register cooling device The CPU cooling driver uses the cpufreq policy, to get clip_cpus, the frequency table, etc. Most of the callers of CPU cooling driver's registration routines have the cpufreq policy with them, but they only pass the policy->related_cpus cpumask. The __cpufreq_cooling_register() routine then gets the policy by itself and uses it. It would be much better if the callers can pass the policy instead directly. This also fixes a basic design flaw, where the policy can be freed while the CPU cooling driver is still active. 
Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- include/linux/cpu_cooling.h | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index 96c5e4c2f9c8..d4292ebc5c8b 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -28,47 +28,49 @@ #include #include +struct cpufreq_policy; + typedef int (*get_static_t)(cpumask_t *cpumask, int interval, unsigned long voltage, u32 *power); #ifdef CONFIG_CPU_THERMAL /** * cpufreq_cooling_register - function to create cpufreq cooling device. - * @clip_cpus: cpumask of cpus where the frequency constraints will happen + * @policy: cpufreq policy. */ struct thermal_cooling_device * -cpufreq_cooling_register(const struct cpumask *clip_cpus); +cpufreq_cooling_register(struct cpufreq_policy *policy); struct thermal_cooling_device * -cpufreq_power_cooling_register(const struct cpumask *clip_cpus, +cpufreq_power_cooling_register(struct cpufreq_policy *policy, u32 capacitance, get_static_t plat_static_func); /** * of_cpufreq_cooling_register - create cpufreq cooling device based on DT. * @np: a valid struct device_node to the cooling device device tree node. - * @clip_cpus: cpumask of cpus where the frequency constraints will happen + * @policy: cpufreq policy. 
*/ #ifdef CONFIG_THERMAL_OF struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus); + struct cpufreq_policy *policy); struct thermal_cooling_device * of_cpufreq_power_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus, + struct cpufreq_policy *policy, u32 capacitance, get_static_t plat_static_func); #else static inline struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus) + struct cpufreq_policy *policy) { return ERR_PTR(-ENOSYS); } static inline struct thermal_cooling_device * of_cpufreq_power_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus, + struct cpufreq_policy *policy, u32 capacitance, get_static_t plat_static_func) { @@ -84,12 +86,12 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev); #else /* !CONFIG_CPU_THERMAL */ static inline struct thermal_cooling_device * -cpufreq_cooling_register(const struct cpumask *clip_cpus) +cpufreq_cooling_register(struct cpufreq_policy *policy) { return ERR_PTR(-ENOSYS); } static inline struct thermal_cooling_device * -cpufreq_power_cooling_register(const struct cpumask *clip_cpus, +cpufreq_power_cooling_register(struct cpufreq_policy *policy, u32 capacitance, get_static_t plat_static_func) { return NULL; @@ -97,14 +99,14 @@ cpufreq_power_cooling_register(const struct cpumask *clip_cpus, static inline struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus) + struct cpufreq_policy *policy) { return ERR_PTR(-ENOSYS); } static inline struct thermal_cooling_device * of_cpufreq_power_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus, + struct cpufreq_policy *policy, u32 capacitance, get_static_t plat_static_func) { -- cgit v1.2.3 From 55d852931319d2e3ccde86cd426405231ce6c6ac Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:15 
+0530 Subject: cpufreq: create cpufreq_table_count_valid_entries() We need such a routine at two places already, lets create one. Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- include/linux/cpufreq.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index a5ce0bbeadb5..eb9abfadaeac 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -862,6 +862,20 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, return -EINVAL; } } + +static inline int cpufreq_table_count_valid_entries(const struct cpufreq_policy *policy) +{ + struct cpufreq_frequency_table *pos; + int count = 0; + + if (unlikely(!policy->freq_table)) + return 0; + + cpufreq_for_each_valid_entry(pos, policy->freq_table) + count++; + + return count; +} #else static inline int cpufreq_boost_trigger_state(int state) { -- cgit v1.2.3 From 0aa5e49c6845ecd82531341085f367767c9f419a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 5 Apr 2017 09:49:19 -0700 Subject: compiler: Add __designated_init annotation This allows structure annotations for requiring designated initialization in GCC 5.1.0 and later: https://gcc.gnu.org/onlinedocs/gcc/Designated-Inits.html The structure randomization layout plugin will be using this to help identify structures that need this form of initialization. Signed-off-by: Kees Cook --- include/linux/compiler-gcc.h | 8 ++++++++ include/linux/compiler.h | 4 ++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 0efef9cf014f..386caf6771ed 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -294,6 +294,14 @@ #define __no_sanitize_address __attribute__((no_sanitize_address)) #endif +#if GCC_VERSION >= 50100 +/* + * Mark structures as requiring designated initializers. 
+ * https://gcc.gnu.org/onlinedocs/gcc/Designated-Inits.html + */ +#define __designated_init __attribute__((designated_init)) +#endif + #endif /* gcc version >= 40000 specific checks */ #if !defined(__noclone) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index f8110051188f..80a1dea36cbe 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -440,6 +440,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s # define __attribute_const__ /* unimplemented */ #endif +#ifndef __designated_init +# define __designated_init +#endif + #ifndef __latent_entropy # define __latent_entropy #endif -- cgit v1.2.3 From 05f479bf7d239f01ff6546f2bdeb14ad0fe65601 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 23 May 2017 15:47:29 +0100 Subject: gpio: Add new flags to control sleep status of GPIOs Add new flags to allow users to specify that they are not concerned with the status of GPIOs whilst in a sleep/low power state. Signed-off-by: Charles Keepax Acked-by: Rob Herring Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 3 +++ include/linux/gpio/machine.h | 2 ++ include/linux/of_gpio.h | 1 + 3 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 393582867afd..af20369ec8e7 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -213,6 +213,9 @@ bool gpiochip_line_is_irq(struct gpio_chip *chip, unsigned int offset); bool gpiochip_line_is_open_drain(struct gpio_chip *chip, unsigned int offset); bool gpiochip_line_is_open_source(struct gpio_chip *chip, unsigned int offset); +/* Sleep persistence inquiry for drivers */ +bool gpiochip_line_is_persistent(struct gpio_chip *chip, unsigned int offset); + /* get driver data */ void *gpiochip_get_data(struct gpio_chip *chip); diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h index c0d712d22b07..13adadf53c09 100644 --- 
a/include/linux/gpio/machine.h +++ b/include/linux/gpio/machine.h @@ -9,6 +9,8 @@ enum gpio_lookup_flags { GPIO_ACTIVE_LOW = (1 << 0), GPIO_OPEN_DRAIN = (1 << 1), GPIO_OPEN_SOURCE = (1 << 2), + GPIO_SLEEP_MAINTAIN_VALUE = (0 << 3), + GPIO_SLEEP_MAY_LOOSE_VALUE = (1 << 3), }; /** diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index 1e089d5a182b..ca10f43564de 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -31,6 +31,7 @@ enum of_gpio_flags { OF_GPIO_ACTIVE_LOW = 0x1, OF_GPIO_SINGLE_ENDED = 0x2, OF_GPIO_OPEN_DRAIN = 0x4, + OF_GPIO_SLEEP_MAY_LOOSE_VALUE = 0x8, }; #ifdef CONFIG_OF_GPIO -- cgit v1.2.3 From 25e3ef894eef419ee239da42edc6c1f8a4f1cfb5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 23 May 2017 20:03:24 +0300 Subject: gpio: acpi: Split out acpi_gpio_get_irq_resource() helper The helper does retrieve pointer to struct acpi_resource_gpio from struct acpi_resource if it represents GpioInt() resource. It will be used by PNP code later on. 
Signed-off-by: Andy Shevchenko Reviewed-by: Mika Westerberg Signed-off-by: Linus Walleij --- include/linux/acpi.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 137e4a3d89c5..d5aa3c42f64d 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -964,6 +964,8 @@ int devm_acpi_dev_add_driver_gpios(struct device *dev, const struct acpi_gpio_mapping *gpios); void devm_acpi_dev_remove_driver_gpios(struct device *dev); +bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, + struct acpi_resource_gpio **agpio); int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index); #else static inline int acpi_dev_add_driver_gpios(struct acpi_device *adev, @@ -980,6 +982,11 @@ static inline int devm_acpi_dev_add_driver_gpios(struct device *dev, } static inline void devm_acpi_dev_remove_driver_gpios(struct device *dev) {} +static inline bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, + struct acpi_resource_gpio **agpio) +{ + return false; +} static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) { return -ENXIO; -- cgit v1.2.3 From c1a4634013141b96324c647b45356e16f1fff781 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 21 May 2017 23:57:27 +0200 Subject: gpio: adp5588: move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. 
Signed-off-by: Wolfram Sang Acked-by: Dmitry Torokhov Signed-off-by: Linus Walleij --- include/linux/i2c/adp5588.h | 172 ---------------------------------- include/linux/platform_data/adp5588.h | 172 ++++++++++++++++++++++++++++++++++ 2 files changed, 172 insertions(+), 172 deletions(-) delete mode 100644 include/linux/i2c/adp5588.h create mode 100644 include/linux/platform_data/adp5588.h (limited to 'include/linux') diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h deleted file mode 100644 index c2153049cfbd..000000000000 --- a/include/linux/i2c/adp5588.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Analog Devices ADP5588 I/O Expander and QWERTY Keypad Controller - * - * Copyright 2009-2010 Analog Devices Inc. - * - * Licensed under the GPL-2 or later. - */ - -#ifndef _ADP5588_H -#define _ADP5588_H - -#define DEV_ID 0x00 /* Device ID */ -#define CFG 0x01 /* Configuration Register1 */ -#define INT_STAT 0x02 /* Interrupt Status Register */ -#define KEY_LCK_EC_STAT 0x03 /* Key Lock and Event Counter Register */ -#define Key_EVENTA 0x04 /* Key Event Register A */ -#define Key_EVENTB 0x05 /* Key Event Register B */ -#define Key_EVENTC 0x06 /* Key Event Register C */ -#define Key_EVENTD 0x07 /* Key Event Register D */ -#define Key_EVENTE 0x08 /* Key Event Register E */ -#define Key_EVENTF 0x09 /* Key Event Register F */ -#define Key_EVENTG 0x0A /* Key Event Register G */ -#define Key_EVENTH 0x0B /* Key Event Register H */ -#define Key_EVENTI 0x0C /* Key Event Register I */ -#define Key_EVENTJ 0x0D /* Key Event Register J */ -#define KP_LCK_TMR 0x0E /* Keypad Lock1 to Lock2 Timer */ -#define UNLOCK1 0x0F /* Unlock Key1 */ -#define UNLOCK2 0x10 /* Unlock Key2 */ -#define GPIO_INT_STAT1 0x11 /* GPIO Interrupt Status */ -#define GPIO_INT_STAT2 0x12 /* GPIO Interrupt Status */ -#define GPIO_INT_STAT3 0x13 /* GPIO Interrupt Status */ -#define GPIO_DAT_STAT1 0x14 /* GPIO Data Status, Read twice to clear */ -#define GPIO_DAT_STAT2 0x15 /* GPIO Data Status, Read 
twice to clear */ -#define GPIO_DAT_STAT3 0x16 /* GPIO Data Status, Read twice to clear */ -#define GPIO_DAT_OUT1 0x17 /* GPIO DATA OUT */ -#define GPIO_DAT_OUT2 0x18 /* GPIO DATA OUT */ -#define GPIO_DAT_OUT3 0x19 /* GPIO DATA OUT */ -#define GPIO_INT_EN1 0x1A /* GPIO Interrupt Enable */ -#define GPIO_INT_EN2 0x1B /* GPIO Interrupt Enable */ -#define GPIO_INT_EN3 0x1C /* GPIO Interrupt Enable */ -#define KP_GPIO1 0x1D /* Keypad or GPIO Selection */ -#define KP_GPIO2 0x1E /* Keypad or GPIO Selection */ -#define KP_GPIO3 0x1F /* Keypad or GPIO Selection */ -#define GPI_EM1 0x20 /* GPI Event Mode 1 */ -#define GPI_EM2 0x21 /* GPI Event Mode 2 */ -#define GPI_EM3 0x22 /* GPI Event Mode 3 */ -#define GPIO_DIR1 0x23 /* GPIO Data Direction */ -#define GPIO_DIR2 0x24 /* GPIO Data Direction */ -#define GPIO_DIR3 0x25 /* GPIO Data Direction */ -#define GPIO_INT_LVL1 0x26 /* GPIO Edge/Level Detect */ -#define GPIO_INT_LVL2 0x27 /* GPIO Edge/Level Detect */ -#define GPIO_INT_LVL3 0x28 /* GPIO Edge/Level Detect */ -#define Debounce_DIS1 0x29 /* Debounce Disable */ -#define Debounce_DIS2 0x2A /* Debounce Disable */ -#define Debounce_DIS3 0x2B /* Debounce Disable */ -#define GPIO_PULL1 0x2C /* GPIO Pull Disable */ -#define GPIO_PULL2 0x2D /* GPIO Pull Disable */ -#define GPIO_PULL3 0x2E /* GPIO Pull Disable */ -#define CMP_CFG_STAT 0x30 /* Comparator Configuration and Status Register */ -#define CMP_CONFG_SENS1 0x31 /* Sensor1 Comparator Configuration Register */ -#define CMP_CONFG_SENS2 0x32 /* L2 Light Sensor Reference Level, Output Falling for Sensor 1 */ -#define CMP1_LVL2_TRIP 0x33 /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 1 */ -#define CMP1_LVL2_HYS 0x34 /* L3 Light Sensor Reference Level, Output Falling For Sensor 1 */ -#define CMP1_LVL3_TRIP 0x35 /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 1 */ -#define CMP1_LVL3_HYS 0x36 /* Sensor 2 Comparator Configuration Register */ -#define CMP2_LVL2_TRIP 0x37 /* L2 Light Sensor 
Reference Level, Output Falling for Sensor 2 */ -#define CMP2_LVL2_HYS 0x38 /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 2 */ -#define CMP2_LVL3_TRIP 0x39 /* L3 Light Sensor Reference Level, Output Falling For Sensor 2 */ -#define CMP2_LVL3_HYS 0x3A /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 2 */ -#define CMP1_ADC_DAT_R1 0x3B /* Comparator 1 ADC data Register1 */ -#define CMP1_ADC_DAT_R2 0x3C /* Comparator 1 ADC data Register2 */ -#define CMP2_ADC_DAT_R1 0x3D /* Comparator 2 ADC data Register1 */ -#define CMP2_ADC_DAT_R2 0x3E /* Comparator 2 ADC data Register2 */ - -#define ADP5588_DEVICE_ID_MASK 0xF - - /* Configuration Register1 */ -#define ADP5588_AUTO_INC (1 << 7) -#define ADP5588_GPIEM_CFG (1 << 6) -#define ADP5588_OVR_FLOW_M (1 << 5) -#define ADP5588_INT_CFG (1 << 4) -#define ADP5588_OVR_FLOW_IEN (1 << 3) -#define ADP5588_K_LCK_IM (1 << 2) -#define ADP5588_GPI_IEN (1 << 1) -#define ADP5588_KE_IEN (1 << 0) - -/* Interrupt Status Register */ -#define ADP5588_CMP2_INT (1 << 5) -#define ADP5588_CMP1_INT (1 << 4) -#define ADP5588_OVR_FLOW_INT (1 << 3) -#define ADP5588_K_LCK_INT (1 << 2) -#define ADP5588_GPI_INT (1 << 1) -#define ADP5588_KE_INT (1 << 0) - -/* Key Lock and Event Counter Register */ -#define ADP5588_K_LCK_EN (1 << 6) -#define ADP5588_LCK21 0x30 -#define ADP5588_KEC 0xF - -#define ADP5588_MAXGPIO 18 -#define ADP5588_BANK(offs) ((offs) >> 3) -#define ADP5588_BIT(offs) (1u << ((offs) & 0x7)) - -/* Put one of these structures in i2c_board_info platform_data */ - -#define ADP5588_KEYMAPSIZE 80 - -#define GPI_PIN_ROW0 97 -#define GPI_PIN_ROW1 98 -#define GPI_PIN_ROW2 99 -#define GPI_PIN_ROW3 100 -#define GPI_PIN_ROW4 101 -#define GPI_PIN_ROW5 102 -#define GPI_PIN_ROW6 103 -#define GPI_PIN_ROW7 104 -#define GPI_PIN_COL0 105 -#define GPI_PIN_COL1 106 -#define GPI_PIN_COL2 107 -#define GPI_PIN_COL3 108 -#define GPI_PIN_COL4 109 -#define GPI_PIN_COL5 110 -#define GPI_PIN_COL6 111 -#define GPI_PIN_COL7 112 
-#define GPI_PIN_COL8 113 -#define GPI_PIN_COL9 114 - -#define GPI_PIN_ROW_BASE GPI_PIN_ROW0 -#define GPI_PIN_ROW_END GPI_PIN_ROW7 -#define GPI_PIN_COL_BASE GPI_PIN_COL0 -#define GPI_PIN_COL_END GPI_PIN_COL9 - -#define GPI_PIN_BASE GPI_PIN_ROW_BASE -#define GPI_PIN_END GPI_PIN_COL_END - -#define ADP5588_GPIMAPSIZE_MAX (GPI_PIN_END - GPI_PIN_BASE + 1) - -struct adp5588_gpi_map { - unsigned short pin; - unsigned short sw_evt; -}; - -struct adp5588_kpad_platform_data { - int rows; /* Number of rows */ - int cols; /* Number of columns */ - const unsigned short *keymap; /* Pointer to keymap */ - unsigned short keymapsize; /* Keymap size */ - unsigned repeat:1; /* Enable key repeat */ - unsigned en_keylock:1; /* Enable Key Lock feature */ - unsigned short unlock_key1; /* Unlock Key 1 */ - unsigned short unlock_key2; /* Unlock Key 2 */ - const struct adp5588_gpi_map *gpimap; - unsigned short gpimapsize; - const struct adp5588_gpio_platform_data *gpio_data; -}; - -struct i2c_client; /* forward declaration */ - -struct adp5588_gpio_platform_data { - int gpio_start; /* GPIO Chip base # */ - const char *const *names; - unsigned irq_base; /* interrupt base # */ - unsigned pullup_dis_mask; /* Pull-Up Disable Mask */ - int (*setup)(struct i2c_client *client, - unsigned gpio, unsigned ngpio, - void *context); - int (*teardown)(struct i2c_client *client, - unsigned gpio, unsigned ngpio, - void *context); - void *context; -}; - -#endif diff --git a/include/linux/platform_data/adp5588.h b/include/linux/platform_data/adp5588.h new file mode 100644 index 000000000000..c2153049cfbd --- /dev/null +++ b/include/linux/platform_data/adp5588.h @@ -0,0 +1,172 @@ +/* + * Analog Devices ADP5588 I/O Expander and QWERTY Keypad Controller + * + * Copyright 2009-2010 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. 
+ */ + +#ifndef _ADP5588_H +#define _ADP5588_H + +#define DEV_ID 0x00 /* Device ID */ +#define CFG 0x01 /* Configuration Register1 */ +#define INT_STAT 0x02 /* Interrupt Status Register */ +#define KEY_LCK_EC_STAT 0x03 /* Key Lock and Event Counter Register */ +#define Key_EVENTA 0x04 /* Key Event Register A */ +#define Key_EVENTB 0x05 /* Key Event Register B */ +#define Key_EVENTC 0x06 /* Key Event Register C */ +#define Key_EVENTD 0x07 /* Key Event Register D */ +#define Key_EVENTE 0x08 /* Key Event Register E */ +#define Key_EVENTF 0x09 /* Key Event Register F */ +#define Key_EVENTG 0x0A /* Key Event Register G */ +#define Key_EVENTH 0x0B /* Key Event Register H */ +#define Key_EVENTI 0x0C /* Key Event Register I */ +#define Key_EVENTJ 0x0D /* Key Event Register J */ +#define KP_LCK_TMR 0x0E /* Keypad Lock1 to Lock2 Timer */ +#define UNLOCK1 0x0F /* Unlock Key1 */ +#define UNLOCK2 0x10 /* Unlock Key2 */ +#define GPIO_INT_STAT1 0x11 /* GPIO Interrupt Status */ +#define GPIO_INT_STAT2 0x12 /* GPIO Interrupt Status */ +#define GPIO_INT_STAT3 0x13 /* GPIO Interrupt Status */ +#define GPIO_DAT_STAT1 0x14 /* GPIO Data Status, Read twice to clear */ +#define GPIO_DAT_STAT2 0x15 /* GPIO Data Status, Read twice to clear */ +#define GPIO_DAT_STAT3 0x16 /* GPIO Data Status, Read twice to clear */ +#define GPIO_DAT_OUT1 0x17 /* GPIO DATA OUT */ +#define GPIO_DAT_OUT2 0x18 /* GPIO DATA OUT */ +#define GPIO_DAT_OUT3 0x19 /* GPIO DATA OUT */ +#define GPIO_INT_EN1 0x1A /* GPIO Interrupt Enable */ +#define GPIO_INT_EN2 0x1B /* GPIO Interrupt Enable */ +#define GPIO_INT_EN3 0x1C /* GPIO Interrupt Enable */ +#define KP_GPIO1 0x1D /* Keypad or GPIO Selection */ +#define KP_GPIO2 0x1E /* Keypad or GPIO Selection */ +#define KP_GPIO3 0x1F /* Keypad or GPIO Selection */ +#define GPI_EM1 0x20 /* GPI Event Mode 1 */ +#define GPI_EM2 0x21 /* GPI Event Mode 2 */ +#define GPI_EM3 0x22 /* GPI Event Mode 3 */ +#define GPIO_DIR1 0x23 /* GPIO Data Direction */ +#define GPIO_DIR2 0x24 /* GPIO 
Data Direction */ +#define GPIO_DIR3 0x25 /* GPIO Data Direction */ +#define GPIO_INT_LVL1 0x26 /* GPIO Edge/Level Detect */ +#define GPIO_INT_LVL2 0x27 /* GPIO Edge/Level Detect */ +#define GPIO_INT_LVL3 0x28 /* GPIO Edge/Level Detect */ +#define Debounce_DIS1 0x29 /* Debounce Disable */ +#define Debounce_DIS2 0x2A /* Debounce Disable */ +#define Debounce_DIS3 0x2B /* Debounce Disable */ +#define GPIO_PULL1 0x2C /* GPIO Pull Disable */ +#define GPIO_PULL2 0x2D /* GPIO Pull Disable */ +#define GPIO_PULL3 0x2E /* GPIO Pull Disable */ +#define CMP_CFG_STAT 0x30 /* Comparator Configuration and Status Register */ +#define CMP_CONFG_SENS1 0x31 /* Sensor1 Comparator Configuration Register */ +#define CMP_CONFG_SENS2 0x32 /* L2 Light Sensor Reference Level, Output Falling for Sensor 1 */ +#define CMP1_LVL2_TRIP 0x33 /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 1 */ +#define CMP1_LVL2_HYS 0x34 /* L3 Light Sensor Reference Level, Output Falling For Sensor 1 */ +#define CMP1_LVL3_TRIP 0x35 /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 1 */ +#define CMP1_LVL3_HYS 0x36 /* Sensor 2 Comparator Configuration Register */ +#define CMP2_LVL2_TRIP 0x37 /* L2 Light Sensor Reference Level, Output Falling for Sensor 2 */ +#define CMP2_LVL2_HYS 0x38 /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 2 */ +#define CMP2_LVL3_TRIP 0x39 /* L3 Light Sensor Reference Level, Output Falling For Sensor 2 */ +#define CMP2_LVL3_HYS 0x3A /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 2 */ +#define CMP1_ADC_DAT_R1 0x3B /* Comparator 1 ADC data Register1 */ +#define CMP1_ADC_DAT_R2 0x3C /* Comparator 1 ADC data Register2 */ +#define CMP2_ADC_DAT_R1 0x3D /* Comparator 2 ADC data Register1 */ +#define CMP2_ADC_DAT_R2 0x3E /* Comparator 2 ADC data Register2 */ + +#define ADP5588_DEVICE_ID_MASK 0xF + + /* Configuration Register1 */ +#define ADP5588_AUTO_INC (1 << 7) +#define ADP5588_GPIEM_CFG (1 << 6) +#define 
ADP5588_OVR_FLOW_M (1 << 5) +#define ADP5588_INT_CFG (1 << 4) +#define ADP5588_OVR_FLOW_IEN (1 << 3) +#define ADP5588_K_LCK_IM (1 << 2) +#define ADP5588_GPI_IEN (1 << 1) +#define ADP5588_KE_IEN (1 << 0) + +/* Interrupt Status Register */ +#define ADP5588_CMP2_INT (1 << 5) +#define ADP5588_CMP1_INT (1 << 4) +#define ADP5588_OVR_FLOW_INT (1 << 3) +#define ADP5588_K_LCK_INT (1 << 2) +#define ADP5588_GPI_INT (1 << 1) +#define ADP5588_KE_INT (1 << 0) + +/* Key Lock and Event Counter Register */ +#define ADP5588_K_LCK_EN (1 << 6) +#define ADP5588_LCK21 0x30 +#define ADP5588_KEC 0xF + +#define ADP5588_MAXGPIO 18 +#define ADP5588_BANK(offs) ((offs) >> 3) +#define ADP5588_BIT(offs) (1u << ((offs) & 0x7)) + +/* Put one of these structures in i2c_board_info platform_data */ + +#define ADP5588_KEYMAPSIZE 80 + +#define GPI_PIN_ROW0 97 +#define GPI_PIN_ROW1 98 +#define GPI_PIN_ROW2 99 +#define GPI_PIN_ROW3 100 +#define GPI_PIN_ROW4 101 +#define GPI_PIN_ROW5 102 +#define GPI_PIN_ROW6 103 +#define GPI_PIN_ROW7 104 +#define GPI_PIN_COL0 105 +#define GPI_PIN_COL1 106 +#define GPI_PIN_COL2 107 +#define GPI_PIN_COL3 108 +#define GPI_PIN_COL4 109 +#define GPI_PIN_COL5 110 +#define GPI_PIN_COL6 111 +#define GPI_PIN_COL7 112 +#define GPI_PIN_COL8 113 +#define GPI_PIN_COL9 114 + +#define GPI_PIN_ROW_BASE GPI_PIN_ROW0 +#define GPI_PIN_ROW_END GPI_PIN_ROW7 +#define GPI_PIN_COL_BASE GPI_PIN_COL0 +#define GPI_PIN_COL_END GPI_PIN_COL9 + +#define GPI_PIN_BASE GPI_PIN_ROW_BASE +#define GPI_PIN_END GPI_PIN_COL_END + +#define ADP5588_GPIMAPSIZE_MAX (GPI_PIN_END - GPI_PIN_BASE + 1) + +struct adp5588_gpi_map { + unsigned short pin; + unsigned short sw_evt; +}; + +struct adp5588_kpad_platform_data { + int rows; /* Number of rows */ + int cols; /* Number of columns */ + const unsigned short *keymap; /* Pointer to keymap */ + unsigned short keymapsize; /* Keymap size */ + unsigned repeat:1; /* Enable key repeat */ + unsigned en_keylock:1; /* Enable Key Lock feature */ + unsigned short unlock_key1; /* 
Unlock Key 1 */ + unsigned short unlock_key2; /* Unlock Key 2 */ + const struct adp5588_gpi_map *gpimap; + unsigned short gpimapsize; + const struct adp5588_gpio_platform_data *gpio_data; +}; + +struct i2c_client; /* forward declaration */ + +struct adp5588_gpio_platform_data { + int gpio_start; /* GPIO Chip base # */ + const char *const *names; + unsigned irq_base; /* interrupt base # */ + unsigned pullup_dis_mask; /* Pull-Up Disable Mask */ + int (*setup)(struct i2c_client *client, + unsigned gpio, unsigned ngpio, + void *context); + int (*teardown)(struct i2c_client *client, + unsigned gpio, unsigned ngpio, + void *context); + void *context; +}; + +#endif -- cgit v1.2.3 From 79ffd5f98c11572c004d52f7ecd270ab680a7f72 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 26 May 2017 16:55:10 -0700 Subject: Input: sparse-keymap - remove sparse_keymap_free() Now that all users of sparse_keymap_free() are gone we can remove the stub. Signed-off-by: Dmitry Torokhov --- include/linux/input/sparse-keymap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/input/sparse-keymap.h b/include/linux/input/sparse-keymap.h index 52db62064c6e..c7346e33d958 100644 --- a/include/linux/input/sparse-keymap.h +++ b/include/linux/input/sparse-keymap.h @@ -51,7 +51,6 @@ struct key_entry *sparse_keymap_entry_from_keycode(struct input_dev *dev, int sparse_keymap_setup(struct input_dev *dev, const struct key_entry *keymap, int (*setup)(struct input_dev *, struct key_entry *)); -void sparse_keymap_free(struct input_dev *dev); void sparse_keymap_report_entry(struct input_dev *dev, const struct key_entry *ke, unsigned int value, bool autorelease); -- cgit v1.2.3 From 4a78cc644eed3cf2dae00c3a959910a86c140fd6 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 26 May 2017 17:10:15 +0200 Subject: mtd: nand: Make sure drivers not supporting SET/GET_FEATURES return -ENOTSUPP A lot of drivers are providing their own ->cmdfunc(), and most of the 
time this implementation does not support all possible NAND operations. But since ->cmdfunc() cannot return an error code, the core has no way to know that the operation it requested is not supported. This is a problem we cannot address for all kinds of operations with the current design, but we can prevent these silent failures for the GET/SET FEATURES operation by overloading the default ->onfi_{set,get}_features() methods with one returning -ENOTSUPP. Reported-by: Chris Packham Signed-off-by: Boris Brezillon Tested-by: Chris Packham --- include/linux/mtd/nand.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 7a01d2eb7443..28f7dd9177e9 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1259,6 +1259,11 @@ int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page); int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, int page); +/* Stub used by drivers that do not support GET/SET FEATURES operations */ +int nand_onfi_get_set_features_notsupp(struct mtd_info *mtd, + struct nand_chip *chip, int addr, + u8 *subfeature_param); + /* Default read_page_raw implementation */ int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *buf, int oob_required, int page); -- cgit v1.2.3 From e45a79da863c199d7c47b1ee6d33cee23c89eac1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 24 May 2017 09:07:47 +0200 Subject: skbuff/mac80211: introduce and use skb_put_zero() This pattern was introduced a number of times in mac80211 just now, and since it's present in a number of other places it makes sense to add a little helper for it. This just adds the helper and transforms the mac80211 code, a later patch will transform other places. Acked-by: David S. 
Miller Signed-off-by: Johannes Berg --- include/linux/skbuff.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bfc7892f6c33..d92056b2da44 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1937,6 +1937,15 @@ static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) return tmp; } +static inline unsigned char *skb_put_zero(struct sk_buff *skb, unsigned int len) +{ + unsigned char *tmp = skb_put(skb, len); + + memset(tmp, 0, len); + + return tmp; +} + unsigned char *skb_push(struct sk_buff *skb, unsigned int len); static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) { -- cgit v1.2.3 From 9ae287274817c032a4428fde84d1ab26d6b96761 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 27 May 2017 16:19:28 -0600 Subject: net: add extack arg to lwtunnel build state Pass extack arg down to lwtunnel_build_state and the build_state callbacks. Add messages for failures in lwtunnel_build_state, and add the extack arg to nla_parse where possible in the build_state callbacks. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index a68aad484c69..8664fd26eb5d 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -102,6 +102,16 @@ struct netlink_ext_ack { (extack)->bad_attr = (attr); \ } while (0) +#define NL_SET_ERR_MSG_ATTR(extack, attr, msg) do { \ + static const char __msg[] = (msg); \ + struct netlink_ext_ack *__extack = (extack); \ + \ + if (__extack) { \ + __extack->_msg = __msg; \ + __extack->bad_attr = (attr); \ + } \ +} while (0) + extern void netlink_kernel_release(struct sock *sk); extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups); extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); -- cgit v1.2.3 From a4f4fa681add289ebfec6d776376ad7a2ffda669 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 30 May 2017 09:25:48 -0700 Subject: PCI: Cache PRI and PASID bits in pci_dev Device drivers need to check if an IOMMU enabled ATS, PRI and PASID in order to know when they can use the SVM API. Cache PRI and PASID bits in the pci_dev structure, similarly to what is currently done for ATS. 
Signed-off-by: Jean-Philippe Brucker Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 33c2b0b77429..f612c1d85863 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -356,6 +356,8 @@ struct pci_dev { unsigned int msix_enabled:1; unsigned int ari_enabled:1; /* ARI forwarding */ unsigned int ats_enabled:1; /* Address Translation Service */ + unsigned int pasid_enabled:1; /* Process Address Space ID */ + unsigned int pri_enabled:1; /* Page Request Interface */ unsigned int is_managed:1; unsigned int needs_freset:1; /* Dev requires fundamental reset */ unsigned int state_saved:1; -- cgit v1.2.3 From 4ebeb1ec56d4c54a56b6f43c2603d9a4688c83ba Mon Sep 17 00:00:00 2001 From: CQ Tang Date: Tue, 30 May 2017 09:25:49 -0700 Subject: PCI: Restore PRI and PASID state after Function-Level Reset After a Function-Level Reset, PCI states need to be restored. Save PASID features and PRI reqs cached. 
[bhelgaas: search for capability only if PRI/PASID were enabled] Signed-off-by: CQ Tang Signed-off-by: Ashok Raj Signed-off-by: Bjorn Helgaas Cc: Joerg Roedel Cc: Jean-Phillipe Brucker Cc: David Woodhouse --- include/linux/pci-ats.h | 10 ++++++++++ include/linux/pci.h | 6 ++++++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index 57e0b8250947..782fb8e0755f 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -7,6 +7,7 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs); void pci_disable_pri(struct pci_dev *pdev); +void pci_restore_pri_state(struct pci_dev *pdev); int pci_reset_pri(struct pci_dev *pdev); #else /* CONFIG_PCI_PRI */ @@ -20,6 +21,10 @@ static inline void pci_disable_pri(struct pci_dev *pdev) { } +static inline void pci_restore_pri_state(struct pci_dev *pdev) +{ +} + static inline int pci_reset_pri(struct pci_dev *pdev) { return -ENODEV; @@ -31,6 +36,7 @@ static inline int pci_reset_pri(struct pci_dev *pdev) int pci_enable_pasid(struct pci_dev *pdev, int features); void pci_disable_pasid(struct pci_dev *pdev); +void pci_restore_pasid_state(struct pci_dev *pdev); int pci_pasid_features(struct pci_dev *pdev); int pci_max_pasids(struct pci_dev *pdev); @@ -45,6 +51,10 @@ static inline void pci_disable_pasid(struct pci_dev *pdev) { } +static inline void pci_restore_pasid_state(struct pci_dev *pdev) +{ +} + static inline int pci_pasid_features(struct pci_dev *pdev) { return -EINVAL; diff --git a/include/linux/pci.h b/include/linux/pci.h index f612c1d85863..c7cfdff2529c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -400,6 +400,12 @@ struct pci_dev { u16 ats_cap; /* ATS Capability offset */ u8 ats_stu; /* ATS Smallest Translation Unit */ atomic_t ats_ref_cnt; /* number of VFs with ATS enabled */ +#endif +#ifdef CONFIG_PCI_PRI + u32 pri_reqs_alloc; /* Number of PRI requests allocated */ +#endif +#ifdef CONFIG_PCI_PASID + u16 pasid_features; #endif 
phys_addr_t rom; /* Physical address of ROM if it's not from the BAR */ size_t romlen; /* Length of ROM if it's not from the BAR */ -- cgit v1.2.3 From c7f3c595f6ff7a1cfbf7ac782722bf5173e27775 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 19 May 2017 15:29:10 -0700 Subject: pstore: Populate pstore record->time field The current time will be initially available in the record->time field for all pstore_read() and pstore_write() calls. Backends can either update the field during read(), or use the field during write() instead of fetching time themselves. Signed-off-by: Kees Cook --- include/linux/pstore.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index e2233f50f428..61f806a7fe29 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -138,7 +138,10 @@ struct pstore_record { * memory allocation may be broken during an Oops. Regardless, * @buf must be proccesed or copied before returning. The * backend is also expected to write @id with something that - 8 can help identify this record to a future @erase callback. + * can help identify this record to a future @erase callback. + * The @time field will be prepopulated with the current time, + * when available. The @size field will have the size of data + * in @buf. * * Returns 0 on success, and non-zero on error. * -- cgit v1.2.3 From 71189fa9b092ef125ee741eccb2f5fa916798afd Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 30 May 2017 13:31:27 -0700 Subject: bpf: free up BPF_JMP | BPF_CALL | BPF_X opcode free up BPF_JMP | BPF_CALL | BPF_X opcode to be used by actual indirect call by register and use kernel internal opcode to mark call instruction into bpf_tail_call() helper. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/linux/filter.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 62d948f80730..a20ba40fcb73 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -57,6 +57,9 @@ struct bpf_prog_aux; #define BPF_REG_AX MAX_BPF_REG #define MAX_BPF_JIT_REG (MAX_BPF_REG + 1) +/* unused opcode to mark special call to bpf_tail_call() helper */ +#define BPF_TAIL_CALL 0xf0 + /* As per nm, we expose JITed images as text (code) section for * kallsyms. That way, tools like perf can find it to match * addresses. -- cgit v1.2.3 From 8726679a0fa317f8e83d0843b266453f31bff092 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 30 May 2017 13:31:29 -0700 Subject: bpf: teach verifier to track stack depth teach verifier to track bpf program stack depth Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6bb38d76faf4..fcc80ca11045 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -171,6 +171,7 @@ struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; u32 max_ctx_offset; + u32 stack_depth; struct latch_tree_node ksym_tnode; struct list_head ksym_lnode; const struct bpf_verifier_ops *ops; -- cgit v1.2.3 From fc3973a1fa090d5f5437621a9ae1f2232a04ee5b Mon Sep 17 00:00:00 2001 From: Woojung Huh Date: Wed, 31 May 2017 20:19:13 +0000 Subject: phy: micrel: add Microchip KSZ 9477 Switch PHY support Adding Microchip 9477 Phy included in KSZ9477 Switch. Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Woojung Huh Signed-off-by: David S. 
Miller --- include/linux/micrel_phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index f541da68d1e7..472fa4d4ea62 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -37,6 +37,8 @@ #define PHY_ID_KSZ8795 0x00221550 +#define PHY_ID_KSZ9477 0x00221631 + /* struct phy_device dev_flags definitions */ #define MICREL_PHY_50MHZ_CLK 0x00000001 #define MICREL_PHY_FXEN 0x00000002 -- cgit v1.2.3 From b987e98e50ab90e5291581204ef7a1c649313a70 Mon Sep 17 00:00:00 2001 From: Woojung Huh Date: Wed, 31 May 2017 20:19:19 +0000 Subject: dsa: add DSA switch driver for Microchip KSZ9477 The KSZ9477 is a fully integrated layer 2, managed, 7 ports GigE switch with numerous advanced features. 5 ports incorporate 10/100/1000 Mbps PHYs. The other 2 ports have interfaces that can be configured as SGMII, RGMII, MII or RMII. Either of these may connect directly to a host processor or to an external PHY. The SGMII port may interface to a fiber optic transceiver. This driver currently supports vlan, fdb, mdb & mirror dsa switch operations. Reviewed-by: Florian Fainelli Signed-off-by: Woojung Huh Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- include/linux/platform_data/microchip-ksz.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 include/linux/platform_data/microchip-ksz.h (limited to 'include/linux') diff --git a/include/linux/platform_data/microchip-ksz.h b/include/linux/platform_data/microchip-ksz.h new file mode 100644 index 000000000000..84789ca634aa --- /dev/null +++ b/include/linux/platform_data/microchip-ksz.h @@ -0,0 +1,29 @@ +/* + * Microchip KSZ series switch platform data + * + * Copyright (C) 2017 + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef __MICROCHIP_KSZ_H +#define __MICROCHIP_KSZ_H + +#include + +struct ksz_platform_data { + u32 chip_id; + u16 enabled_ports; +}; + +#endif -- cgit v1.2.3 From 284e76387c38260e834c99b010a68d75fc46b394 Mon Sep 17 00:00:00 2001 From: Rick Altherr Date: Mon, 22 May 2017 14:12:24 -0700 Subject: hw_random: timeriomem_rng: Allow setting RNG quality from platform data When a hw_random device's quality is non-zero, it will automatically be used to fill the kernel's entropy pool. Since timeriomem_rng is used by many different devices, the quality needs to be provided by platform data or device tree. 
Signed-off-by: Rick Altherr Signed-off-by: Herbert Xu --- include/linux/timeriomem-rng.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timeriomem-rng.h b/include/linux/timeriomem-rng.h index 46eb27ddbfab..3e00122bcf88 100644 --- a/include/linux/timeriomem-rng.h +++ b/include/linux/timeriomem-rng.h @@ -13,4 +13,7 @@ struct timeriomem_rng_data { /* measures in usecs */ unsigned int period; + + /* bits of entropy per 1024 bits read */ + unsigned int quality; }; -- cgit v1.2.3 From 9748e1d87573c94191442d6bd0307f523e5cd8b8 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Sat, 29 Apr 2017 11:06:45 +0200 Subject: mtd: nand: add support for Micron on-die ECC Now that the core NAND subsystem has support for on-die ECC, this commit brings the necessary code to support on-die ECC on Micron NANDs. In micron_nand_init(), we detect if the Micron NAND chip supports on-die ECC mode, by checking a number of conditions: - It must be an ONFI NAND - It must be a SLC NAND - Enabling *and* disabling on-die ECC must work - The on-die ECC must be correcting 4 bits per 512 bytes of data. Some Micron NAND chips have an on-die ECC able to correct 8 bits per 512 bytes of data, but they work slightly differently and therefore we don't support them in this patch. Then, if the on-die ECC cannot be disabled (some Micron NAND have on-die ECC forcefully enabled), we bail out, as we don't support such NANDs. Indeed, the implementation of raw_read()/raw_write() make the assumption that on-die ECC can be disabled. Support for Micron NANDs with on-die ECC forcefully enabled can easily be added, but in the absence of such HW for testing, we preferred to simply bail out. If the on-die ECC is supported, and requested in the Device Tree, then it is indeed enabled, by using custom implementations of the ->read_page(), ->read_page_raw(), ->write_page() and ->write_page_raw() operation to properly handle the on-die ECC. 
In the non-raw functions, we need to enable the internal ECC engine before issuing the NAND_CMD_READ0 or NAND_CMD_SEQIN commands, which is why we set the NAND_ECC_CUSTOM_PAGE_ACCESS option at initialization time (it asks the NAND core to let the NAND driver issue those commands). Signed-off-by: Thomas Petazzoni Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 28f7dd9177e9..893d0ce08030 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -258,6 +258,8 @@ struct nand_chip; /* Vendor-specific feature address (Micron) */ #define ONFI_FEATURE_ADDR_READ_RETRY 0x89 +#define ONFI_FEATURE_ON_DIE_ECC 0x90 +#define ONFI_FEATURE_ON_DIE_ECC_EN BIT(3) /* ONFI subfeature parameters length */ #define ONFI_SUBFEATURE_PARAM_LEN 4 -- cgit v1.2.3 From 104e442a67cfba4d0cc982384761befb917fb6a1 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 16 Mar 2017 09:35:58 +0100 Subject: mtd: nand: Pass the CS line to ->setup_data_interface() Some NAND controllers can assign different NAND timings to different CS lines. Pass the CS line information to ->setup_data_interface() so that the NAND controller driver knows which CS line is concerned by the setup_data_interface() request. 
Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 893d0ce08030..9de3686e738c 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -107,6 +107,8 @@ int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len); #define NAND_STATUS_READY 0x40 #define NAND_STATUS_WP 0x80 +#define NAND_DATA_IFACE_CHECK_ONLY -1 + /* * Constants for ECC_MODES */ @@ -818,7 +820,10 @@ struct nand_manufacturer_ops { * @read_retries: [INTERN] the number of read retry modes supported * @onfi_set_features: [REPLACEABLE] set the features for ONFI nand * @onfi_get_features: [REPLACEABLE] get the features for ONFI nand - * @setup_data_interface: [OPTIONAL] setup the data interface and timing + * @setup_data_interface: [OPTIONAL] setup the data interface and timing. If + * chipnr is set to %NAND_DATA_IFACE_CHECK_ONLY this + * means the configuration should not be applied but + * only checked. * @bbt: [INTERN] bad block table pointer * @bbt_td: [REPLACEABLE] bad block table descriptor for flash * lookup. @@ -862,9 +867,8 @@ struct nand_chip { int (*onfi_get_features)(struct mtd_info *mtd, struct nand_chip *chip, int feature_addr, uint8_t *subfeature_para); int (*setup_read_retry)(struct mtd_info *mtd, int retry_mode); - int (*setup_data_interface)(struct mtd_info *mtd, - const struct nand_data_interface *conf, - bool check_only); + int (*setup_data_interface)(struct mtd_info *mtd, int chipnr, + const struct nand_data_interface *conf); int chip_delay; -- cgit v1.2.3 From 7d135bcced20be2b50128432c5426a7278ec4f6d Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sat, 6 May 2017 18:03:33 +0200 Subject: mtd: nand: Drop the ->errstat() hook The ->errstat() hook is no longer implemented by NAND controller drivers. Get rid of it before someone starts abusing it. 
Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 9de3686e738c..8b3607bde1b5 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -834,9 +834,6 @@ struct nand_manufacturer_ops { * structure which is shared among multiple independent * devices. * @priv: [OPTIONAL] pointer to private chip data - * @errstat: [OPTIONAL] hardware specific function to perform - * additional error status checks (determine if errors are - * correctable). * @manufacturer: [INTERN] Contains manufacturer information */ @@ -860,8 +857,6 @@ struct nand_chip { int(*waitfunc)(struct mtd_info *mtd, struct nand_chip *this); int (*erase)(struct mtd_info *mtd, int page); int (*scan_bbt)(struct mtd_info *mtd); - int (*errstat)(struct mtd_info *mtd, struct nand_chip *this, int state, - int status, int page); int (*onfi_set_features)(struct mtd_info *mtd, struct nand_chip *chip, int feature_addr, uint8_t *subfeature_para); int (*onfi_get_features)(struct mtd_info *mtd, struct nand_chip *chip, -- cgit v1.2.3 From 858edde001e14f070d0fff347fb56c6c79e15312 Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Thu, 11 May 2017 12:17:41 +0530 Subject: phy: Move ULPI phy header out of drivers to include path Although ULPI phy is currently being used by tusb1210, there can be other consumers too in future. So move this to the includes path for phy. 
Signed-off-by: Vivek Gautam Cc: Stephen Boyd Cc: Heikki Krogerus Cc: Kishon Vijay Abraham I Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: linux-omap@vger.kernel.org Cc: linux-usb@vger.kernel.org Acked-by: Heikki Krogerus Signed-off-by: Kishon Vijay Abraham I --- include/linux/phy/ulpi_phy.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/linux/phy/ulpi_phy.h (limited to 'include/linux') diff --git a/include/linux/phy/ulpi_phy.h b/include/linux/phy/ulpi_phy.h new file mode 100644 index 000000000000..f2ebe490a4bc --- /dev/null +++ b/include/linux/phy/ulpi_phy.h @@ -0,0 +1,31 @@ +#include + +/** + * Helper that registers PHY for a ULPI device and adds a lookup for binding it + * and its controller, which is always the parent. + */ +static inline struct phy +*ulpi_phy_create(struct ulpi *ulpi, const struct phy_ops *ops) +{ + struct phy *phy; + int ret; + + phy = phy_create(&ulpi->dev, NULL, ops); + if (IS_ERR(phy)) + return phy; + + ret = phy_create_lookup(phy, "usb2-phy", dev_name(ulpi->dev.parent)); + if (ret) { + phy_destroy(phy); + return ERR_PTR(ret); + } + + return phy; +} + +/* Remove a PHY that was created with ulpi_phy_create() and its lookup. */ +static inline void ulpi_phy_destroy(struct ulpi *ulpi, struct phy *phy) +{ + phy_remove_lookup(phy, "usb2-phy", dev_name(ulpi->dev.parent)); + phy_destroy(phy); +} -- cgit v1.2.3 From debd3a3b27c76c65a7d032b6f01710e6a6d555ab Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 24 May 2017 17:53:54 +0300 Subject: of: Make of_fwnode_handle() safer At the expense of a little bit more complexity in the of_fwnode_handle() macro, make the macro result in NULL in case its argument is NULL while still referencing it only once. 
Signed-off-by: Sakari Ailus Reviewed-by: Kieran Bingham Signed-off-by: Rob Herring --- include/linux/of.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 50fcdb54087f..f05c87f0c495 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -159,7 +159,13 @@ static inline struct device_node *to_of_node(struct fwnode_handle *fwnode) container_of(fwnode, struct device_node, fwnode) : NULL; } -#define of_fwnode_handle(node) (&(node)->fwnode) +#define of_fwnode_handle(node) \ + ({ \ + typeof(node) __of_fwnode_handle_node = (node); \ + \ + __of_fwnode_handle_node ? \ + &__of_fwnode_handle_node->fwnode : NULL; \ + }) static inline bool of_have_populated_dt(void) { -- cgit v1.2.3 From d20dc1493db438fbbfb7733adc82f472dd8a0789 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 24 May 2017 17:53:55 +0300 Subject: of: Support const and non-const use for to_of_node() Turn to_of_node() into a macro in order to support both const and non-const use. Additionally make the fwnode argument to is_of_node() const as well. Signed-off-by: Sakari Ailus Reviewed-by: Kieran Bingham Signed-off-by: Rob Herring --- include/linux/of.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index f05c87f0c495..29b7b738b509 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -148,16 +148,20 @@ extern raw_spinlock_t devtree_lock; #ifdef CONFIG_OF void of_core_init(void); -static inline bool is_of_node(struct fwnode_handle *fwnode) +static inline bool is_of_node(const struct fwnode_handle *fwnode) { return !IS_ERR_OR_NULL(fwnode) && fwnode->type == FWNODE_OF; } -static inline struct device_node *to_of_node(struct fwnode_handle *fwnode) -{ - return is_of_node(fwnode) ? 
- container_of(fwnode, struct device_node, fwnode) : NULL; -} +#define to_of_node(__fwnode) \ + ({ \ + typeof(__fwnode) __to_of_node_fwnode = (__fwnode); \ + \ + is_of_node(__to_of_node_fwnode) ? \ + container_of(__to_of_node_fwnode, \ + struct device_node, fwnode) : \ + NULL; \ + }) #define of_fwnode_handle(node) \ ({ \ @@ -539,12 +543,12 @@ static inline void of_core_init(void) { } -static inline bool is_of_node(struct fwnode_handle *fwnode) +static inline bool is_of_node(const struct fwnode_handle *fwnode) { return false; } -static inline struct device_node *to_of_node(struct fwnode_handle *fwnode) +static inline struct device_node *to_of_node(const struct fwnode_handle *fwnode) { return NULL; } -- cgit v1.2.3 From 726fdbe9fa7ebccda1579716f68f8bae6fa9c87a Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Thu, 1 Jun 2017 15:29:06 +0300 Subject: qed: Encapsulate interrupt counters in struct We already have an API struct that contains interrupt-related numbers. Use it to encapsulate all information relating to the status of SBs as (used|free). Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_if.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 73c46d6d5727..607e1c5e185a 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -886,9 +886,15 @@ struct qed_eth_stats { #define TX_PI(tc) (RX_PI + 1 + tc) struct qed_sb_cnt_info { - int sb_cnt; - int sb_iov_cnt; - int sb_free_blk; + /* Original, current, and free SBs for PF */ + int orig; + int cnt; + int free_cnt; + + /* Original, current and free SBS for child VFs */ + int iov_orig; + int iov_cnt; + int free_cnt_iov; }; static inline u16 qed_sb_update_sb_idx(struct qed_sb_info *sb_info) -- cgit v1.2.3 From ec33d71de7309c50531c2ae0eb178244899e6e46 Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Wed, 31 May 2017 09:18:33 +0200 Subject: net-next: stmmac: add optional setup function Instead of adding more ifthen logic for adding a new mac_device_info setup function, it is easier to add a function pointer to the function needed. Signed-off-by: Corentin Labbe Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 3921cb9dfadb..8bb550bca96d 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -177,6 +177,7 @@ struct plat_stmmacenet_data { void (*fix_mac_speed)(void *priv, unsigned int speed); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); + struct mac_device_info *(*setup)(void *priv); void *bsp_priv; struct clk *stmmac_clk; struct clk *pclk; -- cgit v1.2.3 From 9f93ac8d4085f718d3c7c5fedcb98dbdd2287648 Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Wed, 31 May 2017 09:18:36 +0200 Subject: net-next: stmmac: Add dwmac-sun8i The dwmac-sun8i is a heavy hacked version of stmmac hardware by allwinner. 
In fact the only common part is the descriptor management and the first register function. Signed-off-by: Corentin Labbe Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 8bb550bca96d..108739ff9223 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -186,6 +186,7 @@ struct plat_stmmacenet_data { struct reset_control *stmmac_rst; struct stmmac_axi *axi; int has_gmac4; + bool has_sun8i; bool tso_en; int mac_port_sel_speed; bool en_tx_lpi_clockgating; -- cgit v1.2.3 From 9efc160f4bbd69b17b48edec53067537d04e62b7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 31 May 2017 14:43:46 -0700 Subject: block: Introduce queue flag QUEUE_FLAG_SCSI_PASSTHROUGH From the context where a SCSI command is submitted it is not always possible to figure out whether or not the queue the command is submitted to has struct scsi_request as the first member of its private data. Hence introduce the flag QUEUE_FLAG_SCSI_PASSTHROUGH. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Cc: Omar Sandoval Cc: Don Brace Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ab92c4ea138b..019f18c65098 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -618,6 +618,7 @@ struct request_queue { #define QUEUE_FLAG_STATS 27 /* track rq completion times */ #define QUEUE_FLAG_POLL_STATS 28 /* collecting stats for hybrid polling */ #define QUEUE_FLAG_REGISTERED 29 /* queue has been registered to a disk */ +#define QUEUE_FLAG_SCSI_PASSTHROUGH 30 /* queue supports SCSI commands */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -708,6 +709,8 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define blk_queue_secure_erase(q) \ (test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags)) #define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) +#define blk_queue_scsi_passthrough(q) \ + test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags) #define blk_noretry_request(rq) \ ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ -- cgit v1.2.3 From 681bdf80cff6844f81216b6b05516d82f69c23fd Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Thu, 11 May 2017 11:23:09 -0700 Subject: i40e/i40evf: create and use new unified header file This moves a header for i40evf to include/linux/avf/virtchnl.h. The directory name AVF is an acronym for the Intel(R) Adaptive Virtual Function. This first step creates the new file, which is a rename of drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h to include/linux/avf/virtchnl.h, and should show up in git as a rename when using git log --follow. To keep things building after the move, the changes to the i40evf driver are made to point to the new include file location. 
Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 446 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 446 insertions(+) create mode 100644 include/linux/avf/virtchnl.h (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h new file mode 100644 index 000000000000..7d6da3ac24f4 --- /dev/null +++ b/include/linux/avf/virtchnl.h @@ -0,0 +1,446 @@ +/******************************************************************************* + * + * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver + * Copyright(c) 2013 - 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Contact Information: + * e1000-devel Mailing List + * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + * + ******************************************************************************/ + +#ifndef _I40E_VIRTCHNL_H_ +#define _I40E_VIRTCHNL_H_ + +/* Description: + * This header file describes the VF-PF communication protocol used + * by the various i40e drivers. + * + * Admin queue buffer usage: + * desc->opcode is always i40e_aqc_opc_send_msg_to_pf + * flags, retval, datalen, and data addr are all used normally. 
+ * Firmware copies the cookie fields when sending messages between the PF and + * VF, but uses all other fields internally. Due to this limitation, we + * must send all messages as "indirect", i.e. using an external buffer. + * + * All the vsi indexes are relative to the VF. Each VF can have maximum of + * three VSIs. All the queue indexes are relative to the VSI. Each VF can + * have a maximum of sixteen queues for all of its VSIs. + * + * The PF is required to return a status code in v_retval for all messages + * except RESET_VF, which does not require any response. The return value is of + * i40e_status_code type, defined in the i40e_type.h. + * + * In general, VF driver initialization should roughly follow the order of these + * opcodes. The VF driver must first validate the API version of the PF driver, + * then request a reset, then get resources, then configure queues and + * interrupts. After these operations are complete, the VF driver may start + * its queues, optionally add MAC and VLAN filters, and process traffic. + */ + +/* Opcodes for VF-PF communication. These are placed in the v_opcode field + * of the virtchnl_msg structure. + */ +enum i40e_virtchnl_ops { +/* The PF sends status change events to VFs using + * the I40E_VIRTCHNL_OP_EVENT opcode. + * VFs send requests to the PF using the other ops. 
+ */ + I40E_VIRTCHNL_OP_UNKNOWN = 0, + I40E_VIRTCHNL_OP_VERSION = 1, /* must ALWAYS be 1 */ + I40E_VIRTCHNL_OP_RESET_VF = 2, + I40E_VIRTCHNL_OP_GET_VF_RESOURCES = 3, + I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE = 4, + I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE = 5, + I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES = 6, + I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP = 7, + I40E_VIRTCHNL_OP_ENABLE_QUEUES = 8, + I40E_VIRTCHNL_OP_DISABLE_QUEUES = 9, + I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS = 10, + I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS = 11, + I40E_VIRTCHNL_OP_ADD_VLAN = 12, + I40E_VIRTCHNL_OP_DEL_VLAN = 13, + I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE = 14, + I40E_VIRTCHNL_OP_GET_STATS = 15, + I40E_VIRTCHNL_OP_RSVD = 16, + I40E_VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */ + I40E_VIRTCHNL_OP_IWARP = 20, + I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21, + I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP = 22, + I40E_VIRTCHNL_OP_CONFIG_RSS_KEY = 23, + I40E_VIRTCHNL_OP_CONFIG_RSS_LUT = 24, + I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25, + I40E_VIRTCHNL_OP_SET_RSS_HENA = 26, + +}; + +/* Virtual channel message descriptor. This overlays the admin queue + * descriptor. All other data is passed in external buffers. + */ + +struct i40e_virtchnl_msg { + u8 pad[8]; /* AQ flags/opcode/len/retval fields */ + enum i40e_virtchnl_ops v_opcode; /* avoid confusion with desc->opcode */ + i40e_status v_retval; /* ditto for desc->retval */ + u32 vfid; /* used by PF when sending to VF */ +}; + +/* Message descriptions and data structures.*/ + +/* I40E_VIRTCHNL_OP_VERSION + * VF posts its version number to the PF. PF responds with its version number + * in the same format, along with a return code. + * Reply from PF has its major/minor versions also in param0 and param1. + * If there is a major version mismatch, then the VF cannot operate. + * If there is a minor version mismatch, then the VF can operate but should + * add a warning to the system log. + * + * This enum element MUST always be specified as == 1, regardless of other + * changes in the API. 
The PF must always respond to this message without + * error regardless of version mismatch. + */ +#define I40E_VIRTCHNL_VERSION_MAJOR 1 +#define I40E_VIRTCHNL_VERSION_MINOR 1 +#define I40E_VIRTCHNL_VERSION_MINOR_NO_VF_CAPS 0 + +struct i40e_virtchnl_version_info { + u32 major; + u32 minor; +}; + +/* I40E_VIRTCHNL_OP_RESET_VF + * VF sends this request to PF with no parameters + * PF does NOT respond! VF driver must delay then poll VFGEN_RSTAT register + * until reset completion is indicated. The admin queue must be reinitialized + * after this operation. + * + * When reset is complete, PF must ensure that all queues in all VSIs associated + * with the VF are stopped, all queue configurations in the HMC are set to 0, + * and all MAC and VLAN filters (except the default MAC address) on all VSIs + * are cleared. + */ + +/* I40E_VIRTCHNL_OP_GET_VF_RESOURCES + * Version 1.0 VF sends this request to PF with no parameters + * Version 1.1 VF sends this request to PF with u32 bitmap of its capabilities + * PF responds with an indirect message containing + * i40e_virtchnl_vf_resource and one or more + * i40e_virtchnl_vsi_resource structures. 
+ */ + +struct i40e_virtchnl_vsi_resource { + u16 vsi_id; + u16 num_queue_pairs; + enum i40e_vsi_type vsi_type; + u16 qset_handle; + u8 default_mac_addr[ETH_ALEN]; +}; +/* VF offload flags */ +#define I40E_VIRTCHNL_VF_OFFLOAD_L2 0x00000001 +#define I40E_VIRTCHNL_VF_OFFLOAD_IWARP 0x00000002 +#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ 0x00000008 +#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG 0x00000010 +#define I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR 0x00000020 +#define I40E_VIRTCHNL_VF_OFFLOAD_VLAN 0x00010000 +#define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 +#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 +#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000 +#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP 0X00100000 +#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00200000 + +#define I40E_VF_BASE_MODE_OFFLOADS (I40E_VIRTCHNL_VF_OFFLOAD_L2 | \ + I40E_VIRTCHNL_VF_OFFLOAD_VLAN | \ + I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) + +struct i40e_virtchnl_vf_resource { + u16 num_vsis; + u16 num_queue_pairs; + u16 max_vectors; + u16 max_mtu; + + u32 vf_offload_flags; + u32 rss_key_size; + u32 rss_lut_size; + + struct i40e_virtchnl_vsi_resource vsi_res[1]; +}; + +/* I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE + * VF sends this message to set up parameters for one TX queue. + * External data buffer contains one instance of i40e_virtchnl_txq_info. + * PF configures requested queue and returns a status code. + */ + +/* Tx queue config info */ +struct i40e_virtchnl_txq_info { + u16 vsi_id; + u16 queue_id; + u16 ring_len; /* number of descriptors, multiple of 8 */ + u16 headwb_enabled; + u64 dma_ring_addr; + u64 dma_headwb_addr; +}; + +/* I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE + * VF sends this message to set up parameters for one RX queue. + * External data buffer contains one instance of i40e_virtchnl_rxq_info. + * PF configures requested queue and returns a status code. 
+ */ + +/* Rx queue config info */ +struct i40e_virtchnl_rxq_info { + u16 vsi_id; + u16 queue_id; + u32 ring_len; /* number of descriptors, multiple of 32 */ + u16 hdr_size; + u16 splithdr_enabled; + u32 databuffer_size; + u32 max_pkt_size; + u64 dma_ring_addr; + enum i40e_hmc_obj_rx_hsplit_0 rx_split_pos; +}; + +/* I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES + * VF sends this message to set parameters for all active TX and RX queues + * associated with the specified VSI. + * PF configures queues and returns status. + * If the number of queues specified is greater than the number of queues + * associated with the VSI, an error is returned and no queues are configured. + */ +struct i40e_virtchnl_queue_pair_info { + /* NOTE: vsi_id and queue_id should be identical for both queues. */ + struct i40e_virtchnl_txq_info txq; + struct i40e_virtchnl_rxq_info rxq; +}; + +struct i40e_virtchnl_vsi_queue_config_info { + u16 vsi_id; + u16 num_queue_pairs; + struct i40e_virtchnl_queue_pair_info qpair[1]; +}; + +/* I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP + * VF uses this message to map vectors to queues. + * The rxq_map and txq_map fields are bitmaps used to indicate which queues + * are to be associated with the specified vector. + * The "other" causes are always mapped to vector 0. + * PF configures interrupt mapping and returns status. + */ +struct i40e_virtchnl_vector_map { + u16 vsi_id; + u16 vector_id; + u16 rxq_map; + u16 txq_map; + u16 rxitr_idx; + u16 txitr_idx; +}; + +struct i40e_virtchnl_irq_map_info { + u16 num_vectors; + struct i40e_virtchnl_vector_map vecmap[1]; +}; + +/* I40E_VIRTCHNL_OP_ENABLE_QUEUES + * I40E_VIRTCHNL_OP_DISABLE_QUEUES + * VF sends these message to enable or disable TX/RX queue pairs. + * The queues fields are bitmaps indicating which queues to act upon. + * (Currently, we only support 16 queues per VF, but we make the field + * u32 to allow for expansion.) + * PF performs requested action and returns status. 
+ */ +struct i40e_virtchnl_queue_select { + u16 vsi_id; + u16 pad; + u32 rx_queues; + u32 tx_queues; +}; + +/* I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS + * VF sends this message in order to add one or more unicast or multicast + * address filters for the specified VSI. + * PF adds the filters and returns status. + */ + +/* I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS + * VF sends this message in order to remove one or more unicast or multicast + * filters for the specified VSI. + * PF removes the filters and returns status. + */ + +struct i40e_virtchnl_ether_addr { + u8 addr[ETH_ALEN]; + u8 pad[2]; +}; + +struct i40e_virtchnl_ether_addr_list { + u16 vsi_id; + u16 num_elements; + struct i40e_virtchnl_ether_addr list[1]; +}; + +/* I40E_VIRTCHNL_OP_ADD_VLAN + * VF sends this message to add one or more VLAN tag filters for receives. + * PF adds the filters and returns status. + * If a port VLAN is configured by the PF, this operation will return an + * error to the VF. + */ + +/* I40E_VIRTCHNL_OP_DEL_VLAN + * VF sends this message to remove one or more VLAN tag filters for receives. + * PF removes the filters and returns status. + * If a port VLAN is configured by the PF, this operation will return an + * error to the VF. + */ + +struct i40e_virtchnl_vlan_filter_list { + u16 vsi_id; + u16 num_elements; + u16 vlan_id[1]; +}; + +/* I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE + * VF sends VSI id and flags. + * PF returns status code in retval. + * Note: we assume that broadcast accept mode is always enabled. + */ +struct i40e_virtchnl_promisc_info { + u16 vsi_id; + u16 flags; +}; + +#define I40E_FLAG_VF_UNICAST_PROMISC 0x00000001 +#define I40E_FLAG_VF_MULTICAST_PROMISC 0x00000002 + +/* I40E_VIRTCHNL_OP_GET_STATS + * VF sends this message to request stats for the selected VSI. VF uses + * the i40e_virtchnl_queue_select struct to specify the VSI. The queue_id + * field is ignored by the PF. + * + * PF replies with struct i40e_eth_stats in an external buffer. 
+ */ + +/* I40E_VIRTCHNL_OP_CONFIG_RSS_KEY + * I40E_VIRTCHNL_OP_CONFIG_RSS_LUT + * VF sends these messages to configure RSS. Only supported if both PF + * and VF drivers set the I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF bit during + * configuration negotiation. If this is the case, then the RSS fields in + * the VF resource struct are valid. + * Both the key and LUT are initialized to 0 by the PF, meaning that + * RSS is effectively disabled until set up by the VF. + */ +struct i40e_virtchnl_rss_key { + u16 vsi_id; + u16 key_len; + u8 key[1]; /* RSS hash key, packed bytes */ +}; + +struct i40e_virtchnl_rss_lut { + u16 vsi_id; + u16 lut_entries; + u8 lut[1]; /* RSS lookup table*/ +}; + +/* I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS + * I40E_VIRTCHNL_OP_SET_RSS_HENA + * VF sends these messages to get and set the hash filter enable bits for RSS. + * By default, the PF sets these to all possible traffic types that the + * hardware supports. The VF can query this value if it wants to change the + * traffic types that are hashed by the hardware. + * Traffic types are defined in the i40e_filter_pctype enum in i40e_type.h + */ +struct i40e_virtchnl_rss_hena { + u64 hena; +}; + +/* I40E_VIRTCHNL_OP_EVENT + * PF sends this message to inform the VF driver of events that may affect it. + * No direct response is expected from the VF, though it may generate other + * messages in response to this one. + */ +enum i40e_virtchnl_event_codes { + I40E_VIRTCHNL_EVENT_UNKNOWN = 0, + I40E_VIRTCHNL_EVENT_LINK_CHANGE, + I40E_VIRTCHNL_EVENT_RESET_IMPENDING, + I40E_VIRTCHNL_EVENT_PF_DRIVER_CLOSE, +}; +#define I40E_PF_EVENT_SEVERITY_INFO 0 +#define I40E_PF_EVENT_SEVERITY_CERTAIN_DOOM 255 + +struct i40e_virtchnl_pf_event { + enum i40e_virtchnl_event_codes event; + union { + struct { + enum i40e_aq_link_speed link_speed; + bool link_status; + } link_event; + } event_data; + + int severity; +}; + +/* I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP + * VF uses this message to request PF to map IWARP vectors to IWARP queues. 
+ * The request for this originates from the VF IWARP driver through + * a client interface between VF LAN and VF IWARP driver. + * A vector could have an AEQ and CEQ attached to it although + * there is a single AEQ per VF IWARP instance in which case + * most vectors will have an INVALID_IDX for aeq and valid idx for ceq. + * There will never be a case where there will be multiple CEQs attached + * to a single vector. + * PF configures interrupt mapping and returns status. + */ + +/* HW does not define a type value for AEQ; only for RX/TX and CEQ. + * In order for us to keep the interface simple, SW will define a + * unique type value for AEQ. + */ +#define I40E_QUEUE_TYPE_PE_AEQ 0x80 +#define I40E_QUEUE_INVALID_IDX 0xFFFF + +struct i40e_virtchnl_iwarp_qv_info { + u32 v_idx; /* msix_vector */ + u16 ceq_idx; + u16 aeq_idx; + u8 itr_idx; +}; + +struct i40e_virtchnl_iwarp_qvlist_info { + u32 num_vectors; + struct i40e_virtchnl_iwarp_qv_info qv_info[1]; +}; + +/* VF reset states - these are written into the RSTAT register: + * I40E_VFGEN_RSTAT1 on the PF + * I40E_VFGEN_RSTAT on the VF + * When the PF initiates a reset, it writes 0 + * When the reset is complete, it writes 1 + * When the PF detects that the VF has recovered, it writes 2 + * VF checks this register periodically to determine if a reset has occurred, + * then polls it to know when the reset is complete. + * If either the PF or VF reads the register while the hardware + * is in a reset state, it will return DEADBEEF, which, when masked + * will result in 3. 
+ */ +enum i40e_vfr_states { + I40E_VFR_INPROGRESS = 0, + I40E_VFR_COMPLETED, + I40E_VFR_VFACTIVE, + I40E_VFR_UNKNOWN, +}; + +#endif /* _I40E_VIRTCHNL_H_ */ -- cgit v1.2.3 From 310a2ad92e3fd9139e3641464f1de113fa89825b Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Thu, 11 May 2017 11:23:11 -0700 Subject: virtchnl: rename i40e to generic virtchnl This morphs all the i40e and i40evf references to/in virtchnl.h to be generic, using only automated methods. Updates all the callers to use the new names. A followup patch provides separate clean ups for messy line conversions from these "automatic" changes, to make them more reviewable. Was executed with the following sed script: sed -i -f transform_script drivers/net/ethernet/intel/i40e/i40e_client.c sed -i -f transform_script drivers/net/ethernet/intel/i40e/i40e_prototype.h sed -i -f transform_script drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c sed -i -f transform_script drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h sed -i -f transform_script drivers/net/ethernet/intel/i40evf/i40e_common.c sed -i -f transform_script drivers/net/ethernet/intel/i40evf/i40e_prototype.h sed -i -f transform_script drivers/net/ethernet/intel/i40evf/i40evf.h sed -i -f transform_script drivers/net/ethernet/intel/i40evf/i40evf_client.c sed -i -f transform_script drivers/net/ethernet/intel/i40evf/i40evf_main.c sed -i -f transform_script drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c sed -i -f transform_script include/linux/avf/virtchnl.h transform_script: ----8<---- s/I40E_VIRTCHNL_SUPPORTED_QTYPES/SAVE_ME_SUPPORTED_QTYPES/g s/I40E_VIRTCHNL_VF_CAP/SAVE_ME_VF_CAP/g s/I40E_VIRTCHNL_/VIRTCHNL_/g s/i40e_virtchnl_/virtchnl_/g s/i40e_vfr_/virtchnl_vfr_/g s/I40E_VFR_/VIRTCHNL_VFR_/g s/VIRTCHNL_OP_ADD_ETHER_ADDRESS/VIRTCHNL_OP_ADD_ETH_ADDR/g s/VIRTCHNL_OP_DEL_ETHER_ADDRESS/VIRTCHNL_OP_DEL_ETH_ADDR/g s/VIRTCHNL_OP_FCOE/VIRTCHNL_OP_RSVD/g s/SAVE_ME_SUPPORTED_QTYPES/I40E_VIRTCHNL_SUPPORTED_QTYPES/g 
s/SAVE_ME_VF_CAP/I40E_VIRTCHNL_VF_CAP/g ----8<---- Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 233 ++++++++++++++++++++++--------------------- 1 file changed, 117 insertions(+), 116 deletions(-) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 7d6da3ac24f4..a8b616121960 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -24,8 +24,8 @@ * ******************************************************************************/ -#ifndef _I40E_VIRTCHNL_H_ -#define _I40E_VIRTCHNL_H_ +#ifndef _VIRTCHNL_H_ +#define _VIRTCHNL_H_ /* Description: * This header file describes the VF-PF communication protocol used @@ -56,36 +56,36 @@ /* Opcodes for VF-PF communication. These are placed in the v_opcode field * of the virtchnl_msg structure. */ -enum i40e_virtchnl_ops { +enum virtchnl_ops { /* The PF sends status change events to VFs using - * the I40E_VIRTCHNL_OP_EVENT opcode. + * the VIRTCHNL_OP_EVENT opcode. * VFs send requests to the PF using the other ops. 
*/ - I40E_VIRTCHNL_OP_UNKNOWN = 0, - I40E_VIRTCHNL_OP_VERSION = 1, /* must ALWAYS be 1 */ - I40E_VIRTCHNL_OP_RESET_VF = 2, - I40E_VIRTCHNL_OP_GET_VF_RESOURCES = 3, - I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE = 4, - I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE = 5, - I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES = 6, - I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP = 7, - I40E_VIRTCHNL_OP_ENABLE_QUEUES = 8, - I40E_VIRTCHNL_OP_DISABLE_QUEUES = 9, - I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS = 10, - I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS = 11, - I40E_VIRTCHNL_OP_ADD_VLAN = 12, - I40E_VIRTCHNL_OP_DEL_VLAN = 13, - I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE = 14, - I40E_VIRTCHNL_OP_GET_STATS = 15, - I40E_VIRTCHNL_OP_RSVD = 16, - I40E_VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */ - I40E_VIRTCHNL_OP_IWARP = 20, - I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21, - I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP = 22, - I40E_VIRTCHNL_OP_CONFIG_RSS_KEY = 23, - I40E_VIRTCHNL_OP_CONFIG_RSS_LUT = 24, - I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25, - I40E_VIRTCHNL_OP_SET_RSS_HENA = 26, + VIRTCHNL_OP_UNKNOWN = 0, + VIRTCHNL_OP_VERSION = 1, /* must ALWAYS be 1 */ + VIRTCHNL_OP_RESET_VF = 2, + VIRTCHNL_OP_GET_VF_RESOURCES = 3, + VIRTCHNL_OP_CONFIG_TX_QUEUE = 4, + VIRTCHNL_OP_CONFIG_RX_QUEUE = 5, + VIRTCHNL_OP_CONFIG_VSI_QUEUES = 6, + VIRTCHNL_OP_CONFIG_IRQ_MAP = 7, + VIRTCHNL_OP_ENABLE_QUEUES = 8, + VIRTCHNL_OP_DISABLE_QUEUES = 9, + VIRTCHNL_OP_ADD_ETH_ADDR = 10, + VIRTCHNL_OP_DEL_ETH_ADDR = 11, + VIRTCHNL_OP_ADD_VLAN = 12, + VIRTCHNL_OP_DEL_VLAN = 13, + VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE = 14, + VIRTCHNL_OP_GET_STATS = 15, + VIRTCHNL_OP_RSVD = 16, + VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */ + VIRTCHNL_OP_IWARP = 20, + VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21, + VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP = 22, + VIRTCHNL_OP_CONFIG_RSS_KEY = 23, + VIRTCHNL_OP_CONFIG_RSS_LUT = 24, + VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25, + VIRTCHNL_OP_SET_RSS_HENA = 26, }; @@ -93,16 +93,16 @@ enum i40e_virtchnl_ops { * descriptor. All other data is passed in external buffers. 
*/ -struct i40e_virtchnl_msg { +struct virtchnl_msg { u8 pad[8]; /* AQ flags/opcode/len/retval fields */ - enum i40e_virtchnl_ops v_opcode; /* avoid confusion with desc->opcode */ + enum virtchnl_ops v_opcode; /* avoid confusion with desc->opcode */ i40e_status v_retval; /* ditto for desc->retval */ u32 vfid; /* used by PF when sending to VF */ }; /* Message descriptions and data structures.*/ -/* I40E_VIRTCHNL_OP_VERSION +/* VIRTCHNL_OP_VERSION * VF posts its version number to the PF. PF responds with its version number * in the same format, along with a return code. * Reply from PF has its major/minor versions also in param0 and param1. @@ -114,16 +114,16 @@ struct i40e_virtchnl_msg { * changes in the API. The PF must always respond to this message without * error regardless of version mismatch. */ -#define I40E_VIRTCHNL_VERSION_MAJOR 1 -#define I40E_VIRTCHNL_VERSION_MINOR 1 -#define I40E_VIRTCHNL_VERSION_MINOR_NO_VF_CAPS 0 +#define VIRTCHNL_VERSION_MAJOR 1 +#define VIRTCHNL_VERSION_MINOR 1 +#define VIRTCHNL_VERSION_MINOR_NO_VF_CAPS 0 -struct i40e_virtchnl_version_info { +struct virtchnl_version_info { u32 major; u32 minor; }; -/* I40E_VIRTCHNL_OP_RESET_VF +/* VIRTCHNL_OP_RESET_VF * VF sends this request to PF with no parameters * PF does NOT respond! VF driver must delay then poll VFGEN_RSTAT register * until reset completion is indicated. The admin queue must be reinitialized @@ -135,15 +135,15 @@ struct i40e_virtchnl_version_info { * are cleared. */ -/* I40E_VIRTCHNL_OP_GET_VF_RESOURCES +/* VIRTCHNL_OP_GET_VF_RESOURCES * Version 1.0 VF sends this request to PF with no parameters * Version 1.1 VF sends this request to PF with u32 bitmap of its capabilities * PF responds with an indirect message containing - * i40e_virtchnl_vf_resource and one or more - * i40e_virtchnl_vsi_resource structures. + * virtchnl_vf_resource and one or more + * virtchnl_vsi_resource structures. 
*/ -struct i40e_virtchnl_vsi_resource { +struct virtchnl_vsi_resource { u16 vsi_id; u16 num_queue_pairs; enum i40e_vsi_type vsi_type; @@ -151,23 +151,24 @@ struct i40e_virtchnl_vsi_resource { u8 default_mac_addr[ETH_ALEN]; }; /* VF offload flags */ -#define I40E_VIRTCHNL_VF_OFFLOAD_L2 0x00000001 -#define I40E_VIRTCHNL_VF_OFFLOAD_IWARP 0x00000002 -#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ 0x00000008 -#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG 0x00000010 -#define I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR 0x00000020 -#define I40E_VIRTCHNL_VF_OFFLOAD_VLAN 0x00010000 -#define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 -#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 -#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000 -#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP 0X00100000 -#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00200000 - -#define I40E_VF_BASE_MODE_OFFLOADS (I40E_VIRTCHNL_VF_OFFLOAD_L2 | \ - I40E_VIRTCHNL_VF_OFFLOAD_VLAN | \ - I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) - -struct i40e_virtchnl_vf_resource { +#define VIRTCHNL_VF_OFFLOAD_L2 0x00000001 +#define VIRTCHNL_VF_OFFLOAD_IWARP 0x00000002 +#define VIRTCHNL_VF_OFFLOAD_FCOE 0x00000004 +#define VIRTCHNL_VF_OFFLOAD_RSS_AQ 0x00000008 +#define VIRTCHNL_VF_OFFLOAD_RSS_REG 0x00000010 +#define VIRTCHNL_VF_OFFLOAD_WB_ON_ITR 0x00000020 +#define VIRTCHNL_VF_OFFLOAD_VLAN 0x00010000 +#define VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 +#define VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 +#define VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000 +#define VIRTCHNL_VF_OFFLOAD_ENCAP 0X00100000 +#define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00200000 + +#define I40E_VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \ + VIRTCHNL_VF_OFFLOAD_VLAN | \ + VIRTCHNL_VF_OFFLOAD_RSS_PF) + +struct virtchnl_vf_resource { u16 num_vsis; u16 num_queue_pairs; u16 max_vectors; @@ -177,17 +178,17 @@ struct i40e_virtchnl_vf_resource { u32 rss_key_size; u32 rss_lut_size; - struct i40e_virtchnl_vsi_resource vsi_res[1]; + struct virtchnl_vsi_resource vsi_res[1]; }; -/* 
I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE +/* VIRTCHNL_OP_CONFIG_TX_QUEUE * VF sends this message to set up parameters for one TX queue. - * External data buffer contains one instance of i40e_virtchnl_txq_info. + * External data buffer contains one instance of virtchnl_txq_info. * PF configures requested queue and returns a status code. */ /* Tx queue config info */ -struct i40e_virtchnl_txq_info { +struct virtchnl_txq_info { u16 vsi_id; u16 queue_id; u16 ring_len; /* number of descriptors, multiple of 8 */ @@ -196,14 +197,14 @@ struct i40e_virtchnl_txq_info { u64 dma_headwb_addr; }; -/* I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE +/* VIRTCHNL_OP_CONFIG_RX_QUEUE * VF sends this message to set up parameters for one RX queue. - * External data buffer contains one instance of i40e_virtchnl_rxq_info. + * External data buffer contains one instance of virtchnl_rxq_info. * PF configures requested queue and returns a status code. */ /* Rx queue config info */ -struct i40e_virtchnl_rxq_info { +struct virtchnl_rxq_info { u16 vsi_id; u16 queue_id; u32 ring_len; /* number of descriptors, multiple of 32 */ @@ -215,33 +216,33 @@ struct i40e_virtchnl_rxq_info { enum i40e_hmc_obj_rx_hsplit_0 rx_split_pos; }; -/* I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES +/* VIRTCHNL_OP_CONFIG_VSI_QUEUES * VF sends this message to set parameters for all active TX and RX queues * associated with the specified VSI. * PF configures queues and returns status. * If the number of queues specified is greater than the number of queues * associated with the VSI, an error is returned and no queues are configured. */ -struct i40e_virtchnl_queue_pair_info { +struct virtchnl_queue_pair_info { /* NOTE: vsi_id and queue_id should be identical for both queues. 
*/ - struct i40e_virtchnl_txq_info txq; - struct i40e_virtchnl_rxq_info rxq; + struct virtchnl_txq_info txq; + struct virtchnl_rxq_info rxq; }; -struct i40e_virtchnl_vsi_queue_config_info { +struct virtchnl_vsi_queue_config_info { u16 vsi_id; u16 num_queue_pairs; - struct i40e_virtchnl_queue_pair_info qpair[1]; + struct virtchnl_queue_pair_info qpair[1]; }; -/* I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP +/* VIRTCHNL_OP_CONFIG_IRQ_MAP * VF uses this message to map vectors to queues. * The rxq_map and txq_map fields are bitmaps used to indicate which queues * are to be associated with the specified vector. * The "other" causes are always mapped to vector 0. * PF configures interrupt mapping and returns status. */ -struct i40e_virtchnl_vector_map { +struct virtchnl_vector_map { u16 vsi_id; u16 vector_id; u16 rxq_map; @@ -250,75 +251,75 @@ struct i40e_virtchnl_vector_map { u16 txitr_idx; }; -struct i40e_virtchnl_irq_map_info { +struct virtchnl_irq_map_info { u16 num_vectors; - struct i40e_virtchnl_vector_map vecmap[1]; + struct virtchnl_vector_map vecmap[1]; }; -/* I40E_VIRTCHNL_OP_ENABLE_QUEUES - * I40E_VIRTCHNL_OP_DISABLE_QUEUES +/* VIRTCHNL_OP_ENABLE_QUEUES + * VIRTCHNL_OP_DISABLE_QUEUES * VF sends these message to enable or disable TX/RX queue pairs. * The queues fields are bitmaps indicating which queues to act upon. * (Currently, we only support 16 queues per VF, but we make the field * u32 to allow for expansion.) * PF performs requested action and returns status. */ -struct i40e_virtchnl_queue_select { +struct virtchnl_queue_select { u16 vsi_id; u16 pad; u32 rx_queues; u32 tx_queues; }; -/* I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS +/* VIRTCHNL_OP_ADD_ETH_ADDR * VF sends this message in order to add one or more unicast or multicast * address filters for the specified VSI. * PF adds the filters and returns status. 
*/ -/* I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS +/* VIRTCHNL_OP_DEL_ETH_ADDR * VF sends this message in order to remove one or more unicast or multicast * filters for the specified VSI. * PF removes the filters and returns status. */ -struct i40e_virtchnl_ether_addr { +struct virtchnl_ether_addr { u8 addr[ETH_ALEN]; u8 pad[2]; }; -struct i40e_virtchnl_ether_addr_list { +struct virtchnl_ether_addr_list { u16 vsi_id; u16 num_elements; - struct i40e_virtchnl_ether_addr list[1]; + struct virtchnl_ether_addr list[1]; }; -/* I40E_VIRTCHNL_OP_ADD_VLAN +/* VIRTCHNL_OP_ADD_VLAN * VF sends this message to add one or more VLAN tag filters for receives. * PF adds the filters and returns status. * If a port VLAN is configured by the PF, this operation will return an * error to the VF. */ -/* I40E_VIRTCHNL_OP_DEL_VLAN +/* VIRTCHNL_OP_DEL_VLAN * VF sends this message to remove one or more VLAN tag filters for receives. * PF removes the filters and returns status. * If a port VLAN is configured by the PF, this operation will return an * error to the VF. */ -struct i40e_virtchnl_vlan_filter_list { +struct virtchnl_vlan_filter_list { u16 vsi_id; u16 num_elements; u16 vlan_id[1]; }; -/* I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE +/* VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE * VF sends VSI id and flags. * PF returns status code in retval. * Note: we assume that broadcast accept mode is always enabled. */ -struct i40e_virtchnl_promisc_info { +struct virtchnl_promisc_info { u16 vsi_id; u16 flags; }; @@ -326,63 +327,63 @@ struct i40e_virtchnl_promisc_info { #define I40E_FLAG_VF_UNICAST_PROMISC 0x00000001 #define I40E_FLAG_VF_MULTICAST_PROMISC 0x00000002 -/* I40E_VIRTCHNL_OP_GET_STATS +/* VIRTCHNL_OP_GET_STATS * VF sends this message to request stats for the selected VSI. VF uses - * the i40e_virtchnl_queue_select struct to specify the VSI. The queue_id + * the virtchnl_queue_select struct to specify the VSI. The queue_id * field is ignored by the PF. 
* * PF replies with struct i40e_eth_stats in an external buffer. */ -/* I40E_VIRTCHNL_OP_CONFIG_RSS_KEY - * I40E_VIRTCHNL_OP_CONFIG_RSS_LUT +/* VIRTCHNL_OP_CONFIG_RSS_KEY + * VIRTCHNL_OP_CONFIG_RSS_LUT * VF sends these messages to configure RSS. Only supported if both PF - * and VF drivers set the I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF bit during + * and VF drivers set the VIRTCHNL_VF_OFFLOAD_RSS_PF bit during * configuration negotiation. If this is the case, then the RSS fields in * the VF resource struct are valid. * Both the key and LUT are initialized to 0 by the PF, meaning that * RSS is effectively disabled until set up by the VF. */ -struct i40e_virtchnl_rss_key { +struct virtchnl_rss_key { u16 vsi_id; u16 key_len; u8 key[1]; /* RSS hash key, packed bytes */ }; -struct i40e_virtchnl_rss_lut { +struct virtchnl_rss_lut { u16 vsi_id; u16 lut_entries; u8 lut[1]; /* RSS lookup table*/ }; -/* I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS - * I40E_VIRTCHNL_OP_SET_RSS_HENA +/* VIRTCHNL_OP_GET_RSS_HENA_CAPS + * VIRTCHNL_OP_SET_RSS_HENA * VF sends these messages to get and set the hash filter enable bits for RSS. * By default, the PF sets these to all possible traffic types that the * hardware supports. The VF can query this value if it wants to change the * traffic types that are hashed by the hardware. * Traffic types are defined in the i40e_filter_pctype enum in i40e_type.h */ -struct i40e_virtchnl_rss_hena { +struct virtchnl_rss_hena { u64 hena; }; -/* I40E_VIRTCHNL_OP_EVENT +/* VIRTCHNL_OP_EVENT * PF sends this message to inform the VF driver of events that may affect it. * No direct response is expected from the VF, though it may generate other * messages in response to this one. 
*/ -enum i40e_virtchnl_event_codes { - I40E_VIRTCHNL_EVENT_UNKNOWN = 0, - I40E_VIRTCHNL_EVENT_LINK_CHANGE, - I40E_VIRTCHNL_EVENT_RESET_IMPENDING, - I40E_VIRTCHNL_EVENT_PF_DRIVER_CLOSE, +enum virtchnl_event_codes { + VIRTCHNL_EVENT_UNKNOWN = 0, + VIRTCHNL_EVENT_LINK_CHANGE, + VIRTCHNL_EVENT_RESET_IMPENDING, + VIRTCHNL_EVENT_PF_DRIVER_CLOSE, }; #define I40E_PF_EVENT_SEVERITY_INFO 0 #define I40E_PF_EVENT_SEVERITY_CERTAIN_DOOM 255 -struct i40e_virtchnl_pf_event { - enum i40e_virtchnl_event_codes event; +struct virtchnl_pf_event { + enum virtchnl_event_codes event; union { struct { enum i40e_aq_link_speed link_speed; @@ -393,7 +394,7 @@ struct i40e_virtchnl_pf_event { int severity; }; -/* I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP +/* VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP * VF uses this message to request PF to map IWARP vectors to IWARP queues. * The request for this originates from the VF IWARP driver through * a client interface between VF LAN and VF IWARP driver. @@ -412,16 +413,16 @@ struct i40e_virtchnl_pf_event { #define I40E_QUEUE_TYPE_PE_AEQ 0x80 #define I40E_QUEUE_INVALID_IDX 0xFFFF -struct i40e_virtchnl_iwarp_qv_info { +struct virtchnl_iwarp_qv_info { u32 v_idx; /* msix_vector */ u16 ceq_idx; u16 aeq_idx; u8 itr_idx; }; -struct i40e_virtchnl_iwarp_qvlist_info { +struct virtchnl_iwarp_qvlist_info { u32 num_vectors; - struct i40e_virtchnl_iwarp_qv_info qv_info[1]; + struct virtchnl_iwarp_qv_info qv_info[1]; }; /* VF reset states - these are written into the RSTAT register: @@ -436,11 +437,11 @@ struct i40e_virtchnl_iwarp_qvlist_info { * is in a reset state, it will return DEADBEEF, which, when masked * will result in 3. 
*/ -enum i40e_vfr_states { - I40E_VFR_INPROGRESS = 0, - I40E_VFR_COMPLETED, - I40E_VFR_VFACTIVE, - I40E_VFR_UNKNOWN, +enum virtchnl_vfr_states { + VIRTCHNL_VFR_INPROGRESS = 0, + VIRTCHNL_VFR_COMPLETED, + VIRTCHNL_VFR_VFACTIVE, + VIRTCHNL_VFR_UNKNOWN, }; -#endif /* _I40E_VIRTCHNL_H_ */ +#endif /* _VIRTCHNL_H_ */ -- cgit v1.2.3 From eedcfef85b15ae02c488625556702594a618c616 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Thu, 11 May 2017 11:23:13 -0700 Subject: virtchnl: convert to new macros As part of the conversion, change the arguments to VF_IS_V1[01] macros and move them to virtchnl.h Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index a8b616121960..8ffa670c2ffd 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -123,6 +123,9 @@ struct virtchnl_version_info { u32 minor; }; +#define VF_IS_V10(_v) (((_v)->major == 1) && ((_v)->minor == 0)) +#define VF_IS_V11(_ver) (((_ver)->major == 1) && ((_ver)->minor == 1)) + /* VIRTCHNL_OP_RESET_VF * VF sends this request to PF with no parameters * PF does NOT respond! VF driver must delay then poll VFGEN_RSTAT register -- cgit v1.2.3 From 764430ce6f8c38d7ed3b6d2cfe9450b9d3c78809 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Thu, 11 May 2017 11:23:14 -0700 Subject: i40e/virtchnl: refactor code for validate checks This change updates the arguments passed to the validate function and fixes the caller, as well as uses the new return values added to virtchnl.h One other minor tweak, remove a duplicate set to zero of valid_len. This is in preparation for moving the function to virtchnl.h. 
Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 8ffa670c2ffd..f1cc1f02036e 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -53,6 +53,23 @@ * its queues, optionally add MAC and VLAN filters, and process traffic. */ +/* START GENERIC DEFINES + * Need to ensure the following enums and defines hold the same meaning and + * value in current and future projects + */ + +/* Error Codes */ +enum virtchnl_status_code { + VIRTCHNL_STATUS_SUCCESS = 0, + VIRTCHNL_ERR_PARAM = -5, + VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH = -38, + VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR = -39, + VIRTCHNL_STATUS_ERR_INVALID_VF_ID = -40, + VIRTCHNL_STATUS_NOT_SUPPORTED = -64, +}; + +/* END GENERIC DEFINES */ + /* Opcodes for VF-PF communication. These are placed in the v_opcode field * of the virtchnl_msg structure. */ -- cgit v1.2.3 From f0adc6e831baaef16577ea2af5eb3e91fd4efef4 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Thu, 11 May 2017 11:23:15 -0700 Subject: i40evf/virtchnl: whitespace cleanups This patch fixes up a bunch of whitespace issues introduced by the previous automated change of name from i40e to virtchnl. 
Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index f1cc1f02036e..73970bd439fe 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -175,10 +175,10 @@ struct virtchnl_vsi_resource { #define VIRTCHNL_VF_OFFLOAD_IWARP 0x00000002 #define VIRTCHNL_VF_OFFLOAD_FCOE 0x00000004 #define VIRTCHNL_VF_OFFLOAD_RSS_AQ 0x00000008 -#define VIRTCHNL_VF_OFFLOAD_RSS_REG 0x00000010 -#define VIRTCHNL_VF_OFFLOAD_WB_ON_ITR 0x00000020 +#define VIRTCHNL_VF_OFFLOAD_RSS_REG 0x00000010 +#define VIRTCHNL_VF_OFFLOAD_WB_ON_ITR 0x00000020 #define VIRTCHNL_VF_OFFLOAD_VLAN 0x00010000 -#define VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 +#define VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 #define VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 #define VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000 #define VIRTCHNL_VF_OFFLOAD_ENCAP 0X00100000 -- cgit v1.2.3 From ff3f4cc267f6f39c2fc525c8918c929809defbfa Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Thu, 11 May 2017 11:23:16 -0700 Subject: virtchnl: finish conversion to virtchnl interface This patch implements the complete version of the virtchnl.h file with final renames, and fixes the related code in i40e and i40evf. It also expands comments, and adds details on the usage of certain fields. In addition, due to the changes a couple of casts are needed to prevent errors found by sparse after renaming some fields. 
Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 128 +++++++++++++++++++++++++++---------------- 1 file changed, 81 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 73970bd439fe..6c6fbb492b5d 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -29,28 +29,29 @@ /* Description: * This header file describes the VF-PF communication protocol used - * by the various i40e drivers. + * by the drivers for all devices starting from our 40G product line * * Admin queue buffer usage: - * desc->opcode is always i40e_aqc_opc_send_msg_to_pf + * desc->opcode is always aqc_opc_send_msg_to_pf * flags, retval, datalen, and data addr are all used normally. - * Firmware copies the cookie fields when sending messages between the PF and - * VF, but uses all other fields internally. Due to this limitation, we - * must send all messages as "indirect", i.e. using an external buffer. + * The Firmware copies the cookie fields when sending messages between the + * PF and VF, but uses all other fields internally. Due to this limitation, + * we must send all messages as "indirect", i.e. using an external buffer. * - * All the vsi indexes are relative to the VF. Each VF can have maximum of + * All the VSI indexes are relative to the VF. Each VF can have maximum of * three VSIs. All the queue indexes are relative to the VSI. Each VF can * have a maximum of sixteen queues for all of its VSIs. * * The PF is required to return a status code in v_retval for all messages - * except RESET_VF, which does not require any response. The return value is of - * i40e_status_code type, defined in the i40e_type.h. + * except RESET_VF, which does not require any response. The return value + * is of status_code type, defined in the shared type.h. 
* - * In general, VF driver initialization should roughly follow the order of these - * opcodes. The VF driver must first validate the API version of the PF driver, - * then request a reset, then get resources, then configure queues and - * interrupts. After these operations are complete, the VF driver may start - * its queues, optionally add MAC and VLAN filters, and process traffic. + * In general, VF driver initialization should roughly follow the order of + * these opcodes. The VF driver must first validate the API version of the + * PF driver, then request a reset, then get resources, then configure + * queues and interrupts. After these operations are complete, the VF + * driver may start its queues, optionally add MAC and VLAN filters, and + * process traffic. */ /* START GENERIC DEFINES @@ -68,6 +69,33 @@ enum virtchnl_status_code { VIRTCHNL_STATUS_NOT_SUPPORTED = -64, }; +#define VIRTCHNL_LINK_SPEED_100MB_SHIFT 0x1 +#define VIRTCHNL_LINK_SPEED_1000MB_SHIFT 0x2 +#define VIRTCHNL_LINK_SPEED_10GB_SHIFT 0x3 +#define VIRTCHNL_LINK_SPEED_40GB_SHIFT 0x4 +#define VIRTCHNL_LINK_SPEED_20GB_SHIFT 0x5 +#define VIRTCHNL_LINK_SPEED_25GB_SHIFT 0x6 + +enum virtchnl_link_speed { + VIRTCHNL_LINK_SPEED_UNKNOWN = 0, + VIRTCHNL_LINK_SPEED_100MB = BIT(VIRTCHNL_LINK_SPEED_100MB_SHIFT), + VIRTCHNL_LINK_SPEED_1GB = BIT(VIRTCHNL_LINK_SPEED_1000MB_SHIFT), + VIRTCHNL_LINK_SPEED_10GB = BIT(VIRTCHNL_LINK_SPEED_10GB_SHIFT), + VIRTCHNL_LINK_SPEED_40GB = BIT(VIRTCHNL_LINK_SPEED_40GB_SHIFT), + VIRTCHNL_LINK_SPEED_20GB = BIT(VIRTCHNL_LINK_SPEED_20GB_SHIFT), + VIRTCHNL_LINK_SPEED_25GB = BIT(VIRTCHNL_LINK_SPEED_25GB_SHIFT), +}; + +/* for hsplit_0 field of Rx HMC context */ +/* deprecated with AVF 1.0 */ +enum virtchnl_rx_hsplit { + VIRTCHNL_RX_HSPLIT_NO_SPLIT = 0, + VIRTCHNL_RX_HSPLIT_SPLIT_L2 = 1, + VIRTCHNL_RX_HSPLIT_SPLIT_IP = 2, + VIRTCHNL_RX_HSPLIT_SPLIT_TCP_UDP = 4, + VIRTCHNL_RX_HSPLIT_SPLIT_SCTP = 8, +}; + /* END GENERIC DEFINES */ /* Opcodes for VF-PF communication. 
These are placed in the v_opcode field @@ -77,6 +105,8 @@ enum virtchnl_ops { /* The PF sends status change events to VFs using * the VIRTCHNL_OP_EVENT opcode. * VFs send requests to the PF using the other ops. + * Use of "advanced opcode" features must be negotiated as part of capabilities + * exchange and are not considered part of base mode feature set. */ VIRTCHNL_OP_UNKNOWN = 0, VIRTCHNL_OP_VERSION = 1, /* must ALWAYS be 1 */ @@ -96,14 +126,13 @@ enum virtchnl_ops { VIRTCHNL_OP_GET_STATS = 15, VIRTCHNL_OP_RSVD = 16, VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */ - VIRTCHNL_OP_IWARP = 20, - VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21, - VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP = 22, + VIRTCHNL_OP_IWARP = 20, /* advanced opcode */ + VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21, /* advanced opcode */ + VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP = 22, /* advanced opcode */ VIRTCHNL_OP_CONFIG_RSS_KEY = 23, VIRTCHNL_OP_CONFIG_RSS_LUT = 24, VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25, VIRTCHNL_OP_SET_RSS_HENA = 26, - }; /* Virtual channel message descriptor. This overlays the admin queue @@ -113,7 +142,7 @@ enum virtchnl_ops { struct virtchnl_msg { u8 pad[8]; /* AQ flags/opcode/len/retval fields */ enum virtchnl_ops v_opcode; /* avoid confusion with desc->opcode */ - i40e_status v_retval; /* ditto for desc->retval */ + enum virtchnl_status_code v_retval; /* ditto for desc->retval */ u32 vfid; /* used by PF when sending to VF */ }; @@ -155,6 +184,15 @@ struct virtchnl_version_info { * are cleared. */ +/* VSI types that use VIRTCHNL interface for VF-PF communication. VSI_SRIOV + * vsi_type should always be 6 for backward compatibility. Add other fields + * as needed. 
+ */ +enum virtchnl_vsi_type { + VIRTCHNL_VSI_TYPE_INVALID = 0, + VIRTCHNL_VSI_SRIOV = 6, +}; + /* VIRTCHNL_OP_GET_VF_RESOURCES * Version 1.0 VF sends this request to PF with no parameters * Version 1.1 VF sends this request to PF with u32 bitmap of its capabilities @@ -166,14 +204,18 @@ struct virtchnl_version_info { struct virtchnl_vsi_resource { u16 vsi_id; u16 num_queue_pairs; - enum i40e_vsi_type vsi_type; + enum virtchnl_vsi_type vsi_type; u16 qset_handle; u8 default_mac_addr[ETH_ALEN]; }; -/* VF offload flags */ -#define VIRTCHNL_VF_OFFLOAD_L2 0x00000001 + +/* VF offload flags + * VIRTCHNL_VF_OFFLOAD_L2 flag is inclusive of base mode L2 offloads including + * TX/RX Checksum offloading and TSO for non-tunnelled packets. + */ +#define VIRTCHNL_VF_OFFLOAD_L2 0x00000001 #define VIRTCHNL_VF_OFFLOAD_IWARP 0x00000002 -#define VIRTCHNL_VF_OFFLOAD_FCOE 0x00000004 +#define VIRTCHNL_VF_OFFLOAD_RSVD 0x00000004 #define VIRTCHNL_VF_OFFLOAD_RSS_AQ 0x00000008 #define VIRTCHNL_VF_OFFLOAD_RSS_REG 0x00000010 #define VIRTCHNL_VF_OFFLOAD_WB_ON_ITR 0x00000020 @@ -182,11 +224,12 @@ struct virtchnl_vsi_resource { #define VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 #define VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000 #define VIRTCHNL_VF_OFFLOAD_ENCAP 0X00100000 -#define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00200000 +#define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00200000 +#define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM 0X00400000 -#define I40E_VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \ - VIRTCHNL_VF_OFFLOAD_VLAN | \ - VIRTCHNL_VF_OFFLOAD_RSS_PF) +#define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \ + VIRTCHNL_VF_OFFLOAD_VLAN | \ + VIRTCHNL_VF_OFFLOAD_RSS_PF) struct virtchnl_vf_resource { u16 num_vsis; @@ -212,9 +255,9 @@ struct virtchnl_txq_info { u16 vsi_id; u16 queue_id; u16 ring_len; /* number of descriptors, multiple of 8 */ - u16 headwb_enabled; + u16 headwb_enabled; /* deprecated with AVF 1.0 */ u64 dma_ring_addr; - u64 dma_headwb_addr; + u64 dma_headwb_addr; /* deprecated with AVF 1.0 
*/ }; /* VIRTCHNL_OP_CONFIG_RX_QUEUE @@ -229,11 +272,11 @@ struct virtchnl_rxq_info { u16 queue_id; u32 ring_len; /* number of descriptors, multiple of 32 */ u16 hdr_size; - u16 splithdr_enabled; + u16 splithdr_enabled; /* deprecated with AVF 1.0 */ u32 databuffer_size; u32 max_pkt_size; u64 dma_ring_addr; - enum i40e_hmc_obj_rx_hsplit_0 rx_split_pos; + enum virtchnl_rx_hsplit rx_split_pos; /* deprecated with AVF 1.0 */ }; /* VIRTCHNL_OP_CONFIG_VSI_QUEUES @@ -344,15 +387,15 @@ struct virtchnl_promisc_info { u16 flags; }; -#define I40E_FLAG_VF_UNICAST_PROMISC 0x00000001 -#define I40E_FLAG_VF_MULTICAST_PROMISC 0x00000002 +#define FLAG_VF_UNICAST_PROMISC 0x00000001 +#define FLAG_VF_MULTICAST_PROMISC 0x00000002 /* VIRTCHNL_OP_GET_STATS * VF sends this message to request stats for the selected VSI. VF uses * the virtchnl_queue_select struct to specify the VSI. The queue_id * field is ignored by the PF. * - * PF replies with struct i40e_eth_stats in an external buffer. + * PF replies with struct eth_stats in an external buffer. */ /* VIRTCHNL_OP_CONFIG_RSS_KEY @@ -382,7 +425,6 @@ struct virtchnl_rss_lut { * By default, the PF sets these to all possible traffic types that the * hardware supports. The VF can query this value if it wants to change the * traffic types that are hashed by the hardware. 
- * Traffic types are defined in the i40e_filter_pctype enum in i40e_type.h */ struct virtchnl_rss_hena { u64 hena; @@ -399,14 +441,15 @@ enum virtchnl_event_codes { VIRTCHNL_EVENT_RESET_IMPENDING, VIRTCHNL_EVENT_PF_DRIVER_CLOSE, }; -#define I40E_PF_EVENT_SEVERITY_INFO 0 -#define I40E_PF_EVENT_SEVERITY_CERTAIN_DOOM 255 + +#define PF_EVENT_SEVERITY_INFO 0 +#define PF_EVENT_SEVERITY_CERTAIN_DOOM 255 struct virtchnl_pf_event { enum virtchnl_event_codes event; union { struct { - enum i40e_aq_link_speed link_speed; + enum virtchnl_link_speed link_speed; bool link_status; } link_event; } event_data; @@ -426,13 +469,6 @@ struct virtchnl_pf_event { * PF configures interrupt mapping and returns status. */ -/* HW does not define a type value for AEQ; only for RX/TX and CEQ. - * In order for us to keep the interface simple, SW will define a - * unique type value for AEQ. - */ -#define I40E_QUEUE_TYPE_PE_AEQ 0x80 -#define I40E_QUEUE_INVALID_IDX 0xFFFF - struct virtchnl_iwarp_qv_info { u32 v_idx; /* msix_vector */ u16 ceq_idx; @@ -446,8 +482,7 @@ struct virtchnl_iwarp_qvlist_info { }; /* VF reset states - these are written into the RSTAT register: - * I40E_VFGEN_RSTAT1 on the PF - * I40E_VFGEN_RSTAT on the VF + * VFGEN_RSTAT on the VF * When the PF initiates a reset, it writes 0 * When the reset is complete, it writes 1 * When the PF detects that the VF has recovered, it writes 2 @@ -461,7 +496,6 @@ enum virtchnl_vfr_states { VIRTCHNL_VFR_INPROGRESS = 0, VIRTCHNL_VFR_COMPLETED, VIRTCHNL_VFR_VFACTIVE, - VIRTCHNL_VFR_UNKNOWN, }; #endif /* _VIRTCHNL_H_ */ -- cgit v1.2.3 From 735e35c56bbc91621942dc5111b2970beb00e75a Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Thu, 11 May 2017 11:23:17 -0700 Subject: i40e/virtchnl: move function to virtchnl This moves a function that is needed for the virtchnl interface from the i40e PF driver over to the virtchnl.h file. 
It was manually verified that the function in question is unchanged except for the function name and function header, which explains the slight difference in the number of lines removed/added. Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 147 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 6c6fbb492b5d..dab76e947b9f 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -498,4 +498,151 @@ enum virtchnl_vfr_states { VIRTCHNL_VFR_VFACTIVE, }; +/** + * virtchnl_vc_validate_vf_msg + * @ver: Virtchnl version info + * @v_opcode: Opcode for the message + * @msg: pointer to the msg buffer + * @msglen: msg length + * + * validate msg format against struct for each opcode + */ +static inline int +virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, + u8 *msg, u16 msglen) +{ + bool err_msg_format = false; + int valid_len = 0; + + /* Validate message length. 
*/ + switch (v_opcode) { + case VIRTCHNL_OP_VERSION: + valid_len = sizeof(struct virtchnl_version_info); + break; + case VIRTCHNL_OP_RESET_VF: + break; + case VIRTCHNL_OP_GET_VF_RESOURCES: + if (VF_IS_V11(ver)) + valid_len = sizeof(u32); + break; + case VIRTCHNL_OP_CONFIG_TX_QUEUE: + valid_len = sizeof(struct virtchnl_txq_info); + break; + case VIRTCHNL_OP_CONFIG_RX_QUEUE: + valid_len = sizeof(struct virtchnl_rxq_info); + break; + case VIRTCHNL_OP_CONFIG_VSI_QUEUES: + valid_len = sizeof(struct virtchnl_vsi_queue_config_info); + if (msglen >= valid_len) { + struct virtchnl_vsi_queue_config_info *vqc = + (struct virtchnl_vsi_queue_config_info *)msg; + valid_len += (vqc->num_queue_pairs * + sizeof(struct + virtchnl_queue_pair_info)); + if (vqc->num_queue_pairs == 0) + err_msg_format = true; + } + break; + case VIRTCHNL_OP_CONFIG_IRQ_MAP: + valid_len = sizeof(struct virtchnl_irq_map_info); + if (msglen >= valid_len) { + struct virtchnl_irq_map_info *vimi = + (struct virtchnl_irq_map_info *)msg; + valid_len += (vimi->num_vectors * + sizeof(struct virtchnl_vector_map)); + if (vimi->num_vectors == 0) + err_msg_format = true; + } + break; + case VIRTCHNL_OP_ENABLE_QUEUES: + case VIRTCHNL_OP_DISABLE_QUEUES: + valid_len = sizeof(struct virtchnl_queue_select); + break; + case VIRTCHNL_OP_ADD_ETH_ADDR: + case VIRTCHNL_OP_DEL_ETH_ADDR: + valid_len = sizeof(struct virtchnl_ether_addr_list); + if (msglen >= valid_len) { + struct virtchnl_ether_addr_list *veal = + (struct virtchnl_ether_addr_list *)msg; + valid_len += veal->num_elements * + sizeof(struct virtchnl_ether_addr); + if (veal->num_elements == 0) + err_msg_format = true; + } + break; + case VIRTCHNL_OP_ADD_VLAN: + case VIRTCHNL_OP_DEL_VLAN: + valid_len = sizeof(struct virtchnl_vlan_filter_list); + if (msglen >= valid_len) { + struct virtchnl_vlan_filter_list *vfl = + (struct virtchnl_vlan_filter_list *)msg; + valid_len += vfl->num_elements * sizeof(u16); + if (vfl->num_elements == 0) + err_msg_format = true; + } + break; 
+ case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE: + valid_len = sizeof(struct virtchnl_promisc_info); + break; + case VIRTCHNL_OP_GET_STATS: + valid_len = sizeof(struct virtchnl_queue_select); + break; + case VIRTCHNL_OP_IWARP: + /* These messages are opaque to us and will be validated in + * the RDMA client code. We just need to check for nonzero + * length. The firmware will enforce max length restrictions. + */ + if (msglen) + valid_len = msglen; + else + err_msg_format = true; + break; + case VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP: + break; + case VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP: + valid_len = sizeof(struct virtchnl_iwarp_qvlist_info); + if (msglen >= valid_len) { + struct virtchnl_iwarp_qvlist_info *qv = + (struct virtchnl_iwarp_qvlist_info *)msg; + if (qv->num_vectors == 0) { + err_msg_format = true; + break; + } + valid_len += ((qv->num_vectors - 1) * + sizeof(struct virtchnl_iwarp_qv_info)); + } + break; + case VIRTCHNL_OP_CONFIG_RSS_KEY: + valid_len = sizeof(struct virtchnl_rss_key); + if (msglen >= valid_len) { + struct virtchnl_rss_key *vrk = + (struct virtchnl_rss_key *)msg; + valid_len += vrk->key_len - 1; + } + break; + case VIRTCHNL_OP_CONFIG_RSS_LUT: + valid_len = sizeof(struct virtchnl_rss_lut); + if (msglen >= valid_len) { + struct virtchnl_rss_lut *vrl = + (struct virtchnl_rss_lut *)msg; + valid_len += vrl->lut_entries - 1; + } + break; + case VIRTCHNL_OP_GET_RSS_HENA_CAPS: + break; + case VIRTCHNL_OP_SET_RSS_HENA: + valid_len = sizeof(struct virtchnl_rss_hena); + break; + /* These are always errors coming from the VF. 
*/ + case VIRTCHNL_OP_EVENT: + case VIRTCHNL_OP_UNKNOWN: + default: + return VIRTCHNL_ERR_PARAM; + } + /* few more checks */ + if ((valid_len != msglen) || (err_msg_format)) + return VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH; + + return 0; +} #endif /* _VIRTCHNL_H_ */ -- cgit v1.2.3 From a33c83c4353b2efc4d883bad06a86a9ba2dde4fc Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 11 May 2017 11:23:18 -0700 Subject: virtchnl: Add pad fields to a couple of structures This removes holes and makes structure sizes consistent across 32 and 64 bit builds. Signed-off-by: Sridhar Samudrala Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index dab76e947b9f..72466c69f749 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -275,8 +275,10 @@ struct virtchnl_rxq_info { u16 splithdr_enabled; /* deprecated with AVF 1.0 */ u32 databuffer_size; u32 max_pkt_size; + u32 pad1; u64 dma_ring_addr; enum virtchnl_rx_hsplit rx_split_pos; /* deprecated with AVF 1.0 */ + u32 pad2; }; /* VIRTCHNL_OP_CONFIG_VSI_QUEUES @@ -295,6 +297,7 @@ struct virtchnl_queue_pair_info { struct virtchnl_vsi_queue_config_info { u16 vsi_id; u16 num_queue_pairs; + u32 pad; struct virtchnl_queue_pair_info qpair[1]; }; -- cgit v1.2.3 From 73556269aab30c39cba9cf8efafc402d0deb87b2 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 11 May 2017 11:23:19 -0700 Subject: virtchnl: Add compile time static asserts to validate structure sizes This uses preprocessor tricks to make sure that a divide by zero occurs if a struct changes size outside the expected number of bytes. 
Signed-off-by: Sridhar Samudrala Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 50 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 72466c69f749..c893b9520a67 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -135,6 +135,14 @@ enum virtchnl_ops { VIRTCHNL_OP_SET_RSS_HENA = 26, }; +/* This macro is used to generate a compilation error if a structure + * is not exactly the correct length. It gives a divide by zero error if the + * structure is not of the correct size, otherwise it creates an enum that is + * never used. + */ +#define VIRTCHNL_CHECK_STRUCT_LEN(n, X) enum virtchnl_static_assert_enum_##X \ + { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) } + /* Virtual channel message descriptor. This overlays the admin queue * descriptor. All other data is passed in external buffers. */ @@ -146,6 +154,8 @@ struct virtchnl_msg { u32 vfid; /* used by PF when sending to VF */ }; +VIRTCHNL_CHECK_STRUCT_LEN(20, virtchnl_msg); + /* Message descriptions and data structures.*/ /* VIRTCHNL_OP_VERSION @@ -169,6 +179,8 @@ struct virtchnl_version_info { u32 minor; }; +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_version_info); + #define VF_IS_V10(_v) (((_v)->major == 1) && ((_v)->minor == 0)) #define VF_IS_V11(_ver) (((_ver)->major == 1) && ((_ver)->minor == 1)) @@ -209,6 +221,8 @@ struct virtchnl_vsi_resource { u8 default_mac_addr[ETH_ALEN]; }; +VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); + /* VF offload flags * VIRTCHNL_VF_OFFLOAD_L2 flag is inclusive of base mode L2 offloads including * TX/RX Checksum offloading and TSO for non-tunnelled packets. 
@@ -244,6 +258,8 @@ struct virtchnl_vf_resource { struct virtchnl_vsi_resource vsi_res[1]; }; +VIRTCHNL_CHECK_STRUCT_LEN(36, virtchnl_vf_resource); + /* VIRTCHNL_OP_CONFIG_TX_QUEUE * VF sends this message to set up parameters for one TX queue. * External data buffer contains one instance of virtchnl_txq_info. @@ -260,6 +276,8 @@ struct virtchnl_txq_info { u64 dma_headwb_addr; /* deprecated with AVF 1.0 */ }; +VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_txq_info); + /* VIRTCHNL_OP_CONFIG_RX_QUEUE * VF sends this message to set up parameters for one RX queue. * External data buffer contains one instance of virtchnl_rxq_info. @@ -281,6 +299,8 @@ struct virtchnl_rxq_info { u32 pad2; }; +VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_rxq_info); + /* VIRTCHNL_OP_CONFIG_VSI_QUEUES * VF sends this message to set parameters for all active TX and RX queues * associated with the specified VSI. @@ -294,6 +314,8 @@ struct virtchnl_queue_pair_info { struct virtchnl_rxq_info rxq; }; +VIRTCHNL_CHECK_STRUCT_LEN(64, virtchnl_queue_pair_info); + struct virtchnl_vsi_queue_config_info { u16 vsi_id; u16 num_queue_pairs; @@ -301,6 +323,8 @@ struct virtchnl_vsi_queue_config_info { struct virtchnl_queue_pair_info qpair[1]; }; +VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_vsi_queue_config_info); + /* VIRTCHNL_OP_CONFIG_IRQ_MAP * VF uses this message to map vectors to queues. * The rxq_map and txq_map fields are bitmaps used to indicate which queues @@ -317,11 +341,15 @@ struct virtchnl_vector_map { u16 txitr_idx; }; +VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_vector_map); + struct virtchnl_irq_map_info { u16 num_vectors; struct virtchnl_vector_map vecmap[1]; }; +VIRTCHNL_CHECK_STRUCT_LEN(14, virtchnl_irq_map_info); + /* VIRTCHNL_OP_ENABLE_QUEUES * VIRTCHNL_OP_DISABLE_QUEUES * VF sends these message to enable or disable TX/RX queue pairs. 
@@ -337,6 +365,8 @@ struct virtchnl_queue_select { u32 tx_queues; }; +VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_queue_select); + /* VIRTCHNL_OP_ADD_ETH_ADDR * VF sends this message in order to add one or more unicast or multicast * address filters for the specified VSI. @@ -354,12 +384,16 @@ struct virtchnl_ether_addr { u8 pad[2]; }; +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_ether_addr); + struct virtchnl_ether_addr_list { u16 vsi_id; u16 num_elements; struct virtchnl_ether_addr list[1]; }; +VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_ether_addr_list); + /* VIRTCHNL_OP_ADD_VLAN * VF sends this message to add one or more VLAN tag filters for receives. * PF adds the filters and returns status. @@ -380,6 +414,8 @@ struct virtchnl_vlan_filter_list { u16 vlan_id[1]; }; +VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_vlan_filter_list); + /* VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE * VF sends VSI id and flags. * PF returns status code in retval. @@ -390,6 +426,8 @@ struct virtchnl_promisc_info { u16 flags; }; +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_promisc_info); + #define FLAG_VF_UNICAST_PROMISC 0x00000001 #define FLAG_VF_MULTICAST_PROMISC 0x00000002 @@ -416,12 +454,16 @@ struct virtchnl_rss_key { u8 key[1]; /* RSS hash key, packed bytes */ }; +VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_key); + struct virtchnl_rss_lut { u16 vsi_id; u16 lut_entries; u8 lut[1]; /* RSS lookup table*/ }; +VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_lut); + /* VIRTCHNL_OP_GET_RSS_HENA_CAPS * VIRTCHNL_OP_SET_RSS_HENA * VF sends these messages to get and set the hash filter enable bits for RSS. @@ -433,6 +475,8 @@ struct virtchnl_rss_hena { u64 hena; }; +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hena); + /* VIRTCHNL_OP_EVENT * PF sends this message to inform the VF driver of events that may affect it. 
* No direct response is expected from the VF, though it may generate other @@ -460,6 +504,8 @@ struct virtchnl_pf_event { int severity; }; +VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_pf_event); + /* VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP * VF uses this message to request PF to map IWARP vectors to IWARP queues. * The request for this originates from the VF IWARP driver through @@ -479,11 +525,15 @@ struct virtchnl_iwarp_qv_info { u8 itr_idx; }; +VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_iwarp_qv_info); + struct virtchnl_iwarp_qvlist_info { u32 num_vectors; struct virtchnl_iwarp_qv_info qv_info[1]; }; +VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_iwarp_qvlist_info); + /* VF reset states - these are written into the RSTAT register: * VFGEN_RSTAT on the VF * When the PF initiates a reset, it writes 0 -- cgit v1.2.3 From 278cba7eaf5422510fc4a6b5a4d447f17b00506e Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 14 Dec 2016 03:35:03 +0200 Subject: drm: omapdrm: Remove unused default display name support The default display name is both unused and never set by platform data. Remove default display name module parameter, platform data field and runtime infrastructure. 
Signed-off-by: Laurent Pinchart Acked-by: Bartlomiej Zolnierkiewicz Reviewed-by: Tomi Valkeinen Signed-off-by: Tomi Valkeinen --- include/linux/platform_data/omapdss.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/omapdss.h b/include/linux/platform_data/omapdss.h index 679177929045..7feb011ed500 100644 --- a/include/linux/platform_data/omapdss.h +++ b/include/linux/platform_data/omapdss.h @@ -27,7 +27,6 @@ enum omapdss_version { /* Board specific data */ struct omap_dss_board_info { - const char *default_display_name; int (*dsi_enable_pads)(int dsi_id, unsigned int lane_mask); void (*dsi_disable_pads)(int dsi_id, unsigned int lane_mask); int (*set_min_bus_tput)(struct device *dev, unsigned long r); -- cgit v1.2.3 From a9548c55295a4268f9187e1ec93264a0682fa745 Mon Sep 17 00:00:00 2001 From: John Youn Date: Fri, 28 Apr 2017 12:55:20 +0400 Subject: usb: gadget: Allow a non-SuperSpeed gadget to support LPM This commit allows a gadget that does not support SuperSpeed to indicate that it supports LPM. It does this by setting the 'lpm_capable' flag in the gadget structure. If a gadget sets this, the composite gadget framework will set the bcdUSB to 0x0201 to indicate that this supports BOS descriptors, and also return a USB 2.0 Extension descriptor as part of the BOS descriptor set. See USB 2.0 LPM ECN Section 3. Signed-off-by: John Youn Signed-off-by: Sevak Arakelyan Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index fbc22a39e7bc..3ee5f2a7c0b4 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -352,6 +352,8 @@ struct usb_gadget_ops { * @deactivated: True if gadget is deactivated - in deactivated state it cannot * be connected. * @connected: True if gadget is connected. 
+ * @lpm_capable: If the gadget max_speed is FULL or HIGH, this flag + * indicates that it supports LPM as per the LPM ECN & errata. * * Gadgets have a mostly-portable "gadget driver" implementing device * functions, handling all usb configurations and interfaces. Gadget @@ -404,6 +406,7 @@ struct usb_gadget { unsigned is_selfpowered:1; unsigned deactivated:1; unsigned connected:1; + unsigned lpm_capable:1; }; #define work_to_gadget(w) (container_of((w), struct usb_gadget, work)) -- cgit v1.2.3 From 3a5f8997dc643a0e0e9a0895c2214b21e5e774a2 Mon Sep 17 00:00:00 2001 From: Zhang Shengju Date: Thu, 1 Jun 2017 15:37:02 +0800 Subject: team: add macro MODULE_ALIAS_TEAM_MODE for team mode alias Add a new macro MODULE_ALIAS_TEAM_MODE to unify and simplify the declaration of team mode alias. Signed-off-by: Zhang Shengju Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/if_team.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_team.h b/include/linux/if_team.h index c05216a8fbac..30294603526f 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -298,4 +298,6 @@ extern void team_mode_unregister(const struct team_mode *mode); #define TEAM_DEFAULT_NUM_TX_QUEUES 16 #define TEAM_DEFAULT_NUM_RX_QUEUES 16 +#define MODULE_ALIAS_TEAM_MODE(kind) MODULE_ALIAS("team-mode-" kind) + #endif /* _LINUX_IF_TEAM_H_ */ -- cgit v1.2.3 From 3c5da94278026a4583320f97f6547573fb3a93aa Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Fri, 2 Jun 2017 08:58:31 +0300 Subject: qed: Share additional information with qedf Share several new tidbits with qedf: - wwpn & wwnn - Absolute pf-id [this one is actually meant for qedi as well] - Number of available CQs While we're at it, now that qedf will be aware of the available CQs we can add some validation on the inputs it provides. Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_fcoe_if.h | 5 +++++ include/linux/qed/qed_if.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_fcoe_if.h b/include/linux/qed/qed_fcoe_if.h index bd6bcb809415..1e015c50e6b8 100644 --- a/include/linux/qed/qed_fcoe_if.h +++ b/include/linux/qed/qed_fcoe_if.h @@ -24,6 +24,11 @@ struct qed_dev_fcoe_info { void __iomem *primary_dbq_rq_addr; void __iomem *secondary_bdq_rq_addr; + + u64 wwpn; + u64 wwnn; + + u8 num_cqs; }; struct qed_fcoe_params_offload { diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 607e1c5e185a..e29c6f74a4d4 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -360,6 +360,8 @@ struct qed_dev_info { bool vxlan_enable; bool gre_enable; bool geneve_enable; + + u8 abs_pf_id; }; enum qed_sb_type { -- cgit v1.2.3 From 20675b37ee76d11430fd3d4da0851fc6a4e36abc Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Fri, 2 Jun 2017 08:58:32 +0300 Subject: qed: Support NVM-image reading API Storage drivers require images from the nvram in boot-from-SAN scenarios. This provides the necessary API between qed and the protocol drivers to perform such reads. Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_if.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index e29c6f74a4d4..567ea3ea6c0e 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -156,6 +156,11 @@ struct qed_dcbx_get { struct qed_dcbx_admin_params local; }; +enum qed_nvm_images { + QED_NVM_IMAGE_ISCSI_CFG, + QED_NVM_IMAGE_FCOE_CFG, +}; + enum qed_led_mode { QED_LED_MODE_OFF, QED_LED_MODE_ON, @@ -630,6 +635,19 @@ struct qed_common_ops { void (*chain_free)(struct qed_dev *cdev, struct qed_chain *p_chain); +/** + * @brief nvm_get_image - reads an entire image from nvram + * + * @param cdev + * @param type - type of the request nvram image + * @param buf - preallocated buffer to fill with the image + * @param len - length of the allocated buffer + * + * @return 0 on success, error otherwise + */ + int (*nvm_get_image)(struct qed_dev *cdev, + enum qed_nvm_images type, u8 *buf, u16 len); + /** * @brief get_coalesce - Get coalesce parameters in usec * -- cgit v1.2.3 From dc4528e9e890f82900d75ac6276aba8ce89a80b6 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Fri, 2 Jun 2017 08:58:33 +0300 Subject: qed: Add support for changing iSCSI mac Enhance API between qedi and qed, allowing qedi to inform device's firmware when the iSCSI mac is to be changed. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_iscsi_if.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_iscsi_if.h b/include/linux/qed/qed_iscsi_if.h index 3414649133d2..111e606a74c8 100644 --- a/include/linux/qed/qed_iscsi_if.h +++ b/include/linux/qed/qed_iscsi_if.h @@ -210,6 +210,11 @@ struct qed_iscsi_cb_ops { * @param stats - pointer to struck that would be filled * we stats * @return 0 on success, error otherwise. 
+ * @change_mac Change MAC of interface + * @param cdev + * @param handle - the connection handle. + * @param mac - new MAC to configure. + * @return 0 on success, otherwise error value. */ struct qed_iscsi_ops { const struct qed_common_ops *common; @@ -248,6 +253,8 @@ struct qed_iscsi_ops { int (*get_stats)(struct qed_dev *cdev, struct qed_iscsi_stats *stats); + + int (*change_mac)(struct qed_dev *cdev, u32 handle, const u8 *mac); }; const struct qed_iscsi_ops *qed_get_iscsi_ops(void); -- cgit v1.2.3 From 428c9de583921c4b699622272c04af4e362c474c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 28 Apr 2017 15:08:53 +0200 Subject: clk: Provide dummy of_clk_get_from_provider() for compile-testing When CONFIG_OF=n, dummies are provided for of_clk_get() and of_clk_get_by_name(), but not for of_clk_get_from_provider(). Provide a dummy for the latter, to improve the ability to do compile-testing. This requires removing the existing dummy in the Lantiq clock code. Fixes: 766e6a4ec602d0c1 ("clk: add DT clock binding support") Signed-off-by: Geert Uytterhoeven Acked-by: Thomas Langer Signed-off-by: Stephen Boyd --- include/linux/clk.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk.h b/include/linux/clk.h index 024cd07870d0..e790d19b8ffb 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -539,6 +539,10 @@ static inline struct clk *of_clk_get_by_name(struct device_node *np, { return ERR_PTR(-ENOENT); } +static inline struct clk *of_clk_get_from_provider(struct of_phandle_args *clkspec) +{ + return ERR_PTR(-ENOENT); +} #endif #endif -- cgit v1.2.3 From d2f31c49cf7cfe8f02b70614ae56a39b0c1d8a75 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 18 May 2017 23:11:39 +0200 Subject: i2c: sh_mobile: remove platform_data No platform currently upstream makes use of this platform_data anymore. The ones that did are converted to DT meanwhile. So, remove it.
The old platforms likely don't have the 'clks_per_count' feature, otherwise it would have been implemented by now. And in the unlikely case they need to set up a different bus speed, we should rather go for a generic i2c platform data just for that. Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang --- include/linux/i2c/i2c-sh_mobile.h | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 include/linux/i2c/i2c-sh_mobile.h (limited to 'include/linux') diff --git a/include/linux/i2c/i2c-sh_mobile.h b/include/linux/i2c/i2c-sh_mobile.h deleted file mode 100644 index 06e3089795fb..000000000000 --- a/include/linux/i2c/i2c-sh_mobile.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef __I2C_SH_MOBILE_H__ -#define __I2C_SH_MOBILE_H__ - -#include - -struct i2c_sh_mobile_platform_data { - unsigned long bus_speed; - unsigned int clks_per_count; -}; - -#endif /* __I2C_SH_MOBILE_H__ */ -- cgit v1.2.3 From 266e4e9d9150e98141b85c7400f8aa3cd57a7f9b Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Fri, 19 May 2017 21:49:04 +0800 Subject: clk: add clk_bulk_get accessories These helper functions allow drivers to get several clk consumers in one operation. If any of the clk cannot be acquired then any clks that were got will be put before returning to the caller. This eases the burden on driver authors who need to handle many clocks, including the error reporting for each clock. Cc: Michael Turquette Cc: Stephen Boyd Cc: Russell King Cc: Geert Uytterhoeven Cc: "Rafael J.
Wysocki" Cc: Viresh Kumar Cc: Mark Brown Cc: Shawn Guo Cc: Fabio Estevam Cc: Sascha Hauer Cc: Anson Huang Cc: Robin Gong Cc: Bai Ping Cc: Leonard Crestez Cc: Octavian Purdila Signed-off-by: Dong Aisheng Signed-off-by: Stephen Boyd --- include/linux/clk.h | 111 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk.h b/include/linux/clk.h index 024cd07870d0..72b0cfce9165 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -77,6 +77,21 @@ struct clk_notifier_data { unsigned long new_rate; }; +/** + * struct clk_bulk_data - Data used for bulk clk operations. + * + * @id: clock consumer ID + * @clk: struct clk * to store the associated clock + * + * The CLK APIs provide a series of clk_bulk_() API calls as + * a convenience to consumers which require multiple clks. This + * structure is used to manage data for these calls. + */ +struct clk_bulk_data { + const char *id; + struct clk *clk; +}; + #ifdef CONFIG_COMMON_CLK /** @@ -185,12 +200,20 @@ static inline bool clk_is_match(const struct clk *p, const struct clk *q) */ #ifdef CONFIG_HAVE_CLK_PREPARE int clk_prepare(struct clk *clk); +int __must_check clk_bulk_prepare(int num_clks, + const struct clk_bulk_data *clks); #else static inline int clk_prepare(struct clk *clk) { might_sleep(); return 0; } + +static inline int clk_bulk_prepare(int num_clks, struct clk_bulk_data *clks) +{ + might_sleep(); + return 0; +} #endif /** @@ -204,11 +227,16 @@ static inline int clk_prepare(struct clk *clk) */ #ifdef CONFIG_HAVE_CLK_PREPARE void clk_unprepare(struct clk *clk); +void clk_bulk_unprepare(int num_clks, const struct clk_bulk_data *clks); #else static inline void clk_unprepare(struct clk *clk) { might_sleep(); } +static inline void clk_bulk_unprepare(int num_clks, struct clk_bulk_data *clks) +{ + might_sleep(); +} #endif #ifdef CONFIG_HAVE_CLK @@ -229,6 +257,29 @@ static inline void clk_unprepare(struct clk *clk) */ struct clk 
*clk_get(struct device *dev, const char *id); +/** + * clk_bulk_get - lookup and obtain a number of references to clock producer. + * @dev: device for clock "consumer" + * @num_clks: the number of clk_bulk_data + * @clks: the clk_bulk_data table of consumer + * + * This helper function allows drivers to get several clk consumers in one + * operation. If any of the clk cannot be acquired then any clks + * that were obtained will be freed before returning to the caller. + * + * Returns 0 if all clocks specified in clk_bulk_data table are obtained + * successfully, or valid IS_ERR() condition containing errno. + * The implementation uses @dev and @clk_bulk_data.id to determine the + * clock consumer, and thereby the clock producer. + * The clock returned is stored in each @clk_bulk_data.clk field. + * + * Drivers must assume that the clock source is not enabled. + * + * clk_bulk_get should not be called from within interrupt context. + */ +int __must_check clk_bulk_get(struct device *dev, int num_clks, + struct clk_bulk_data *clks); + /** * devm_clk_get - lookup and obtain a managed reference to a clock producer. * @dev: device for clock "consumer" @@ -278,6 +329,18 @@ struct clk *devm_get_clk_from_child(struct device *dev, */ int clk_enable(struct clk *clk); +/** + * clk_bulk_enable - inform the system when the set of clks should be running. + * @num_clks: the number of clk_bulk_data + * @clks: the clk_bulk_data table of consumer + * + * May be called from atomic contexts. + * + * Returns success (0) or negative errno. + */ +int __must_check clk_bulk_enable(int num_clks, + const struct clk_bulk_data *clks); + /** * clk_disable - inform the system when the clock source is no longer required. * @clk: clock source @@ -294,6 +357,24 @@ int clk_enable(struct clk *clk); */ void clk_disable(struct clk *clk); +/** + * clk_bulk_disable - inform the system when the set of clks is no + * longer required. 
+ * @num_clks: the number of clk_bulk_data + * @clks: the clk_bulk_data table of consumer + * + * Inform the system that a set of clks is no longer required by + * a driver and may be shut down. + * + * May be called from atomic contexts. + * + * Implementation detail: if the set of clks is shared between + * multiple drivers, clk_bulk_enable() calls must be balanced by the + * same number of clk_bulk_disable() calls for the clock source to be + * disabled. + */ +void clk_bulk_disable(int num_clks, const struct clk_bulk_data *clks); + /** * clk_get_rate - obtain the current clock rate (in Hz) for a clock source. * This is only valid once the clock source has been enabled. @@ -313,6 +394,19 @@ unsigned long clk_get_rate(struct clk *clk); */ void clk_put(struct clk *clk); +/** + * clk_bulk_put - "free" the clock source + * @num_clks: the number of clk_bulk_data + * @clks: the clk_bulk_data table of consumer + * + * Note: drivers must ensure that all clk_bulk_enable calls made on this + * clock source are balanced by clk_bulk_disable calls prior to calling + * this function. + * + * clk_bulk_put should not be called from within interrupt context. 
+ */ +void clk_bulk_put(int num_clks, struct clk_bulk_data *clks); + /** * devm_clk_put - "free" a managed clock source * @dev: device used to acquire the clock @@ -445,6 +539,12 @@ static inline struct clk *clk_get(struct device *dev, const char *id) return NULL; } +static inline int clk_bulk_get(struct device *dev, int num_clks, + struct clk_bulk_data *clks) +{ + return 0; +} + static inline struct clk *devm_clk_get(struct device *dev, const char *id) { return NULL; @@ -458,6 +558,8 @@ static inline struct clk *devm_get_clk_from_child(struct device *dev, static inline void clk_put(struct clk *clk) {} +static inline void clk_bulk_put(int num_clks, struct clk_bulk_data *clks) {} + static inline void devm_clk_put(struct device *dev, struct clk *clk) {} static inline int clk_enable(struct clk *clk) @@ -465,8 +567,17 @@ static inline int clk_enable(struct clk *clk) return 0; } +static inline int clk_bulk_enable(int num_clks, struct clk_bulk_data *clks) +{ + return 0; +} + static inline void clk_disable(struct clk *clk) {} + +static inline void clk_bulk_disable(int num_clks, + struct clk_bulk_data *clks) {} + static inline unsigned long clk_get_rate(struct clk *clk) { return 0; -- cgit v1.2.3 From 618aee02e2f57042f4cdeab228caf631e524b281 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Fri, 19 May 2017 21:49:05 +0800 Subject: clk: add managed version of clk_bulk_get This patch introduces the managed version of clk_bulk_get. Cc: Michael Turquette Cc: Stephen Boyd Cc: Russell King Cc: Geert Uytterhoeven Cc: "Rafael J. 
Wysocki" Cc: Viresh Kumar Cc: Mark Brown Cc: Shawn Guo Cc: Fabio Estevam Cc: Sascha Hauer Cc: Anson Huang Cc: Robin Gong Cc: Bai Ping Cc: Leonard Crestez Cc: Octavian Purdila Signed-off-by: Dong Aisheng Signed-off-by: Stephen Boyd --- include/linux/clk.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk.h b/include/linux/clk.h index 72b0cfce9165..c673f0b91751 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -280,6 +280,21 @@ struct clk *clk_get(struct device *dev, const char *id); int __must_check clk_bulk_get(struct device *dev, int num_clks, struct clk_bulk_data *clks); +/** + * devm_clk_bulk_get - managed get multiple clk consumers + * @dev: device for clock "consumer" + * @num_clks: the number of clk_bulk_data + * @clks: the clk_bulk_data table of consumer + * + * Return 0 on success, an errno on failure. + * + * This helper function allows drivers to get several clk + * consumers in one operation with management, the clks will + * automatically be freed when the device is unbound. + */ +int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, + struct clk_bulk_data *clks); + /** * devm_clk_get - lookup and obtain a managed reference to a clock producer. 
* @dev: device for clock "consumer" @@ -550,6 +565,12 @@ static inline struct clk *devm_clk_get(struct device *dev, const char *id) return NULL; } +static inline int devm_clk_bulk_get(struct device *dev, int num_clks, + struct clk_bulk_data *clks) +{ + return 0; +} + static inline struct clk *devm_get_clk_from_child(struct device *dev, struct device_node *np, const char *con_id) { -- cgit v1.2.3 From cfe76a28e37112f471d4bcb8d5f336e3416299b7 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 12 May 2017 09:40:49 +0200 Subject: clk: versatile: delete old RealView clock implementation The old RealView clock implementation is not used anymore (nothing in the kernel calls realview_clk_init()) as we have moved all clocks over to device tree. Delete it. Signed-off-by: Linus Walleij Signed-off-by: Stephen Boyd --- include/linux/platform_data/clk-realview.h | 1 - 1 file changed, 1 deletion(-) delete mode 100644 include/linux/platform_data/clk-realview.h (limited to 'include/linux') diff --git a/include/linux/platform_data/clk-realview.h b/include/linux/platform_data/clk-realview.h deleted file mode 100644 index 2e426a7dbc51..000000000000 --- a/include/linux/platform_data/clk-realview.h +++ /dev/null @@ -1 +0,0 @@ -void realview_clk_init(void __iomem *sysbase, bool is_pb1176); -- cgit v1.2.3 From 5c82a6ae0242416cfead597bb2b42aa3481a0ba7 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 2 Jun 2017 14:15:15 +0200 Subject: rtc: remove rtc_device.name rtc->name is only used in messages where it is superfluous. Remove it completely from the structure.
Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index b693adac853b..d354f56e0cf5 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -116,7 +116,6 @@ struct rtc_device { struct module *owner; int id; - char name[RTC_DEVICE_NAME_SIZE]; const struct rtc_class_ops *ops; struct mutex ops_lock; -- cgit v1.2.3 From bab3548078237706f53baafe43ae58257225549d Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Tue, 30 May 2017 12:39:53 -0700 Subject: usb: typec: Add a sysfs node to manage port type User space applications in some cases have the need to enforce a specific port type(DFP/UFP/DRP). This change allows userspace to attempt setting the desired port type. Low level drivers can however reject the request if the specific port type is not supported. Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index d1d2ebcf36ec..ffe7487886ca 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -190,6 +190,7 @@ struct typec_partner_desc { * @pr_set: Set Power Role * @vconn_set: Set VCONN Role * @activate_mode: Enter/exit given Alternate Mode + * @port_type_set: Set port type * * Static capabilities of a single USB Type-C port. */ @@ -214,6 +215,9 @@ struct typec_capability { int (*activate_mode)(const struct typec_capability *, int mode, int activate); + int (*port_type_set)(const struct typec_capability *, + enum typec_port_type); + }; /* Specific to try_role(). Indicates the user want's to clear the preference. 
*/ -- cgit v1.2.3 From 615ffd63149117aa5693d6672944966b490cdb66 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Wed, 31 May 2017 17:59:30 +0100 Subject: arm,arm64,drivers: move externs in a new header file Create a new header file (include/linux/arch_topology.h) and put there declarations of interfaces used by arm, arm64 and drivers code. Signed-off-by: Juri Lelli Acked-by: Russell King Acked-by: Catalin Marinas Acked-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- include/linux/arch_topology.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 include/linux/arch_topology.h (limited to 'include/linux') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h new file mode 100644 index 000000000000..4edae9fe8cdd --- /dev/null +++ b/include/linux/arch_topology.h @@ -0,0 +1,17 @@ +/* + * include/linux/arch_topology.h - arch specific cpu topology information + */ +#ifndef _LINUX_ARCH_TOPOLOGY_H_ +#define _LINUX_ARCH_TOPOLOGY_H_ + +void normalize_cpu_capacity(void); + +struct device_node; +int parse_cpu_capacity(struct device_node *cpu_node, int cpu); + +struct sched_domain; +unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu); + +void set_capacity_scale(unsigned int cpu, unsigned long capacity); + +#endif /* _LINUX_ARCH_TOPOLOGY_H_ */ -- cgit v1.2.3 From 4ca4f26a9c66103ca158689b7554f07f4968a32c Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Wed, 31 May 2017 17:59:31 +0100 Subject: arm,arm64,drivers: add a prefix to drivers arch_topology interfaces Now that some functions that deal with arch topology information live under drivers, there is a clash of naming that might create confusion. Tidy things up by creating a topology namespace for interfaces used by arch code; achieve this by prepending a 'topology_' prefix to driver interfaces. 
Signed-off-by: Juri Lelli Acked-by: Russell King Acked-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- include/linux/arch_topology.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 4edae9fe8cdd..9af3c174c03a 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -4,14 +4,14 @@ #ifndef _LINUX_ARCH_TOPOLOGY_H_ #define _LINUX_ARCH_TOPOLOGY_H_ -void normalize_cpu_capacity(void); +void topology_normalize_cpu_scale(void); struct device_node; -int parse_cpu_capacity(struct device_node *cpu_node, int cpu); +int topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu); struct sched_domain; -unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu); +unsigned long topology_get_cpu_scale(struct sched_domain *sd, int cpu); -void set_capacity_scale(unsigned int cpu, unsigned long capacity); +void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity); #endif /* _LINUX_ARCH_TOPOLOGY_H_ */ -- cgit v1.2.3 From a3b02a9c6591ce154cd44e2383406390a45b530c Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Sun, 14 May 2017 21:51:06 +0200 Subject: mux: minimal mux subsystem Add a new minimalistic subsystem that handles multiplexer controllers. When multiplexers are used in various places in the kernel, and the same multiplexer controller can be used for several independent things, there should be one place to implement support for said multiplexer controller. A single multiplexer controller can also be used to control several parallel multiplexers, that are in turn used by different subsystems in the kernel, leading to a need to coordinate multiplexer accesses. The multiplexer subsystem handles this coordination. 
Thanks go out to Lars-Peter Clausen, Jonathan Cameron, Rob Herring, Wolfram Sang, Paul Gortmaker, Dan Carpenter, Colin Ian King, Greg Kroah-Hartman and last but certainly not least to Philipp Zabel for helpful comments, reviews, patches and general encouragement! Reviewed-by: Jonathan Cameron Signed-off-by: Peter Rosin Reviewed-by: Philipp Zabel Tested-by: Philipp Zabel Signed-off-by: Greg Kroah-Hartman --- include/linux/mux/consumer.h | 32 +++++++++++++ include/linux/mux/driver.h | 108 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 include/linux/mux/consumer.h create mode 100644 include/linux/mux/driver.h (limited to 'include/linux') diff --git a/include/linux/mux/consumer.h b/include/linux/mux/consumer.h new file mode 100644 index 000000000000..5577e1b773c4 --- /dev/null +++ b/include/linux/mux/consumer.h @@ -0,0 +1,32 @@ +/* + * mux/consumer.h - definitions for the multiplexer consumer interface + * + * Copyright (C) 2017 Axentia Technologies AB + * + * Author: Peter Rosin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef _LINUX_MUX_CONSUMER_H +#define _LINUX_MUX_CONSUMER_H + +struct device; +struct mux_control; + +unsigned int mux_control_states(struct mux_control *mux); +int __must_check mux_control_select(struct mux_control *mux, + unsigned int state); +int __must_check mux_control_try_select(struct mux_control *mux, + unsigned int state); +int mux_control_deselect(struct mux_control *mux); + +struct mux_control *mux_control_get(struct device *dev, const char *mux_name); +void mux_control_put(struct mux_control *mux); + +struct mux_control *devm_mux_control_get(struct device *dev, + const char *mux_name); + +#endif /* _LINUX_MUX_CONSUMER_H */ diff --git a/include/linux/mux/driver.h b/include/linux/mux/driver.h new file mode 100644 index 000000000000..35c3579c3304 --- /dev/null +++ b/include/linux/mux/driver.h @@ -0,0 +1,108 @@ +/* + * mux/driver.h - definitions for the multiplexer driver interface + * + * Copyright (C) 2017 Axentia Technologies AB + * + * Author: Peter Rosin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _LINUX_MUX_DRIVER_H +#define _LINUX_MUX_DRIVER_H + +#include +#include +#include + +struct mux_chip; +struct mux_control; + +/** + * struct mux_control_ops - Mux controller operations for a mux chip. + * @set: Set the state of the given mux controller. + */ +struct mux_control_ops { + int (*set)(struct mux_control *mux, int state); +}; + +/** + * struct mux_control - Represents a mux controller. + * @lock: Protects the mux controller state. + * @chip: The mux chip that is handling this mux controller. + * @cached_state: The current mux controller state, or -1 if none. + * @states: The number of mux controller states. + * @idle_state: The mux controller state to use when inactive, or one + * of MUX_IDLE_AS_IS and MUX_IDLE_DISCONNECT. 
+ * + * Mux drivers may only change @states and @idle_state, and may only do so + * between allocation and registration of the mux controller. Specifically, + * @cached_state is internal to the mux core and should never be written by + * mux drivers. + */ +struct mux_control { + struct semaphore lock; /* protects the state of the mux */ + + struct mux_chip *chip; + int cached_state; + + unsigned int states; + int idle_state; +}; + +/** + * struct mux_chip - Represents a chip holding mux controllers. + * @controllers: Number of mux controllers handled by the chip. + * @mux: Array of mux controllers that are handled. + * @dev: Device structure. + * @id: Used to identify the device internally. + * @ops: Mux controller operations. + */ +struct mux_chip { + unsigned int controllers; + struct mux_control *mux; + struct device dev; + int id; + + const struct mux_control_ops *ops; +}; + +#define to_mux_chip(x) container_of((x), struct mux_chip, dev) + +/** + * mux_chip_priv() - Get the extra memory reserved by mux_chip_alloc(). + * @mux_chip: The mux-chip to get the private memory from. + * + * Return: Pointer to the private memory reserved by the allocator. + */ +static inline void *mux_chip_priv(struct mux_chip *mux_chip) +{ + return &mux_chip->mux[mux_chip->controllers]; +} + +struct mux_chip *mux_chip_alloc(struct device *dev, + unsigned int controllers, size_t sizeof_priv); +int mux_chip_register(struct mux_chip *mux_chip); +void mux_chip_unregister(struct mux_chip *mux_chip); +void mux_chip_free(struct mux_chip *mux_chip); + +struct mux_chip *devm_mux_chip_alloc(struct device *dev, + unsigned int controllers, + size_t sizeof_priv); +int devm_mux_chip_register(struct device *dev, struct mux_chip *mux_chip); + +/** + * mux_control_get_index() - Get the index of the given mux controller + * @mux: The mux-control to get the index for. + * + * Return: The index of the mux controller within the mux chip the mux + * controller is a part of. 
+ */ +static inline unsigned int mux_control_get_index(struct mux_control *mux) +{ + return mux - mux->chip->mux; +} + +#endif /* _LINUX_MUX_DRIVER_H */ -- cgit v1.2.3 From 8a848e754956dcbc35cd2fcf417f66dafa020c9e Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Sun, 14 May 2017 21:51:08 +0200 Subject: iio: inkern: api for manipulating ext_info of iio channels Extend the inkern api with functions for reading and writing ext_info of iio channels. Acked-by: Jonathan Cameron Signed-off-by: Peter Rosin Signed-off-by: Greg Kroah-Hartman --- include/linux/iio/consumer.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index 47eeec3218b5..5e347a9805fd 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -312,4 +312,41 @@ int iio_read_channel_scale(struct iio_channel *chan, int *val, int iio_convert_raw_to_processed(struct iio_channel *chan, int raw, int *processed, unsigned int scale); +/** + * iio_get_channel_ext_info_count() - get number of ext_info attributes + * connected to the channel. + * @chan: The channel being queried + * + * Returns the number of ext_info attributes + */ +unsigned int iio_get_channel_ext_info_count(struct iio_channel *chan); + +/** + * iio_read_channel_ext_info() - read ext_info attribute from a given channel + * @chan: The channel being queried. + * @attr: The ext_info attribute to read. + * @buf: Where to store the attribute value. Assumed to hold + * at least PAGE_SIZE bytes. + * + * Returns the number of bytes written to buf (perhaps w/o zero termination; + * it need not even be a string), or an error code. + */ +ssize_t iio_read_channel_ext_info(struct iio_channel *chan, + const char *attr, char *buf); + +/** + * iio_write_channel_ext_info() - write ext_info attribute from a given channel + * @chan: The channel being queried. + * @attr: The ext_info attribute to read. 
+ * @buf: The new attribute value. Strings needs to be zero- + * terminated, but the terminator should not be included + * in the below len. + * @len: The size of the new attribute value. + * + * Returns the number of accepted bytes, which should be the same as len. + * An error code can also be returned. + */ +ssize_t iio_write_channel_ext_info(struct iio_channel *chan, const char *attr, + const char *buf, size_t len); + #endif -- cgit v1.2.3 From 201d7f47f34bd7cb19161d0426f13b141e381f30 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 31 May 2017 11:58:32 +0200 Subject: genirq: Handle NOAUTOEN interrupt setup proper If an interrupt is marked NOAUTOEN then request_irq() installs the action, but does not enable the interrupt via startup_irq(). The interrupt is enabled via enable_irq() later from the driver. enable_irq() calls irq_enable(). That means that for interrupts which have an irq_startup() callback this callback is never invoked. Neither is irq_domain_activate_irq() invoked for such interrupts. If an interrupt depends on irq_startup() or irq_domain_activate_irq() then the enable via irq_enable() is not enough. Add a status flag IRQD_IRQ_STARTED and use this to select the proper mechanism in enable_irq(). Use the flag also to avoid pointless calls into the low level functions.
Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Cc: dianders@chromium.org Cc: jeffy Cc: Brian Norris Cc: tfiga@chromium.org Link: http://lkml.kernel.org/r/20170531100212.130986205@linutronix.de --- include/linux/irq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index f887351aa80e..94d1ad6ffdd4 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -216,6 +216,7 @@ enum { IRQD_WAKEUP_ARMED = (1 << 19), IRQD_FORWARDED_TO_VCPU = (1 << 20), IRQD_AFFINITY_MANAGED = (1 << 21), + IRQD_IRQ_STARTED = (1 << 22), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -329,6 +330,11 @@ static inline void irqd_clr_activated(struct irq_data *d) __irqd_to_state(d) &= ~IRQD_ACTIVATED; } +static inline bool irqd_is_started(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_IRQ_STARTED; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) -- cgit v1.2.3 From 7994200ce69a3873dfa2641254a13bb0a40056f3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Jun 2017 21:00:59 +0200 Subject: ia64: Remove HAVE_ARCH_COPY_SIGINFO Since ia64 defines __ARCH_SI_PREAMBLE_SIZE it can just use the generic copy_siginfo implementation, which is identical to the architecture specific one. With that support for HAVE_ARCH_COPY_SIGINFO can go away entirely. Signed-off-by: Christoph Hellwig Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Fenghua Yu Cc: Tony Luck Cc: linux-ia64@vger.kernel.org Cc: Arnd Bergmann Cc: sparclinux@vger.kernel.org Cc: "David S. 
Miller" Link: http://lkml.kernel.org/r/20170603190102.28866-3-hch@lst.de --- include/linux/signal.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index 1f5a16620693..80c7418be359 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -3,16 +3,13 @@ #include #include +#include struct task_struct; /* for sysctl */ extern int print_fatal_signals; -#ifndef HAVE_ARCH_COPY_SIGINFO - -#include - static inline void copy_siginfo(struct siginfo *to, struct siginfo *from) { if (from->si_code < 0) @@ -22,8 +19,6 @@ static inline void copy_siginfo(struct siginfo *to, struct siginfo *from) memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld)); } -#endif - /* * Define some primitives to manipulate sigset_t. */ -- cgit v1.2.3 From 31ea70e0308b73a1b862bd17c06efc3cbcfd2016 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Jun 2017 21:01:00 +0200 Subject: posix-timers: Move the do_schedule_next_timer declaration Having it in asm-generic/siginfo.h doesn't make any sense as it is in no way architecture specific. Move it to posix-timers.h instead. Signed-off-by: Christoph Hellwig Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Fenghua Yu Cc: Tony Luck Cc: linux-ia64@vger.kernel.org Cc: Arnd Bergmann Cc: sparclinux@vger.kernel.org Cc: "David S. 
Miller" Link: http://lkml.kernel.org/r/20170603190102.28866-4-hch@lst.de --- include/linux/posix-timers.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 34e893a75771..8929f7e8f452 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -7,6 +7,7 @@ #include #include +struct siginfo; struct cpu_timer_list { struct list_head entry; @@ -120,4 +121,6 @@ long clock_nanosleep_restart(struct restart_block *restart_block); void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new); +void do_schedule_next_timer(struct siginfo *info); + #endif -- cgit v1.2.3 From b9253a43370e8f3c46c0ee24b04fa2ffec37b7c0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Jun 2017 21:01:01 +0200 Subject: signal: Move copy_siginfo_to_user to Having it in asm-generic/siginfo.h doesn't make any sense as it is in no way architecture specific. Move it to signal.h instead where several related functions already reside. Signed-off-by: Christoph Hellwig Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Fenghua Yu Cc: Tony Luck Cc: linux-ia64@vger.kernel.org Cc: Arnd Bergmann Cc: sparclinux@vger.kernel.org Cc: "David S. Miller" Link: http://lkml.kernel.org/r/20170603190102.28866-5-hch@lst.de --- include/linux/signal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index 80c7418be359..a39feddd71ba 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -19,6 +19,8 @@ static inline void copy_siginfo(struct siginfo *to, struct siginfo *from) memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld)); } +int copy_siginfo_to_user(struct siginfo __user *to, const struct siginfo *from); + /* * Define some primitives to manipulate sigset_t. 
*/ -- cgit v1.2.3 From 3a06c7ac24f9f24ec059cd77c2dbdf7fbfd0aaaf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 30 May 2017 23:15:38 +0200 Subject: posix-clocks: Remove interval timer facility and mmap/fasync callbacks The only user of this facility is ptp_clock, which does not implement any of those functions. Remove them to prevent accidental users. Especially the interval timer interfaces are now more or less impossible to implement because the necessary infrastructure has been confined to the core code. Aside of that it's really complex to make these callbacks implemented according to spec as the alarm timer implementation demonstrates. If at all then a nanosleep callback might be a reasonable extension. For now keep just what ptp_clock needs. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: John Stultz Link: http://lkml.kernel.org/r/20170530211656.145036286@linutronix.de --- include/linux/posix-clock.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 83b22ae9ae12..38d8225510f1 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -42,12 +42,6 @@ struct posix_clock; * @clock_gettime: Read the current time * @clock_getres: Get the clock resolution * @clock_settime: Set the current time value - * @timer_create: Create a new timer - * @timer_delete: Remove a previously created timer - * @timer_gettime: Get remaining time and interval of a timer - * @timer_settime: Set a timer's initial expiration and interval - * @fasync: Optional character device fasync method - * @mmap: Optional character device mmap method * @open: Optional character device open method * @release: Optional character device release method * @ioctl: Optional character device ioctl method @@ -66,28 +60,12 @@ struct posix_clock_operations { int (*clock_settime)(struct posix_clock *pc, const struct timespec64 *ts); - int (*timer_create) (struct 
posix_clock *pc, struct k_itimer *kit); - - int (*timer_delete) (struct posix_clock *pc, struct k_itimer *kit); - - void (*timer_gettime)(struct posix_clock *pc, - struct k_itimer *kit, struct itimerspec64 *tsp); - - int (*timer_settime)(struct posix_clock *pc, - struct k_itimer *kit, int flags, - struct itimerspec64 *tsp, struct itimerspec64 *old); /* * Optional character device methods: */ - int (*fasync) (struct posix_clock *pc, - int fd, struct file *file, int on); - long (*ioctl) (struct posix_clock *pc, unsigned int cmd, unsigned long arg); - int (*mmap) (struct posix_clock *pc, - struct vm_area_struct *vma); - int (*open) (struct posix_clock *pc, fmode_t f_mode); uint (*poll) (struct posix_clock *pc, -- cgit v1.2.3 From 03676b41a8ffcbb1f6d9eb6ca754b2bfa431fd59 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 30 May 2017 23:15:40 +0200 Subject: posix-timers: Cleanup struct k_itimer As a preparation for further changes, cleanup the formatting of the k_itimer structure and add kernel doc comments. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: John Stultz Link: http://lkml.kernel.org/r/20170530211656.316574129@linutronix.de --- include/linux/posix-timers.h | 61 +++++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 8929f7e8f452..e06062c3967b 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -49,35 +49,54 @@ struct cpu_timer_list { #define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD) #define CLOCKID_TO_FD(clk) ((unsigned int) ~((clk) >> 3)) -/* POSIX.1b interval timer structure. 
*/ -struct k_itimer { - struct list_head list; /* free/ allocate list */ - struct hlist_node t_hash; - spinlock_t it_lock; - clockid_t it_clock; /* which timer type */ - timer_t it_id; /* timer id */ - int it_overrun; /* overrun on pending signal */ - int it_overrun_last; /* overrun on last delivered signal */ - int it_requeue_pending; /* waiting to requeue this timer */ #define REQUEUE_PENDING 1 - int it_sigev_notify; /* notify word of sigevent struct */ - struct signal_struct *it_signal; + +/** + * struct k_itimer - POSIX.1b interval timer structure. + * @list: List head for binding the timer to signals->posix_timers + * @t_hash: Entry in the posix timer hash table + * @it_lock: Lock protecting the timer + * @it_clock: The posix timer clock id + * @it_id: The posix timer id for identifying the timer + * @it_overrun: The overrun counter for pending signals + * @it_overrun_last: The overrun at the time of the last delivered signal + * @it_requeue_pending: Indicator that timer waits for being requeued on + * signal delivery + * @it_sigev_notify: The notify word of sigevent struct for signal delivery + * @it_signal: Pointer to the creators signal struct + * @it_pid: The pid of the process/task targeted by the signal + * @it_process: The task to wakeup on clock_nanosleep (CPU timers) + * @sigq: Pointer to preallocated sigqueue + * @it: Union representing the various posix timer type + * internals. Also used for rcu freeing the timer. + */ +struct k_itimer { + struct list_head list; + struct hlist_node t_hash; + spinlock_t it_lock; + clockid_t it_clock; + timer_t it_id; + int it_overrun; + int it_overrun_last; + int it_requeue_pending; + int it_sigev_notify; + struct signal_struct *it_signal; union { - struct pid *it_pid; /* pid of process to send signal to */ - struct task_struct *it_process; /* for clock_nanosleep */ + struct pid *it_pid; + struct task_struct *it_process; }; - struct sigqueue *sigq; /* signal queue entry. 
*/ + struct sigqueue *sigq; union { struct { - struct hrtimer timer; - ktime_t interval; + struct hrtimer timer; + ktime_t interval; } real; - struct cpu_timer_list cpu; + struct cpu_timer_list cpu; struct { - struct alarm alarmtimer; - ktime_t interval; + struct alarm alarmtimer; + ktime_t interval; } alarm; - struct rcu_head rcu; + struct rcu_head rcu; } it; }; -- cgit v1.2.3 From bab0aae9dcba9466dcc968b8bd21914f8f691631 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 30 May 2017 23:15:41 +0200 Subject: posix-timers: Move posix-timer internals to core None of these declarations is required outside of kernel/time. Move them to an internal header. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: John Stultz Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170530211656.394803853@linutronix.de --- include/linux/posix-timers.h | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index e06062c3967b..a372e7e3a396 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -100,36 +100,6 @@ struct k_itimer { } it; }; -struct k_clock { - int (*clock_getres) (const clockid_t which_clock, struct timespec64 *tp); - int (*clock_set) (const clockid_t which_clock, - const struct timespec64 *tp); - int (*clock_get) (const clockid_t which_clock, struct timespec64 *tp); - int (*clock_adj) (const clockid_t which_clock, struct timex *tx); - int (*timer_create) (struct k_itimer *timer); - int (*nsleep) (const clockid_t which_clock, int flags, - struct timespec64 *, struct timespec __user *); - long (*nsleep_restart) (struct restart_block *restart_block); - int (*timer_set) (struct k_itimer *timr, int flags, - struct itimerspec64 *new_setting, - struct itimerspec64 *old_setting); - int (*timer_del) (struct k_itimer *timr); -#define TIMER_RETRY 1 - void (*timer_get) (struct k_itimer *timr, - struct itimerspec64 *cur_setting); -}; 
- -extern const struct k_clock clock_posix_cpu; -extern const struct k_clock clock_posix_dynamic; -extern const struct k_clock clock_process; -extern const struct k_clock clock_thread; -extern const struct k_clock alarm_clock; - -/* function to call to trigger timer event */ -int posix_timer_event(struct k_itimer *timr, int si_private); - -void posix_cpu_timer_schedule(struct k_itimer *timer); - void run_posix_cpu_timers(struct task_struct *task); void posix_cpu_timers_exit(struct task_struct *task); void posix_cpu_timers_exit_group(struct task_struct *task); -- cgit v1.2.3 From 80105cd0e62ba8a2caf8eebd52f42952c7c04046 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 30 May 2017 23:15:43 +0200 Subject: posix-timers: Move interval out of the union Preparatory patch to unify the alarm timer and hrtimer based posix interval timer handling. The interval is used as a criteria for rearming decisions so moving it out of the clock specific data structures allows later unification. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: John Stultz Link: http://lkml.kernel.org/r/20170530211656.563922908@linutronix.de --- include/linux/posix-timers.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index a372e7e3a396..908048f488ae 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -63,6 +63,7 @@ struct cpu_timer_list { * @it_requeue_pending: Indicator that timer waits for being requeued on * signal delivery * @it_sigev_notify: The notify word of sigevent struct for signal delivery + * @it_interval: The interval for periodic timers * @it_signal: Pointer to the creators signal struct * @it_pid: The pid of the process/task targeted by the signal * @it_process: The task to wakeup on clock_nanosleep (CPU timers) @@ -80,6 +81,7 @@ struct k_itimer { int it_overrun_last; int it_requeue_pending; int it_sigev_notify; + ktime_t it_interval; 
struct signal_struct *it_signal; union { struct pid *it_pid; @@ -89,12 +91,10 @@ struct k_itimer { union { struct { struct hrtimer timer; - ktime_t interval; } real; struct cpu_timer_list cpu; struct { struct alarm alarmtimer; - ktime_t interval; } alarm; struct rcu_head rcu; } it; -- cgit v1.2.3 From d97bb75ddd2f38068df01da8abf26df78756253c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 30 May 2017 23:15:44 +0200 Subject: posix-timers: Store k_clock pointer in k_itimer Having the k_clock pointer in the k_itimer struct avoids the lookup in several code paths and makes the next steps of unification of the hrtimer and alarmtimer based posix timers simpler. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: John Stultz Link: http://lkml.kernel.org/r/20170530211656.641222072@linutronix.de --- include/linux/posix-timers.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 908048f488ae..8f9cca390cdb 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -56,6 +56,7 @@ struct cpu_timer_list { * @list: List head for binding the timer to signals->posix_timers * @t_hash: Entry in the posix timer hash table * @it_lock: Lock protecting the timer + * @kclock: Pointer to the k_clock struct handling this timer * @it_clock: The posix timer clock id * @it_id: The posix timer id for identifying the timer * @it_overrun: The overrun counter for pending signals @@ -75,6 +76,7 @@ struct k_itimer { struct list_head list; struct hlist_node t_hash; spinlock_t it_lock; + const struct k_clock *kclock; clockid_t it_clock; timer_t it_id; int it_overrun; -- cgit v1.2.3 From 96fe3b072f134e4993f829d599eaa1e0eb5a10e5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 30 May 2017 23:15:46 +0200 Subject: posix-timers: Rename do_schedule_next_timer That function is a misnomer. Rename it with a proper prefix to posixtimer_rearm().
Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: John Stultz Link: http://lkml.kernel.org/r/20170530211656.811362578@linutronix.de --- include/linux/posix-timers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 8f9cca390cdb..771e5f788c90 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -112,6 +112,6 @@ long clock_nanosleep_restart(struct restart_block *restart_block); void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new); -void do_schedule_next_timer(struct siginfo *info); +void posixtimer_rearm(struct siginfo *info); #endif -- cgit v1.2.3 From 21e55c1f83880a56360287c00f2b5cd5e5a4a912 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 30 May 2017 23:15:48 +0200 Subject: posix-timers: Add active flag to k_itimer Keep track of the activation state of posix timers. This is a preparatory change for making common_timer_get() usable by both hrtimer and alarm timer implementations. 
Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: John Stultz Link: http://lkml.kernel.org/r/20170530211656.967783982@linutronix.de --- include/linux/posix-timers.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 771e5f788c90..667095dbcd37 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -59,6 +59,7 @@ struct cpu_timer_list { * @kclock: Pointer to the k_clock struct handling this timer * @it_clock: The posix timer clock id * @it_id: The posix timer id for identifying the timer + * @it_active: Marker that timer is active * @it_overrun: The overrun counter for pending signals * @it_overrun_last: The overrun at the time of the last delivered signal * @it_requeue_pending: Indicator that timer waits for being requeued on @@ -79,6 +80,7 @@ struct k_itimer { const struct k_clock *kclock; clockid_t it_clock; timer_t it_id; + int it_active; int it_overrun; int it_overrun_last; int it_requeue_pending; -- cgit v1.2.3 From 2387149eade25f32dcf1398811b3d0293181d005 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Sun, 4 Jun 2017 14:43:51 +0200 Subject: KVM: improve arch vcpu request defining Marc Zyngier suggested that we define the arch specific VCPU request base, rather than requiring each arch to remember to start from 8. That suggestion, along with Radim Krcmar's recent VCPU request flag addition, snowballed into defining something of an arch VCPU request defining API. No functional change. (Looks like x86 is running out of arch VCPU request bits. Maybe someday we'll need to extend to 64.) 
Signed-off-by: Andrew Jones Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8c0664309815..3724b51aab64 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -126,6 +126,13 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_PENDING_TIMER 2 #define KVM_REQ_UNHALT 3 +#define KVM_REQUEST_ARCH_BASE 8 + +#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \ + BUILD_BUG_ON((unsigned)(nr) >= 32 - KVM_REQUEST_ARCH_BASE); \ + (unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \ +}) +#define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0) #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v1.2.3 From 2fa6e1e12a024b48b2c7ea39f50205246e027da7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Sun, 4 Jun 2017 14:43:52 +0200 Subject: KVM: add kvm_request_pending MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A first step in vcpu->requests encapsulation. Additionally, we now use READ_ONCE() when accessing vcpu->requests, which ensures we always load vcpu->requests when it's accessed. This is important as other threads can change it any time. Also, READ_ONCE() documents that vcpu->requests is used with other threads, likely requiring memory barriers, which it does. 
Signed-off-by: Radim Krčmář [ Documented the new use of READ_ONCE() and converted another check in arch/mips/kvm/vz.c ] Signed-off-by: Andrew Jones Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3724b51aab64..0b50e7b35ed4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1105,6 +1105,11 @@ static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) set_bit(req & KVM_REQUEST_MASK, &vcpu->requests); } +static inline bool kvm_request_pending(struct kvm_vcpu *vcpu) +{ + return READ_ONCE(vcpu->requests); +} + static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu) { return test_bit(req & KVM_REQUEST_MASK, &vcpu->requests); -- cgit v1.2.3 From f91840a32deef5cb1bf73338bc5010f843b01426 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 2 Jun 2017 21:03:52 -0700 Subject: perf, bpf: Add BPF support to all perf_event types Allow BPF_PROG_TYPE_PERF_EVENT program types to attach to all perf_event types, including HW_CACHE, RAW, and dynamic pmu events. Only tracepoint/kprobe events are treated differently which require BPF_PROG_TYPE_TRACEPOINT/BPF_PROG_TYPE_KPROBE program types accordingly. Also add support for reading all event counters using bpf_perf_event_read() helper. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/perf_event.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 24a635887f28..8fc5f0fada5e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -896,7 +896,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, void *context); extern void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu); -extern u64 perf_event_read_local(struct perf_event *event); +int perf_event_read_local(struct perf_event *event, u64 *value); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); @@ -1301,7 +1301,10 @@ static inline const struct perf_event_attr *perf_event_attrs(struct perf_event * { return ERR_PTR(-EINVAL); } -static inline u64 perf_event_read_local(struct perf_event *event) { return -EINVAL; } +static inline int perf_event_read_local(struct perf_event *event, u64 *value) +{ + return -EINVAL; +} static inline void perf_event_print_debug(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } static inline int perf_event_task_enable(void) { return -EINVAL; } -- cgit v1.2.3 From 48a1df65334b74bd7531f932cca5928932abf769 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 4 Jun 2017 04:16:22 +0200 Subject: skbuff: return -EMSGSIZE in skb_to_sgvec to prevent overflow This is a defense-in-depth measure in response to bugs like 4d6fa57b4dab ("macsec: avoid heap overflow in skb_to_sgvec"). There's not only a potential overflow of sglist items, but also a stack overflow potential, so we fix this by limiting the amount of recursion this function is allowed to do. Not actually providing a bounded base case is a future disaster that we can easily avoid here. As a small matter of house keeping, we take this opportunity to move the documentation comment over the actual function the documentation is for. 
While this could be implemented by using an explicit stack of skbuffs, when implementing this, the function complexity increased considerably, and I don't think such complexity and bloat is actually worth it. So, instead I built this and tested it on x86, x86_64, ARM, ARM64, and MIPS, and measured the stack usage there. I also reverted the recent MIPS changes that give it a separate IRQ stack, so that I could experience some worst-case situations. I found that limiting it to 24 layers deep yielded a good stack usage with room for safety, as well as being much deeper than any driver actually ever creates. Signed-off-by: Jason A. Donenfeld Cc: Steffen Klassert Cc: Herbert Xu Cc: "David S. Miller" Cc: David Howells Cc: Sabrina Dubroca Cc: "Michael S. Tsirkin" Cc: Jason Wang Signed-off-by: David S. Miller --- include/linux/skbuff.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 45a59c1e0cc7..d460a4cbda1c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -953,10 +953,10 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, gfp_t priority); -int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, - int offset, int len); -int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, - int len); +int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, + int offset, int len); +int __must_check skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, + int offset, int len); int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer); int skb_pad(struct sk_buff *skb, int pad); #define dev_kfree_skb(a) consume_skb(a) -- cgit v1.2.3 From f604b17d7fdef574792a7e0b39f1b926d6b43d9d Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Sun, 4 Jun 2017 13:31:01 +0300 
Subject: qed*: L2 interface to use the SB structures directly Part of an effort of a cleaner separation between qed and the protocol drivers, the L2 interface is to use the SB structure for initialization purposes opaquely. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index d66d16a559e1..fd72056f8d49 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -47,8 +47,7 @@ struct qed_queue_start_common_params { /* Relative, but relevant only for PFs */ u8 stats_id; - /* These are always absolute */ - u16 sb; + struct qed_sb_info *p_sb; u8 sb_idx; }; -- cgit v1.2.3 From 08bc8f15e69cbd9f8e3d7bbba4814cec50d51cfe Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Sun, 4 Jun 2017 13:31:06 +0300 Subject: qed: Multiple qzone queues for VFs This adds the infrastructure for supporting VFs that want to open multiple transmission queues on the same queue-zone. At this point, there are no VFs that actually request this functionality, but later patches would remedy that. a. VF and PF would communicate the capability during ACQUIRE; Legacy VFs would continue on behaving as they do today b. PF would communicate number of supported CIDs to the VF and would enforce said limitation c. Whenever VF passes a request for a given queue configuration it would also pass an associated index within said queue-zone Signed-off-by: Yuval Mintz Signed-off-by: David S.
Miller --- include/linux/qed/qed_if.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 567ea3ea6c0e..74f6b99754aa 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -185,6 +185,10 @@ struct qed_eth_pf_params { */ u16 num_cons; + /* per-VF number of CIDs */ + u8 num_vf_cons; +#define ETH_PF_PARAMS_VF_CONS_DEFAULT (32) + /* To enable arfs, previous to HW-init a positive number needs to be * set [as filters require allocated searcher ILT memory]. * This will set the maximal number of configured steering-filters. -- cgit v1.2.3 From cbb8a12c089c7f04b86d08d89bdab71ec9bff1f5 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Sun, 4 Jun 2017 13:31:08 +0300 Subject: qed: VF XDP support The final addition on the qed front - - VFs would now require their PFs to provide multiple CIDs - Based on the availability of connections from PF, determine whether XDP is feasible and share it with qede via dev_info. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index fd72056f8d49..0eef0a2b1901 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -73,6 +73,9 @@ struct qed_dev_eth_info { /* Legacy VF - this affects the datapath, so qede has to know */ bool is_legacy; + + /* Might depend on available resources [in case of VF] */ + bool xdp_supported; }; struct qed_update_vport_rss_params { -- cgit v1.2.3 From 9b01d43170aa70a435105f6413759e2ab7e00219 Mon Sep 17 00:00:00 2001 From: Perr Zhang Date: Fri, 2 Jun 2017 11:59:53 +0800 Subject: sched/header: Remove leftover, obsolete comment There is no more set_task_vxid() helper, remove its description. 
Signed-off-by: Perr Zhang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170602035953.28949-1-strongbox8@zoho.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2b69fc650201..1abaa3728bf7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1096,8 +1096,6 @@ static inline struct pid *task_session(struct task_struct *task) * current. * task_xid_nr_ns() : id seen from the ns specified; * - * set_task_vxid() : assigns a virtual id to a task; - * * see also pid_nr() etc in include/linux/pid.h */ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns); -- cgit v1.2.3 From 45ca7df7c345465dbd2426a33012c9c33d27de62 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 27 Apr 2017 15:08:51 +0100 Subject: firmware: arm_scpi: add support to populate OPPs and get transition latency Currently only CPU devices use the transition latency and the OPPs populated in the SCPI driver. scpi-cpufreq has logic to handle these. However, even GPU and other users of SCPI DVFS will need the same logic. In order to avoid duplication, this patch adds support to get DVFS transition latency and add all the OPPs to the device using OPP library helper functions. The helper functions added here can be used for any device whose DVFS are managed by SCPI. Also, we also have incorrect dependency on the cluster identifier for the CPUs. It's fundamentally wrong as the domain id need not match the cluster id. This patch gets rid of that dependency by making use of the clock bindings which are already in place. 
Signed-off-by: Sudeep Holla --- include/linux/scpi_protocol.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h index dc5f989be226..327d65663dbf 100644 --- a/include/linux/scpi_protocol.h +++ b/include/linux/scpi_protocol.h @@ -67,6 +67,9 @@ struct scpi_ops { int (*dvfs_get_idx)(u8); int (*dvfs_set_idx)(u8, u8); struct scpi_dvfs_info *(*dvfs_get_info)(u8); + int (*device_domain_id)(struct device *); + int (*get_transition_latency)(struct device *); + int (*add_opps_to_device)(struct device *); int (*sensor_get_capability)(u16 *sensors); int (*sensor_get_info)(u16 sensor_id, struct scpi_sensor_info *); int (*sensor_get_value)(u16, u64 *); -- cgit v1.2.3 From 4722974d90e06d0164ca1b73a6b34cec6bdb64ad Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 5 Jun 2017 14:30:49 +0100 Subject: rxrpc: Implement service upgrade Implement AuriStor's service upgrade facility. There are three problems that this is meant to deal with: (1) Various of the standard AFS RPC calls have IPv4 addresses in their requests and/or replies - but there's no room for including IPv6 addresses. (2) Definition of IPv6-specific RPC operations in the standard operation sets has not yet been achieved. (3) One could envision the creation of a new service on the same port as the original service. The new service could implement improved operations - and the client could try this first, falling back to the original service if it's not there. Unfortunately, certain servers ignore packets addressed to a service they don't implement and don't respond in any way - not even with an ABORT. This means that the client must then wait for the call timeout to occur. What service upgrade does is to see if the connection is marked as being 'upgradeable' and if so, change the service ID in the server and thus the request and reply formats.
Note that the upgrade isn't mandatory - a server that supports only the original call set will ignore the upgrade request. In the protocol, the procedure is then as follows: (1) To request an upgrade, the first DATA packet in a new connection must have the userStatus set to 1 (this is normally 0). The userStatus value is normally ignored by the server. (2) If the server doesn't support upgrading, the reply packets will contain the same service ID as for the first request packet. (3) If the server does support upgrading, all future reply packets on that connection will contain the new service ID and the new service ID will be applied to *all* further calls on that connection as well. (4) The RPC op used to probe the upgrade must take the same request data as the shadow call in the upgrade set (but may return a different reply). GetCapability RPC ops were added to all standard sets for just this purpose. Ops where the request formats differ cannot be used for probing. (5) The client must wait for completion of the probe before sending any further RPC ops to the same destination. It should then use the service ID that recvmsg() reported back in all future calls. (6) The shadow service must have call definitions for all the operation IDs defined by the original service. To support service upgrading, a server should: (1) Call bind() twice on its AF_RXRPC socket before calling listen(). Each bind() should supply a different service ID, but the transport addresses must be the same. This allows the server to receive requests with either service ID. (2) Enable automatic upgrading by calling setsockopt(), specifying RXRPC_UPGRADEABLE_SERVICE and passing in a two-member array of unsigned shorts as the argument: unsigned short optval[2]; This specifies a pair of service IDs. They must be different and must match the service IDs bound to the socket. Member 0 is the service ID to upgrade from and member 1 is the service ID to upgrade to. 
Signed-off-by: David Howells --- include/linux/rxrpc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rxrpc.h b/include/linux/rxrpc.h index c68307bc306f..634116561a6a 100644 --- a/include/linux/rxrpc.h +++ b/include/linux/rxrpc.h @@ -37,6 +37,7 @@ struct sockaddr_rxrpc { #define RXRPC_SECURITY_KEYRING 2 /* [srvr] set ring of server security keys */ #define RXRPC_EXCLUSIVE_CONNECTION 3 /* Deprecated; use RXRPC_EXCLUSIVE_CALL instead */ #define RXRPC_MIN_SECURITY_LEVEL 4 /* minimum security level */ +#define RXRPC_UPGRADEABLE_SERVICE 5 /* Upgrade service[0] -> service[1] */ /* * RxRPC control messages -- cgit v1.2.3 From 4e255721d1575a766ada06dc7eb03acdcd34eaaf Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 5 Jun 2017 14:30:49 +0100 Subject: rxrpc: Add service upgrade support for client connections Make it possible for a client to use AuriStor's service upgrade facility. The client does this by adding an RXRPC_UPGRADE_SERVICE control message to the first sendmsg() of a call. This takes no parameters. When recvmsg() starts returning data from the call, the service ID field in the returned msg_name will reflect the result of the upgrade attempt. If the upgrade was ignored, srx_service will match what was set in the sendmsg(); if the upgrade happened the srx_service will be altered to indicate the service the server upgraded to. Note that: (1) The choice of upgrade service is up to the server (2) Further client calls to the same server that would share a connection are blocked if an upgrade probe is in progress. (3) This should only be used to probe the service. Clients should then use the returned service ID in all subsequent communications with that server (and not set the upgrade). Note that the kernel will not retain this information should the connection expire from its cache. 
(4) If a server that supports upgrading is replaced by one that doesn't, whilst a connection is live, and if the replacement is running, say, OpenAFS 1.6.4 or older or an older IBM AFS, then the replacement server will not respond to packets sent to the upgraded connection. At this point, calls will time out and the server must be reprobed. Signed-off-by: David Howells --- include/linux/rxrpc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rxrpc.h b/include/linux/rxrpc.h index 634116561a6a..707910c6c6c5 100644 --- a/include/linux/rxrpc.h +++ b/include/linux/rxrpc.h @@ -54,6 +54,7 @@ struct sockaddr_rxrpc { #define RXRPC_NEW_CALL 8 /* -r: [Service] new incoming call notification */ #define RXRPC_ACCEPT 9 /* s-: [Service] accept request */ #define RXRPC_EXCLUSIVE_CALL 10 /* s-: Call should be on exclusive connection */ +#define RXRPC_UPGRADE_SERVICE 11 /* s-: Request service upgrade for client call */ /* * RxRPC security levels -- cgit v1.2.3 From 41bb26f8db3ad33b083e57eb9fc5828796110e77 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 28 May 2017 08:56:46 +0300 Subject: uuid,afs: move struct uuid_v1 back into afs This essentially is a partial revert of commit ff548773 ("afs: Move UUID struct to linux/uuid.h") and moves struct uuid_v1 back into fs/afs as struct afs_uuid. It however keeps it as big endian structure so that we can use the normal uuid generation helpers when casting to/from struct afs_uuid. The V1 uuid interpretation in struct form isn't really useful to the rest of the kernel, and not really compatible to it either, so move it back to AFS instead of polluting the global uuid.h.
Signed-off-by: Christoph Hellwig Acked-by: David Howells --- include/linux/uuid.h | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 4dff73a89758..2d095fc60204 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -18,30 +18,6 @@ #include -/* - * V1 (time-based) UUID definition [RFC 4122]. - * - the timestamp is a 60-bit value, split 32/16/12, and goes in 100ns - * increments since midnight 15th October 1582 - * - add AFS_UUID_TO_UNIX_TIME to convert unix time in 100ns units to UUID - * time - * - the clock sequence is a 14-bit counter to avoid duplicate times - */ -struct uuid_v1 { - __be32 time_low; /* low part of timestamp */ - __be16 time_mid; /* mid part of timestamp */ - __be16 time_hi_and_version; /* high part of timestamp and version */ -#define UUID_TO_UNIX_TIME 0x01b21dd213814000ULL -#define UUID_TIMEHI_MASK 0x0fff -#define UUID_VERSION_TIME 0x1000 /* time-based UUID */ -#define UUID_VERSION_NAME 0x3000 /* name-based UUID */ -#define UUID_VERSION_RANDOM 0x4000 /* (pseudo-)random generated UUID */ - u8 clock_seq_hi_and_reserved; /* clock seq hi and variant */ -#define UUID_CLOCKHI_MASK 0x3f -#define UUID_VARIANT_STD 0x80 - u8 clock_seq_low; /* clock seq low */ - u8 node[6]; /* spatially unique node ID (MAC addr) */ -}; - /* * The length of a UUID string ("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee") * not including trailing NUL. -- cgit v1.2.3 From 60927bc314363f91616c1f4577541c2a2e27aba3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 May 2017 09:56:45 +0200 Subject: uuid: remove uuid_be defintions from the uapi header We don't use uuid_be and the UUID_BE constants in any uapi headers, so make them private to the kernel. 
Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Reviewed-by: Andy Shevchenko --- include/linux/uuid.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 2d095fc60204..30fb13018e29 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -18,6 +18,21 @@ #include +typedef struct { + __u8 b[16]; +} uuid_be; + +#define UUID_BE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ +((uuid_be) \ +{{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \ + ((b) >> 8) & 0xff, (b) & 0xff, \ + ((c) >> 8) & 0xff, (c) & 0xff, \ + (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}) + +#define NULL_UUID_BE \ + UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00) + /* * The length of a UUID string ("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee") * not including trailing NUL. -- cgit v1.2.3 From f9727a17db9bab71ddae91f74f11a8a2f9a0ece6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 May 2017 10:02:48 +0200 Subject: uuid: rename uuid types Our "little endian" UUID really is a Wintel GUID, so rename it and its helpers such (guid_t). The big endian UUID is the only true one, so give it the name uuid_t. The uuid_le and uuid_be names are retained for now, but will hopefully go away soon. The exception to that are the _cmp helpers that will be replaced by better primitives ASAP and thus don't get the new names. Also the _to_bin helpers are named to match the better named uuid_parse routine in userspace. Also remove the existing typedef in XFS that's now been superseded by the generic type name. Signed-off-by: Christoph Hellwig [andy: also update the UUID_LE/UUID_BE macros including fallout] Signed-off-by: Andy Shevchenko Reviewed-by: Amir Goldstein Reviewed-by: Darrick J.
Wong Reviewed-by: Andy Shevchenko Signed-off-by: Christoph Hellwig --- include/linux/uuid.h | 55 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 30fb13018e29..c2adb8046095 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -20,46 +20,55 @@ typedef struct { __u8 b[16]; -} uuid_be; +} uuid_t; -#define UUID_BE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ -((uuid_be) \ +#define UUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ +((uuid_t) \ {{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \ ((b) >> 8) & 0xff, (b) & 0xff, \ ((c) >> 8) & 0xff, (c) & 0xff, \ (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}) -#define NULL_UUID_BE \ - UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \ - 0x00, 0x00, 0x00, 0x00) - /* * The length of a UUID string ("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee") * not including trailing NUL. 
*/ #define UUID_STRING_LEN 36 -static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2) -{ - return memcmp(&u1, &u2, sizeof(uuid_le)); -} - -static inline int uuid_be_cmp(const uuid_be u1, const uuid_be u2) -{ - return memcmp(&u1, &u2, sizeof(uuid_be)); -} - void generate_random_uuid(unsigned char uuid[16]); -extern void uuid_le_gen(uuid_le *u); -extern void uuid_be_gen(uuid_be *u); +extern void guid_gen(guid_t *u); +extern void uuid_gen(uuid_t *u); bool __must_check uuid_is_valid(const char *uuid); -extern const u8 uuid_le_index[16]; -extern const u8 uuid_be_index[16]; +extern const u8 guid_index[16]; +extern const u8 uuid_index[16]; + +int guid_parse(const char *uuid, guid_t *u); +int uuid_parse(const char *uuid, uuid_t *u); -int uuid_le_to_bin(const char *uuid, uuid_le *u); -int uuid_be_to_bin(const char *uuid, uuid_be *u); +/* backwards compatibility, don't use in new code */ +typedef uuid_t uuid_be; +#define UUID_BE(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ + UUID_INIT(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7) +#define NULL_UUID_BE \ + UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00) + +#define uuid_le_gen(u) guid_gen(u) +#define uuid_be_gen(u) uuid_gen(u) +#define uuid_le_to_bin(guid, u) guid_parse(guid, u) +#define uuid_be_to_bin(uuid, u) uuid_parse(uuid, u) + +static inline int uuid_le_cmp(const guid_t u1, const guid_t u2) +{ + return memcmp(&u1, &u2, sizeof(guid_t)); +} + +static inline int uuid_be_cmp(const uuid_t u1, const uuid_t u2) +{ + return memcmp(&u1, &u2, sizeof(uuid_t)); +} #endif -- cgit v1.2.3 From df33767d9fe0ca93c606cc9042df05e5045c8158 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 May 2017 14:00:57 +0200 Subject: uuid: hoist helpers uuid_equal() and uuid_copy() from xfs These helper are used to compare and copy two uuid_t type objects. 
Signed-off-by: Amir Goldstein [hch: also provide the respective guid_ versions] Signed-off-by: Christoph Hellwig Reviewed-by: Andy Shevchenko --- include/linux/uuid.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index c2adb8046095..777f9cb01eb1 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -35,6 +35,26 @@ typedef struct { */ #define UUID_STRING_LEN 36 +static inline bool guid_equal(const guid_t *u1, const guid_t *u2) +{ + return memcmp(u1, u2, sizeof(guid_t)) == 0; +} + +static inline void guid_copy(guid_t *dst, const guid_t *src) +{ + memcpy(dst, src, sizeof(guid_t)); +} + +static inline bool uuid_equal(const uuid_t *u1, const uuid_t *u2) +{ + return memcmp(u1, u2, sizeof(uuid_t)) == 0; +} + +static inline void uuid_copy(uuid_t *dst, const uuid_t *src) +{ + memcpy(dst, src, sizeof(uuid_t)); +} + void generate_random_uuid(unsigned char uuid[16]); extern void guid_gen(guid_t *u); -- cgit v1.2.3 From ef40dda5bbc310f6517082c0ff002913104358cd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 May 2017 09:01:42 +0200 Subject: uuid: hoist uuid_is_null() helper from libnvdimm Hoist the libnvdimm helper as an inline helper to linux/uuid.h using an auxiliary const variable uuid_null in lib/uuid.c. [hch: also add the guid variant. Both do the same but I'd like to keep casts to a minimum] The common helper uses the new abstract type uuid_t * instead of u8 *. 
Suggested-by: Christoph Hellwig Signed-off-by: Amir Goldstein [hch: added guid_is_null] Signed-off-by: Christoph Hellwig Acked-by: Dan Williams Reviewed-by: Andy Shevchenko --- include/linux/uuid.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 777f9cb01eb1..75f7182d5360 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -35,6 +35,9 @@ typedef struct { */ #define UUID_STRING_LEN 36 +extern const guid_t guid_null; +extern const uuid_t uuid_null; + static inline bool guid_equal(const guid_t *u1, const guid_t *u2) { return memcmp(u1, u2, sizeof(guid_t)) == 0; @@ -45,6 +48,11 @@ static inline void guid_copy(guid_t *dst, const guid_t *src) memcpy(dst, src, sizeof(guid_t)); } +static inline bool guid_is_null(guid_t *guid) +{ + return guid_equal(guid, &guid_null); +} + static inline bool uuid_equal(const uuid_t *u1, const uuid_t *u2) { return memcmp(u1, u2, sizeof(uuid_t)) == 0; @@ -55,6 +63,11 @@ static inline void uuid_copy(uuid_t *dst, const uuid_t *src) memcpy(dst, src, sizeof(uuid_t)); } +static inline bool uuid_is_null(uuid_t *uuid) +{ + return uuid_equal(uuid, &uuid_null); +} + void generate_random_uuid(unsigned char uuid[16]); extern void guid_gen(guid_t *u); -- cgit v1.2.3 From 1dd771eb0b09fe9c12ea58b18c676b32a528be39 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 May 2017 15:16:44 +0200 Subject: block: remove blk_part_pack_uuid This helper was only used by IMA of all things, which would get spurious errors if CONFIG_BLOCK is disabled. Just opencode the call there. 
Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Acked-by: Mimi Zohar Reviewed-by: Andy Shevchenko --- include/linux/genhd.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index acff9437e5c3..e619fae2f037 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -219,12 +219,6 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part) return NULL; } -static inline int blk_part_pack_uuid(const u8 *uuid_str, u8 *to) -{ - uuid_be_to_bin(uuid_str, (uuid_be *)to); - return 0; -} - static inline int disk_max_parts(struct gendisk *disk) { if (disk->flags & GENHD_FL_EXT_DEVT) @@ -736,11 +730,6 @@ static inline dev_t blk_lookup_devt(const char *name, int partno) dev_t devt = MKDEV(0, 0); return devt; } - -static inline int blk_part_pack_uuid(const u8 *uuid_str, u8 *to) -{ - return -EINVAL; -} #endif /* CONFIG_BLOCK */ #endif /* _LINUX_GENHD_H */ -- cgit v1.2.3 From 85787090a21eb749d8b347eaf9ff1a455637473c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 May 2017 15:06:33 +0200 Subject: fs: switch ->s_uuid to uuid_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some file systems we still memcpy into it, but in various places this already allows us to use the proper uuid helpers. More to come.. 
Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Acked-by: Mimi Zohar  (Changes to IMA/EVM) Reviewed-by: Andy Shevchenko --- include/linux/cleancache.h | 2 +- include/linux/fs.h | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h index fccf7f44139d..bbb3712dd892 100644 --- a/include/linux/cleancache.h +++ b/include/linux/cleancache.h @@ -27,7 +27,7 @@ struct cleancache_filekey { struct cleancache_ops { int (*init_fs)(size_t); - int (*init_shared_fs)(char *uuid, size_t); + int (*init_shared_fs)(uuid_t *uuid, size_t); int (*get_page)(int, struct cleancache_filekey, pgoff_t, struct page *); void (*put_page)(int, struct cleancache_filekey, diff --git a/include/linux/fs.h b/include/linux/fs.h index 803e5a9b2654..3e68cabb8457 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -1328,8 +1329,8 @@ struct super_block { struct sb_writers s_writers; - char s_id[32]; /* Informational name */ - u8 s_uuid[16]; /* UUID */ + char s_id[32]; /* Informational name */ + uuid_t s_uuid; /* UUID */ void *s_fs_info; /* Filesystem private info */ unsigned int s_max_links; -- cgit v1.2.3 From 8e41226324e7c00f2087bfbc9f470d665e92df18 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 May 2017 09:54:27 +0200 Subject: nvme: switch to uuid_t Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Reviewed-by: Andy Shevchenko --- include/linux/nvme-fc.h | 3 +-- include/linux/nvme.h | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h index e997c4a49a88..bc711a10be05 100644 --- a/include/linux/nvme-fc.h +++ b/include/linux/nvme-fc.h @@ -177,7 +177,6 @@ struct fcnvme_lsdesc_rjt { }; -#define FCNVME_ASSOC_HOSTID_LEN 16 #define FCNVME_ASSOC_HOSTNQN_LEN 256 #define FCNVME_ASSOC_SUBNQN_LEN 256 @@ 
-191,7 +190,7 @@ struct fcnvme_lsdesc_cr_assoc_cmd { __be16 cntlid; __be16 sqsize; __be32 rsvd52; - u8 hostid[FCNVME_ASSOC_HOSTID_LEN]; + uuid_t hostid; u8 hostnqn[FCNVME_ASSOC_HOSTNQN_LEN]; u8 subnqn[FCNVME_ASSOC_SUBNQN_LEN]; u8 rsvd632[384]; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index b625bacf37ef..e400a69fa1d3 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -16,6 +16,7 @@ #define _LINUX_NVME_H #include +#include /* NQN names in commands fields specified one size */ #define NVMF_NQN_FIELD_LEN 256 @@ -843,7 +844,7 @@ struct nvmf_connect_command { }; struct nvmf_connect_data { - __u8 hostid[16]; + uuid_t hostid; __le16 cntlid; char resv4[238]; char subsysnqn[NVMF_NQN_FIELD_LEN]; -- cgit v1.2.3 From 82c01a84d5a9bd3b9347bb03eed2f05bbccef933 Mon Sep 17 00:00:00 2001 From: "yuval.shaia@oracle.com" Date: Sun, 4 Jun 2017 20:22:00 +0300 Subject: net/{mii, smsc}: Make mii_ethtool_get_link_ksettings and smc_netdev_get_ecmd return void Make return value void since the functions never return a meaningful value. Signed-off-by: Yuval Shaia Signed-off-by: David S.
Miller --- include/linux/mii.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mii.h b/include/linux/mii.h index 1629a0c32679..e870bfa6abfe 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -31,7 +31,7 @@ struct mii_if_info { extern int mii_link_ok (struct mii_if_info *mii); extern int mii_nway_restart (struct mii_if_info *mii); extern int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd); -extern int mii_ethtool_get_link_ksettings( +extern void mii_ethtool_get_link_ksettings( struct mii_if_info *mii, struct ethtool_link_ksettings *cmd); extern int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd); extern int mii_ethtool_set_link_ksettings( -- cgit v1.2.3 From 3fabd628d5ea24b02ddb1230ffca1df0f779f84e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 2 Jun 2017 13:52:02 +0000 Subject: efi/capsule-loader: Redirect calls to efi_capsule_setup_info() via weak alias To allow platform specific code to hook into the capsule loading routines, indirect calls to efi_capsule_setup_info() via a weak alias of __efi_capsule_setup_info(), allowing platforms to redefine the former but still use the latter. 
Tested-by: Bryan O'Donoghue Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20170602135207.21708-9-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index ec36f42a2add..a7379a2b5680 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -137,6 +137,18 @@ struct efi_boot_memmap { #define EFI_CAPSULE_POPULATE_SYSTEM_TABLE 0x00020000 #define EFI_CAPSULE_INITIATE_RESET 0x00040000 +struct capsule_info { + efi_capsule_header_t header; + int reset_type; + long index; + size_t count; + size_t total_size; + struct page **pages; + size_t page_bytes_remain; +}; + +int __efi_capsule_setup_info(struct capsule_info *cap_info); + /* * Allocation types for calls to boottime->allocate_pages. */ -- cgit v1.2.3 From 2a457fb31df62c6b482f78e4f74aaed99271f44d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 2 Jun 2017 13:52:03 +0000 Subject: efi/capsule-loader: Use page addresses rather than struct page pointers To give some leeway to code that handles non-standard capsule headers, let's keep an array of page addresses rather than struct page pointers. This gives special implementations of efi_capsule_setup_info() the opportunity to mangle the payload a bit before it is presented to the firmware, without putting any knowledge of the nature of such quirks into the generic code. 
Tested-by: Bryan O'Donoghue Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20170602135207.21708-10-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index a7379a2b5680..8269bcb8ccf7 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -143,7 +143,7 @@ struct capsule_info { long index; size_t count; size_t total_size; - struct page **pages; + phys_addr_t *pages; size_t page_bytes_remain; }; @@ -1415,7 +1415,7 @@ extern int efi_capsule_supported(efi_guid_t guid, u32 flags, size_t size, int *reset); extern int efi_capsule_update(efi_capsule_header_t *capsule, - struct page **pages); + phys_addr_t *pages); #ifdef CONFIG_EFI_RUNTIME_MAP int efi_runtime_map_init(struct kobject *); -- cgit v1.2.3 From 41c8bdb3ab10c1fefcac61d081e2fd9aaf8694b8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 5 Jun 2017 19:40:42 +0300 Subject: acpi, nfit: Switch to use new generic UUID API There are new types and helpers that are supposed to be used in new code. As a preparation to get rid of legacy types and API functions do the conversion here. 
Reviewed-by: Dan Williams Signed-off-by: Andy Shevchenko Signed-off-by: Christoph Hellwig --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 137e4a3d89c5..b0e1636ca5c3 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -39,6 +39,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 1d51d5f3907abf86ef0521971bcddf5853564263 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 4 Jun 2017 14:42:22 +0200 Subject: libata: clarify log page naming / grouping Signed-off-by: Christoph Hellwig Signed-off-by: Tejun Heo --- include/linux/ata.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 73fe18edfdaf..c14bdcf31fdb 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -336,11 +336,15 @@ enum { /* READ_LOG_EXT pages */ ATA_LOG_DIRECTORY = 0x0, ATA_LOG_SATA_NCQ = 0x10, - ATA_LOG_NCQ_NON_DATA = 0x12, - ATA_LOG_NCQ_SEND_RECV = 0x13, - ATA_LOG_SATA_ID_DEV_DATA = 0x30, + ATA_LOG_NCQ_NON_DATA = 0x12, + ATA_LOG_NCQ_SEND_RECV = 0x13, + ATA_LOG_IDENTIFY_DEVICE = 0x30, + + /* Identify device log pages: */ ATA_LOG_SATA_SETTINGS = 0x08, ATA_LOG_ZONED_INFORMATION = 0x09, + + /* Identify device SATA settings log:*/ ATA_LOG_DEVSLP_OFFSET = 0x30, ATA_LOG_DEVSLP_SIZE = 0x08, ATA_LOG_DEVSLP_MDAT = 0x00, -- cgit v1.2.3 From 818831c8b22f75353f59a63a484e20736c0567c9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 4 Jun 2017 14:42:24 +0200 Subject: libata: implement SECURITY PROTOCOL IN/OUT This allows us to use the generic OPAL code with ATA devices. 
Signed-off-by: Christoph Hellwig Signed-off-by: Tejun Heo --- include/linux/ata.h | 1 + include/linux/libata.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index c14bdcf31fdb..e65ae4b2ed48 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -341,6 +341,7 @@ enum { ATA_LOG_IDENTIFY_DEVICE = 0x30, /* Identify device log pages: */ + ATA_LOG_SECURITY = 0x06, ATA_LOG_SATA_SETTINGS = 0x08, ATA_LOG_ZONED_INFORMATION = 0x09, diff --git a/include/linux/libata.h b/include/linux/libata.h index 9e6633235ad7..55de3da58b1c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -156,6 +156,7 @@ enum { ATA_DFLAG_ACPI_PENDING = (1 << 5), /* ACPI resume action pending */ ATA_DFLAG_ACPI_FAILED = (1 << 6), /* ACPI on devcfg has failed */ ATA_DFLAG_AN = (1 << 7), /* AN configured */ + ATA_DFLAG_TRUSTED = (1 << 8), /* device supports trusted send/recv */ ATA_DFLAG_DMADIR = (1 << 10), /* device requires DMADIR */ ATA_DFLAG_CFG_MASK = (1 << 12) - 1, -- cgit v1.2.3 From 844af950da946cfab227a04b950614da04cb6275 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 24 Nov 2015 19:49:23 -0800 Subject: platform/x86: wmi: Turn WMI into a bus driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WMI is logically a bus: the WMI driver binds to an ACPI node (or more than one), and each instance of the WMI driver enumerates its children and hopes that drivers will attach to the children that are useful. This patch gives WMI a driver model bus type and the ability to match to drivers. The bus itself is a device in the new "wmi_bus" class, and all of the individual WMI devices are slotted into the device hierarchy correctly. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Mario Limonciello Cc: Pali Rohár Cc: linux-kernel@vger.kernel.org Cc: platform-driver-x86@vger.kernel.org Cc: linux-acpi@vger.kernel.org Acked-by: Rafael J. 
Wysocki Signed-off-by: Darren Hart (VMware) --- include/linux/wmi.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 include/linux/wmi.h (limited to 'include/linux') diff --git a/include/linux/wmi.h b/include/linux/wmi.h new file mode 100644 index 000000000000..29ed34b4dae1 --- /dev/null +++ b/include/linux/wmi.h @@ -0,0 +1,47 @@ +/* + * wmi.h - ACPI WMI interface + * + * Copyright (c) 2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _LINUX_WMI_H +#define _LINUX_WMI_H + +#include +#include + +struct wmi_device { + struct device dev; +}; + +struct wmi_device_id { + const char *guid_string; +}; + +struct wmi_driver { + struct device_driver driver; + const struct wmi_device_id *id_table; + + int (*probe)(struct wmi_device *wdev); + int (*remove)(struct wmi_device *wdev); +}; + +extern int __must_check __wmi_driver_register(struct wmi_driver *driver, + struct module *owner); +extern void wmi_driver_unregister(struct wmi_driver *driver); +#define wmi_driver_register(driver) __wmi_driver_register((driver), THIS_MODULE) + +#define module_wmi_driver(__wmi_driver) \ + module_driver(__wmi_driver, wmi_driver_register, \ + wmi_driver_unregister) + +#endif -- cgit v1.2.3 From d4fc91adfde11c41295d1cf001bdbec5d6879016 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 25 Nov 2015 14:03:43 -0800 Subject: platform/x86: wmi: Probe data objects for read and write capabilities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Dell XPS 13 9350 has one 
RW data object, one RO data object, and one totally inaccessible data object. Check for the existence of the accessor methods and report in sysfs. The docs also permit WQxx getters for single-instance objects to take no parameters. Probe for that as well to avoid ACPICA warnings about mismatched signatures. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Mario Limonciello Cc: Pali Rohár Cc: linux-kernel@vger.kernel.org Cc: platform-driver-x86@vger.kernel.org Cc: linux-acpi@vger.kernel.org Acked-by: Rafael J. Wysocki Signed-off-by: Darren Hart (VMware) --- include/linux/wmi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/wmi.h b/include/linux/wmi.h index 29ed34b4dae1..53095006821e 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -21,6 +21,12 @@ struct wmi_device { struct device dev; + + /* + * These are true for data objects that support reads and writes, + * respectively. + */ + bool readable, writeable; }; struct wmi_device_id { -- cgit v1.2.3 From 1686f5444546c3b53547aa8736afcf05833ed31a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 25 Nov 2015 17:33:25 -0800 Subject: platform/x86: wmi: Incorporate acpi_install_notify_handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As a platform driver, acpi_driver.notify will not be available, so use acpi_install_notify_handler as we will be converting to a platform driver. This gives event drivers a simple way to handle events. It also seems closer to what the Windows docs suggest that Windows does: it sounds like, in Windows, the mapper is responsible for calling _WED before dispatching to the subdriver. Signed-off-by: Andy Lutomirski [dvhart: merge two development commits and update commit message] Cc: Andy Lutomirski Cc: Mario Limonciello Cc: Pali Rohár Cc: linux-kernel@vger.kernel.org Cc: platform-driver-x86@vger.kernel.org Cc: linux-acpi@vger.kernel.org Acked-by: Rafael J.
Wysocki Signed-off-by: Darren Hart (VMware) --- include/linux/wmi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/wmi.h b/include/linux/wmi.h index 53095006821e..c6eedfd94e7d 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -39,6 +39,7 @@ struct wmi_driver { int (*probe)(struct wmi_device *wdev); int (*remove)(struct wmi_device *wdev); + void (*notify)(struct wmi_device *device, union acpi_object *data); }; extern int __must_check __wmi_driver_register(struct wmi_driver *driver, -- cgit v1.2.3 From 56a370259db4f6204d3514431d1629e0a7135b53 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 25 Nov 2015 18:19:26 -0800 Subject: platform/x86: wmi: Add a new interface to read block data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit wmi_query_block is unnecessarily indirect. Add a straightforward method for wmi bus drivers to use to read block data. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Mario Limonciello Cc: Pali Rohár Cc: linux-kernel@vger.kernel.org Cc: platform-driver-x86@vger.kernel.org Cc: linux-acpi@vger.kernel.org Acked-by: Rafael J. Wysocki Signed-off-by: Darren Hart (VMware) --- include/linux/wmi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/wmi.h b/include/linux/wmi.h index c6eedfd94e7d..0ab254019488 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -29,6 +29,10 @@ struct wmi_device { bool readable, writeable; }; +/* Caller must kfree the result. 
*/ +extern union acpi_object *wmidev_block_query(struct wmi_device *wdev, + u8 instance); + struct wmi_device_id { const char *guid_string; }; -- cgit v1.2.3 From f63019861cd1192e546397b13f926876a93450fd Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 29 Dec 2015 22:53:51 -0800 Subject: platform/x86: wmi: Add an interface for subdrivers to access sibling devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some subdrivers need to access sibling devices. This gives them a clean way to do so. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Mario Limonciello Cc: Pali Rohár Cc: linux-kernel@vger.kernel.org Cc: platform-driver-x86@vger.kernel.org Cc: linux-acpi@vger.kernel.org Acked-by: Rafael J. Wysocki Signed-off-by: Darren Hart (VMware) --- include/linux/wmi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/wmi.h b/include/linux/wmi.h index 0ab254019488..a283768afb7e 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -33,6 +33,10 @@ struct wmi_device { extern union acpi_object *wmidev_block_query(struct wmi_device *wdev, u8 instance); +/* Gets another device on the same bus. Caller must put_device the result. */ +extern struct wmi_device *wmidev_get_other_guid(struct wmi_device *wdev, + const char *guid_string); + struct wmi_device_id { const char *guid_string; }; -- cgit v1.2.3 From fd70da6a6267c91fbdda9c560f098cfd52fba00f Mon Sep 17 00:00:00 2001 From: "Darren Hart (VMware)" Date: Fri, 19 May 2017 19:28:36 -0700 Subject: platform/x86: wmi: Require query for data blocks, rename writable to setable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Microsoft WMI documentation requires all data blocks to implement the Query Control Method (WQxx). If we encounter a data block not implementing this control method, issue a warning, and ignore the data block. 
Remove the "readable" attribute as all data blocks must be readable (query-able). Be consistent with the language in the documentation, replace the "writable" attribute with "setable". Simplify (flatten) the control flow of wmi_create_device a bit while we are updating it for the above changes. Signed-off-by: Darren Hart (VMware) Cc: Andy Lutomirski Cc: Mario Limonciello Cc: Pali Rohár Cc: linux-kernel@vger.kernel.org Cc: platform-driver-x86@vger.kernel.org Cc: linux-acpi@vger.kernel.org Acked-by: Rafael J. Wysocki --- include/linux/wmi.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wmi.h b/include/linux/wmi.h index a283768afb7e..cd0d7734dc49 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -22,11 +22,8 @@ struct wmi_device { struct device dev; - /* - * These are true for data objects that support reads and writes, - * respectively. - */ - bool readable, writeable; + /* True for data blocks implementing the Set Control Method */ + bool setable; }; /* Caller must kfree the result. */ -- cgit v1.2.3 From cc5d0db390b0ff0f5da95b643a2b070da15a9c3e Mon Sep 17 00:00:00 2001 From: "Alex A. 
Mihaylov" Date: Fri, 2 Jun 2017 10:06:27 +0300 Subject: regmap: Add 1-Wire bus support Add basic support regmap (register map access) API for 1-Wire bus Signed-off-by: Mark Brown --- include/linux/regmap.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index e88649225a60..86eeacc1425a 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -461,6 +461,10 @@ struct regmap *__regmap_init_spmi_ext(struct spmi_device *dev, const struct regmap_config *config, struct lock_class_key *lock_key, const char *lock_name); +struct regmap *__regmap_init_w1(struct device *w1_dev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); struct regmap *__regmap_init_mmio_clk(struct device *dev, const char *clk_id, void __iomem *regs, const struct regmap_config *config, @@ -493,6 +497,10 @@ struct regmap *__devm_regmap_init_spmi_ext(struct spmi_device *dev, const struct regmap_config *config, struct lock_class_key *lock_key, const char *lock_name); +struct regmap *__devm_regmap_init_w1(struct device *w1_dev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); struct regmap *__devm_regmap_init_mmio_clk(struct device *dev, const char *clk_id, void __iomem *regs, @@ -596,6 +604,19 @@ int regmap_attach_dev(struct device *dev, struct regmap *map, __regmap_lockdep_wrapper(__regmap_init_spmi_ext, #config, \ dev, config) +/** + * regmap_init_w1() - Initialise register map + * + * @w1_dev: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer to + * a struct regmap. 
+ */ +#define regmap_init_w1(w1_dev, config) \ + __regmap_lockdep_wrapper(__regmap_init_w1, #config, \ + w1_dev, config) + /** * regmap_init_mmio_clk() - Initialise register map with register clock * @@ -711,6 +732,19 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); __regmap_lockdep_wrapper(__devm_regmap_init_spmi_ext, #config, \ dev, config) +/** + * devm_regmap_init_w1() - Initialise managed register map + * + * @w1_dev: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +#define devm_regmap_init_w1(w1_dev, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_w1, #config, \ + w1_dev, config) /** * devm_regmap_init_mmio_clk() - Initialise managed register map with clock * -- cgit v1.2.3 From fc0b2acc754a183aa79e2abb8bca8fd915832694 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Tue, 30 May 2017 17:31:21 +0300 Subject: spi: pxa2xx: Add support for Intel Cannonlake Intel Cannonlake LPSS SPI has up to four chip selects per port like in Broxton and is clocked like Sunrisepoint and Kaby Lake. Add a new type LPSS_CNL_SSP and configuration that enable runtime chip select detection and use the same FIFO thresholds than in Sunrisepoint. Patch adds support for both Cannonlake SoC and PCH. 
Signed-off-by: Jarkko Nikula Signed-off-by: Mark Brown --- include/linux/pxa2xx_ssp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index a0522328d7aa..8461b18e4608 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -196,6 +196,7 @@ enum pxa_ssp_type { LPSS_BSW_SSP, LPSS_SPT_SSP, LPSS_BXT_SSP, + LPSS_CNL_SSP, }; struct ssp_device { -- cgit v1.2.3 From f8fe99754673719ab791713a676bf27dae616fbc Mon Sep 17 00:00:00 2001 From: "yuval.shaia@oracle.com" Date: Mon, 5 Jun 2017 10:18:40 +0300 Subject: net: phy: Delete unused function phy_ethtool_gset It's unused, so remove it. Signed-off-by: Yuval Shaia Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 58f1b45a4c44..748e526c0698 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -854,7 +854,6 @@ void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); void phy_trigger_machine(struct phy_device *phydev, bool sync); int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); -int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_ethtool_ksettings_get(struct phy_device *phydev, struct ethtool_link_ksettings *cmd); int phy_ethtool_ksettings_set(struct phy_device *phydev, -- cgit v1.2.3 From dc4bb0e2356149aee4cdae061936f3bbdd45595c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 5 Jun 2017 12:15:46 -0700 Subject: bpf: Introduce bpf_prog ID This patch generates a unique ID for each BPF_PROG_LOAD-ed prog. It is worth noting that each BPF_PROG_LOAD-ed prog will have a different ID even if they have the same bpf instructions. The ID is generated by the existing idr_alloc_cyclic(). The ID is in the range [1, INT_MAX).
It is allocated in a cyclic manner, so an ID will get reused every 2 billion BPF_PROG_LOAD. The bpf_prog_alloc_id() is done after bpf_prog_select_runtime() because the jit process may have allocated a new prog. Hence, we need to ensure the value of pointer 'prog' will not be changed any more before storing the prog to the prog_idr. After bpf_prog_select_runtime(), the prog is read-only. Hence, the id is stored in 'struct bpf_prog_aux'. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index fcc80ca11045..c5946d19f2ca 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -172,6 +172,7 @@ struct bpf_prog_aux { u32 used_map_cnt; u32 max_ctx_offset; u32 stack_depth; + u32 id; struct latch_tree_node ksym_tnode; struct list_head ksym_lnode; const struct bpf_verifier_ops *ops; -- cgit v1.2.3 From f3f1c054c288bb6e503005e6d73611151ed20e91 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 5 Jun 2017 12:15:47 -0700 Subject: bpf: Introduce bpf_map ID This patch generates a unique ID for each created bpf_map. The approach is similar to the earlier patch for bpf_prog ID. It is worth noting that the bpf_map's ID and bpf_prog's ID are in two independent ID spaces and both have the same valid range: [1, INT_MAX). Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S.
Miller --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c5946d19f2ca..c32bace66d3d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -46,6 +46,7 @@ struct bpf_map { u32 max_entries; u32 map_flags; u32 pages; + u32 id; struct user_struct *user; const struct bpf_map_ops *ops; struct work_struct work; -- cgit v1.2.3 From 783d28dd11f68fb25d1f2e0de7c42336394ef128 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 5 Jun 2017 12:15:51 -0700 Subject: bpf: Add jited_len to struct bpf_prog Add jited_len to struct bpf_prog. It will be useful for the struct bpf_prog_info which will be added in the later patch. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index a20ba40fcb73..1e2dddf21f3b 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -432,6 +432,7 @@ struct bpf_prog { kmemcheck_bitfield_end(meta); enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ + u32 jited_len; /* Size of jited insns in bytes */ u8 tag[BPF_TAG_SIZE]; struct bpf_prog_aux *aux; /* Auxiliary fields */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ -- cgit v1.2.3 From 1e270976908686ec25fb91b8a34145be54137976 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 5 Jun 2017 12:15:52 -0700 Subject: bpf: Add BPF_OBJ_GET_INFO_BY_FD A single BPF_OBJ_GET_INFO_BY_FD cmd is used to obtain the info for both bpf_prog and bpf_map. The kernel can figure out the fd is associated with a bpf_prog or bpf_map. The suggested struct bpf_prog_info and struct bpf_map_info are not meant to be a complete list and it is not the goal of this patch. New fields can be added in the future patch. 
The focus of this patch is to create the interface, BPF_OBJ_GET_INFO_BY_FD cmd for exposing the bpf_prog's and bpf_map's info. The obj's info, which will be extended (and get bigger) over time, is separated from the bpf_attr to avoid bloating the bpf_attr. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 1e2dddf21f3b..1fa26dc562ce 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -69,8 +69,6 @@ struct bpf_prog_aux; /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 -#define BPF_TAG_SIZE 8 - /* Helper macros for filter block array initializers. */ /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ -- cgit v1.2.3 From 5acde34a5a420ffe7441bb7d3909dc2618025c3c Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 5 Jun 2017 12:22:50 +0100 Subject: net: phy: add 802.3 clause 45 support to phylib Add generic helpers for 802.3 clause 45 PHYs for >= 10Gbps support. Reviewed-by: Andrew Lunn Signed-off-by: Russell King Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 748e526c0698..a47eb5e841d2 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -827,6 +827,8 @@ static inline const char *phydev_name(const struct phy_device *phydev) void phy_attached_print(struct phy_device *phydev, const char *fmt, ...) 
__printf(2, 3); void phy_attached_info(struct phy_device *phydev); + +/* Clause 22 PHY */ int genphy_config_init(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); @@ -841,6 +843,16 @@ static inline int genphy_no_soft_reset(struct phy_device *phydev) { return 0; } + +/* Clause 45 PHY */ +int genphy_c45_restart_aneg(struct phy_device *phydev); +int genphy_c45_aneg_done(struct phy_device *phydev); +int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask); +int genphy_c45_read_lpa(struct phy_device *phydev); +int genphy_c45_read_pma(struct phy_device *phydev); +int genphy_c45_pma_setup_forced(struct phy_device *phydev); +int genphy_c45_an_disable_aneg(struct phy_device *phydev); + void phy_driver_unregister(struct phy_driver *drv); void phy_drivers_unregister(struct phy_driver *drv, int n); int phy_driver_register(struct phy_driver *new_driver, struct module *owner); -- cgit v1.2.3 From 002ba7058a7f141cf22d37967a4ef78239c50e9e Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 5 Jun 2017 12:23:00 +0100 Subject: net: phy: hook up clause 45 autonegotiation restart genphy_restart_aneg() can only restart autonegotiation on clause 22 PHYs. Add a phy_restart_aneg() function which selects between the clause 22 and clause 45 restart functionality depending on the PHY type and whether the Clause 45 PHY supports the Clause 22 register set. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index a47eb5e841d2..b24de9ddc886 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -804,6 +804,7 @@ int phy_start_aneg(struct phy_device *phydev); int phy_aneg_done(struct phy_device *phydev); int phy_stop_interrupts(struct phy_device *phydev); +int phy_restart_aneg(struct phy_device *phydev); static inline int phy_read_status(struct phy_device *phydev) { -- cgit v1.2.3 From c125ca091873f2e848cc31c2371a3a66c2fd4dd8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 5 Jun 2017 12:23:10 +0100 Subject: net: phy: add XAUI and 10GBASE-KR PHY connection types XAUI allows XGMII to reach an extended distance by using a XGXS layer at each end of the MAC to PHY link, operating over four Serdes lanes. 10GBASE-KR is a single lane Serdes backplane ethernet connection method with autonegotiation on the link. Some PHYs use this to connect to the ethernet interface at 10G speeds, switching to other connection types when utilising slower speeds. 10GBASE-KR is also used for XFI and SFI to connect to XFP and SFP fiber modules. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/linux/phy.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index b24de9ddc886..414242200a90 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -83,6 +83,9 @@ typedef enum { PHY_INTERFACE_MODE_1000BASEX, PHY_INTERFACE_MODE_2500BASEX, PHY_INTERFACE_MODE_RXAUI, + PHY_INTERFACE_MODE_XAUI, + /* 10GBASE-KR, XFI, SFI - single lane 10G Serdes */ + PHY_INTERFACE_MODE_10GKR, PHY_INTERFACE_MODE_MAX, } phy_interface_t; @@ -149,6 +152,10 @@ static inline const char *phy_modes(phy_interface_t interface) return "2500base-x"; case PHY_INTERFACE_MODE_RXAUI: return "rxaui"; + case PHY_INTERFACE_MODE_XAUI: + return "xaui"; + case PHY_INTERFACE_MODE_10GKR: + return "10gbase-kr"; default: return "unknown"; } -- cgit v1.2.3 From 6f428096a4d1e1809b162ca40dec5f7d09f3f1d1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 6 Jun 2017 15:32:03 +0200 Subject: driver core: remove CLASS_ATTR usage There were only 2 remaining users of CLASS_ATTR() so let's finally get rid of them and force everyone to use the correct RW/RO/WO versions instead.
Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Acked-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 9ef518af5515..9a902ae33932 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -465,8 +465,6 @@ struct class_attribute { const char *buf, size_t count); }; -#define CLASS_ATTR(_name, _mode, _show, _store) \ - struct class_attribute class_attr_##_name = __ATTR(_name, _mode, _show, _store) #define CLASS_ATTR_RW(_name) \ struct class_attribute class_attr_##_name = __ATTR_RW(_name) #define CLASS_ATTR_RO(_name) \ -- cgit v1.2.3 From 94116f8126de9762751fd92731581b73b56292e5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 5 Jun 2017 19:40:46 +0300 Subject: ACPI: Switch to use generic guid_t in acpi_evaluate_dsm() acpi_evaluate_dsm() and friends take a pointer to a raw buffer of 16 bytes. Instead we convert them to use guid_t type. At the same time we convert current users. acpi_str_to_uuid() becomes useless after the conversion and it's safe to get rid of it. Acked-by: Rafael J. 
Wysocki Cc: Borislav Petkov Acked-by: Dan Williams Cc: Amir Goldstein Reviewed-by: Jarkko Sakkinen Reviewed-by: Jani Nikula Acked-by: Jani Nikula Cc: Ben Skeggs Acked-by: Benjamin Tissoires Acked-by: Joerg Roedel Acked-by: Adrian Hunter Cc: Yisen Zhuang Acked-by: Bjorn Helgaas Acked-by: Felipe Balbi Acked-by: Mathias Nyman Reviewed-by: Heikki Krogerus Acked-by: Mark Brown Signed-off-by: Andy Shevchenko Signed-off-by: Christoph Hellwig --- include/linux/acpi.h | 3 +-- include/linux/pci-acpi.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index b0e1636ca5c3..ab19365c905f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -458,7 +458,6 @@ struct acpi_osc_context { struct acpi_buffer ret; /* free by caller if success */ }; -acpi_status acpi_str_to_uuid(char *str, u8 *uuid); acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); /* Indexes into _OSC Capabilities Buffer (DWORDs 2 & 3 are device-specific) */ @@ -742,7 +741,7 @@ static inline bool acpi_driver_match_device(struct device *dev, } static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle, - const u8 *uuid, + const guid_t *guid, int rev, int func, union acpi_object *argv4) { diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 7a4e83a8c89c..dd86c97f2454 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -105,7 +105,7 @@ static inline void acpiphp_remove_slots(struct pci_bus *bus) { } static inline void acpiphp_check_host_bridge(struct acpi_device *adev) { } #endif -extern const u8 pci_acpi_dsm_uuid[]; +extern const guid_t pci_acpi_dsm_guid; #define DEVICE_LABEL_DSM 0x07 #define RESET_DELAY_DSM 0x08 #define FUNCTION_DELAY_DSM 0x09 -- cgit v1.2.3 From 22833a9165a1c72a54ddc696a3765bd6f87fbb92 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 17 May 2017 09:40:30 +0200 Subject: clk: divider: Make divider_round_rate take the 
parent clock So far, divider_round_rate only considers the parent clock returned by clk_hw_get_parent. This works fine on clocks that have a single parent, but this doesn't work on muxes, since we will only consider the first parent, while other parents may totally be able to provide a better combination. Clocks in that case cannot use divider_round_rate, so would have to come up with a very similar logic to work around it. Instead of having to do something like this, and duplicate that logic everywhere, create divider_round_rate_parent to allow the caller to give an additional parameter for the parent clock to consider. Reviewed-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Acked-by: Stephen Boyd Signed-off-by: Chen-Yu Tsai --- include/linux/clk-provider.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index a428aec36ace..c59c62571e4f 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -412,9 +412,10 @@ extern const struct clk_ops clk_divider_ro_ops; unsigned long divider_recalc_rate(struct clk_hw *hw, unsigned long parent_rate, unsigned int val, const struct clk_div_table *table, unsigned long flags); -long divider_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate, const struct clk_div_table *table, - u8 width, unsigned long flags); +long divider_round_rate_parent(struct clk_hw *hw, struct clk_hw *parent, + unsigned long rate, unsigned long *prate, + const struct clk_div_table *table, + u8 width, unsigned long flags); int divider_get_val(unsigned long rate, unsigned long parent_rate, const struct clk_div_table *table, u8 width, unsigned long flags); @@ -757,6 +758,15 @@ static inline void __clk_hw_set_clk(struct clk_hw *dst, struct clk_hw *src) dst->core = src->core; } +static inline long divider_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate, + const struct
clk_div_table *table, + u8 width, unsigned long flags) +{ + return divider_round_rate_parent(hw, clk_hw_get_parent(hw), + rate, prate, table, width, flags); +} + /* * FIXME clock api without lock protection */ -- cgit v1.2.3 From 515559ca21713218595f3a4dad44a4e7eea2fcfb Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Jun 2017 16:27:15 +0100 Subject: rxrpc: Provide a getsockopt call to query what cmsgs types are supported Provide a getsockopt() call that can query what cmsg types are supported by AF_RXRPC. --- include/linux/rxrpc.h | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rxrpc.h b/include/linux/rxrpc.h index 707910c6c6c5..bdd3175b9a48 100644 --- a/include/linux/rxrpc.h +++ b/include/linux/rxrpc.h @@ -38,6 +38,7 @@ struct sockaddr_rxrpc { #define RXRPC_EXCLUSIVE_CONNECTION 3 /* Deprecated; use RXRPC_EXCLUSIVE_CALL instead */ #define RXRPC_MIN_SECURITY_LEVEL 4 /* minimum security level */ #define RXRPC_UPGRADEABLE_SERVICE 5 /* Upgrade service[0] -> service[1] */ +#define RXRPC_SUPPORTED_CMSG 6 /* Get highest supported control message type */ /* * RxRPC control messages @@ -45,16 +46,19 @@ struct sockaddr_rxrpc { * - terminal messages mean that a user call ID tag can be recycled * - s/r/- indicate whether these are applicable to sendmsg() and/or recvmsg() */ -#define RXRPC_USER_CALL_ID 1 /* sr: user call ID specifier */ -#define RXRPC_ABORT 2 /* sr: abort request / notification [terminal] */ -#define RXRPC_ACK 3 /* -r: [Service] RPC op final ACK received [terminal] */ -#define RXRPC_NET_ERROR 5 /* -r: network error received [terminal] */ -#define RXRPC_BUSY 6 /* -r: server busy received [terminal] */ -#define RXRPC_LOCAL_ERROR 7 /* -r: local error generated [terminal] */ -#define RXRPC_NEW_CALL 8 /* -r: [Service] new incoming call notification */ -#define RXRPC_ACCEPT 9 /* s-: [Service] accept request */ -#define RXRPC_EXCLUSIVE_CALL 10 /* s-: Call should be on 
exclusive connection */ -#define RXRPC_UPGRADE_SERVICE 11 /* s-: Request service upgrade for client call */ +enum rxrpc_cmsg_type { + RXRPC_USER_CALL_ID = 1, /* sr: user call ID specifier */ + RXRPC_ABORT = 2, /* sr: abort request / notification [terminal] */ + RXRPC_ACK = 3, /* -r: [Service] RPC op final ACK received [terminal] */ + RXRPC_NET_ERROR = 5, /* -r: network error received [terminal] */ + RXRPC_BUSY = 6, /* -r: server busy received [terminal] */ + RXRPC_LOCAL_ERROR = 7, /* -r: local error generated [terminal] */ + RXRPC_NEW_CALL = 8, /* -r: [Service] new incoming call notification */ + RXRPC_ACCEPT = 9, /* s-: [Service] accept request */ + RXRPC_EXCLUSIVE_CALL = 10, /* s-: Call should be on exclusive connection */ + RXRPC_UPGRADE_SERVICE = 11, /* s-: Request service upgrade for client call */ + RXRPC__SUPPORTED +}; /* * RxRPC security levels -- cgit v1.2.3 From e754eba685aac2a9b5538176fa2d254ad25f464d Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Jun 2017 12:40:03 +0100 Subject: rxrpc: Provide a cmsg to specify the amount of Tx data for a call Provide a control message that can be specified on the first sendmsg() of a client call or the first sendmsg() of a service response to indicate the total length of the data to be transmitted for that call. Currently, because the length of the payload of an encrypted DATA packet is encrypted in front of the data, the packet cannot be encrypted until we know how much data it will hold. By specifying the length at the beginning of the transmit phase, each DATA packet length can be set before we start loading data from userspace (where several sendmsg() calls may contribute to a particular packet). An error will be returned if too little or too much data is presented in the Tx phase. 
Signed-off-by: David Howells --- include/linux/rxrpc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rxrpc.h b/include/linux/rxrpc.h index bdd3175b9a48..7343f71783dc 100644 --- a/include/linux/rxrpc.h +++ b/include/linux/rxrpc.h @@ -57,6 +57,7 @@ enum rxrpc_cmsg_type { RXRPC_ACCEPT = 9, /* s-: [Service] accept request */ RXRPC_EXCLUSIVE_CALL = 10, /* s-: Call should be on exclusive connection */ RXRPC_UPGRADE_SERVICE = 11, /* s-: Request service upgrade for client call */ + RXRPC_TX_LENGTH = 12, /* s-: Total length of Tx data */ RXRPC__SUPPORTED }; -- cgit v1.2.3 From bcbc2265f269cc57924371e3bce8c3220d0270c5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 8 Jun 2017 09:02:20 +0200 Subject: acpi: always include uuid.h Without this the build will fail for !CONFIG_ACPI builds on x86. Fixes: 94116f81 ("ACPI: Switch to use generic guid_t in acpi_evaluate_dsm()") Signed-off-by: Christoph Hellwig --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ab19365c905f..cafdfb84ca28 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -26,6 +26,7 @@ #include #include #include +#include #ifndef _LINUX #define _LINUX @@ -39,7 +40,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From 5dd0b16cdaff9b94da06074d5888b03235c0bf17 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 5 Jun 2017 07:40:25 -0700 Subject: mm/vmstat: Make NR_TLB_REMOTE_FLUSH_RECEIVED available even on UP This fixes CONFIG_SMP=n, CONFIG_DEBUG_TLBFLUSH=y without introducing further #ifdef soup. Caught by a Kbuild bot randconfig build. 
Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: ce4a4e565f52 ("x86/mm: Remove the UP asm/tlbflush.h code, always use the (formerly) SMP code") Link: http://lkml.kernel.org/r/76da9a3cc4415996f2ad2c905b93414add322021.1496673616.git.luto@kernel.org Signed-off-by: Ingo Molnar --- include/linux/vm_event_item.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index d84ae90ccd5c..be3ab2d13adf 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -93,10 +93,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #endif #endif #ifdef CONFIG_DEBUG_TLBFLUSH -#ifdef CONFIG_SMP NR_TLB_REMOTE_FLUSH, /* cpu tried to flush others' tlbs */ NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */ -#endif /* CONFIG_SMP */ NR_TLB_LOCAL_FLUSH_ALL, NR_TLB_LOCAL_FLUSH_ONE, #endif /* CONFIG_DEBUG_TLBFLUSH */ -- cgit v1.2.3 From 209a0cbda7a01d2ea32a8b631d35e873bee498e9 Mon Sep 17 00:00:00 2001 From: Luca Abeni Date: Thu, 18 May 2017 22:13:29 +0200 Subject: sched/deadline: Improve the tracking of active utilization This patch implements a more theoretically sound algorithm for tracking active utilization: instead of decreasing it when a task blocks, use a timer (the "inactive timer", named after the "Inactive" task state of the GRUB algorithm) to decrease the active utilization at the so called "0-lag time". 
Tested-by: Claudio Scordino Tested-by: Daniel Bristot de Oliveira Signed-off-by: Luca Abeni Signed-off-by: Peter Zijlstra (Intel) Cc: Joel Fernandes Cc: Juri Lelli Cc: Linus Torvalds Cc: Mathieu Poirier Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tommaso Cucinotta Link: http://lkml.kernel.org/r/1495138417-6203-3-git-send-email-luca.abeni@santannapisa.it Signed-off-by: Ingo Molnar --- include/linux/sched.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1abaa3728bf7..f1ead2e88d3d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -445,16 +445,33 @@ struct sched_dl_entity { * * @dl_yielded tells if task gave up the CPU before consuming * all its available runtime during the last job. + * + * @dl_non_contending tells if the task is inactive while still + * contributing to the active utilization. In other words, it + * indicates if the inactive timer has been armed and its handler + * has not been executed yet. This flag is useful to avoid race + * conditions between the inactive timer handler and the wakeup + * code. */ int dl_throttled; int dl_boosted; int dl_yielded; + int dl_non_contending; /* * Bandwidth enforcement timer. Each -deadline task has its * own bandwidth to be enforced, thus we need one timer per task. */ struct hrtimer dl_timer; + + /* + * Inactive timer, responsible for decreasing the active utilization + * at the "0-lag time". When a -deadline task blocks, it contributes + * to GRUB's active utilization until the "0-lag time", hence a + * timer is needed to decrease the active utilization at the correct + * time. 
+ */ + struct hrtimer inactive_timer; }; union rcu_special { -- cgit v1.2.3 From 54d6d3039e2d84b6fbfbe59ec57d856371edf0a2 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Mon, 29 May 2017 16:24:02 +0200 Subject: sched/deadline: Fix dl_bw comment The sched_dl_entity's dl_bw variable stores the utilization (dl_runtime / dl_period) of a task, not its density (dl_runtime / dl_deadline), as the comment says. Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Peter Zijlstra (Intel) Cc: Juri Lelli Cc: Linus Torvalds Cc: Luca Abeni Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Romulo Silva de Oliveira Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tommaso Cucinotta Cc: Xunlei Pang Link: http://lkml.kernel.org/r/8d05f1ccfd02da1a11bda62494d98f5456c1469a.1495803804.git.bristot@redhat.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f1ead2e88d3d..3113c828483b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -421,7 +421,7 @@ struct sched_dl_entity { u64 dl_runtime; /* Maximum runtime for each instance */ u64 dl_deadline; /* Relative deadline of each instance */ u64 dl_period; /* Separation of two instances (period) */ - u64 dl_bw; /* dl_runtime / dl_deadline */ + u64 dl_bw; /* dl_runtime / dl_period */ /* * Actual scheduling parameters. Initialized with the values above, -- cgit v1.2.3 From 3effcb4247e74a51f5d8b775a1ee4abf87cc089a Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Mon, 29 May 2017 16:24:03 +0200 Subject: sched/deadline: Use the revised wakeup rule for suspending constrained dl tasks We have been facing some problems with self-suspending constrained deadline tasks. The main reason is that the original CBS was not designed for such sort of tasks. 
One problem reported by Xunlei Pang takes place when a task suspends, and then is awakened before the deadline, but so close to the deadline that its remaining runtime can cause the task to have an absolute density higher than allowed. In such a situation, the original CBS assumes that the task is facing an early activation, and so it replenishes the task and sets another deadline, one deadline in the future. This rule works fine for implicit deadline tasks. Moreover, it allows the system to adapt the period of a task in which the external event source suffered from a clock drift. However, this opens the window for bandwidth leakage for constrained deadline tasks. For instance, a task with the following parameters: runtime = 5 ms deadline = 7 ms [density] = 5 / 7 = 0.71 period = 1000 ms If the task runs for 1 ms, and then suspends for another 1ms, it will be awakened with the following parameters: remaining runtime = 4 laxity = 5 presenting an absolute density of 4 / 5 = 0.80. In this case, the original CBS would assume the task had an early wakeup. Then, CBS will reset the runtime, and the absolute deadline will be postponed by one relative deadline, allowing the task to run. The problem is that, if the task runs this pattern forever, it will keep receiving bandwidth, being able to run 1ms every 2ms. Following this behavior, the task would be able to run 500 ms in 1 sec. Thus running more than the 5 ms / 1 sec the admission control allowed it to run. Trying to address the self-suspending case, Luca Abeni, Giuseppe Lipari, and Juri Lelli [1] revisited the CBS in order to deal with self-suspending tasks. In the new approach, rather than replenishing/postponing the absolute deadline, the revised wakeup rule adjusts the remaining runtime, reducing it to fit into the allowed density. 
A revised version of the idea is: At a given time t, the maximum absolute density of a task cannot be higher than its relative density, that is: runtime / (deadline - t) <= dl_runtime / dl_deadline Knowing the laxity of a task (deadline - t), it is possible to move it to the other side of the equality, thus enabling us to define the maximum remaining runtime a task can use within the absolute deadline, without over-running the allowed density: runtime = (dl_runtime / dl_deadline) * (deadline - t) For instance, in our previous example, the task could still run: runtime = ( 5 / 7 ) * 5 runtime = 3.57 ms Without causing damage for other deadline tasks. It is noteworthy that the laxity cannot be negative because that would cause a negative runtime. Thus, this patch depends on the patch: df8eac8cafce ("sched/deadline: Throttle a constrained deadline task activated after the deadline") Which throttles a constrained deadline task activated after the deadline. Finally, it is also possible to use the revised wakeup rule for all other tasks, but that would require some more discussions about pros and cons. 
Reported-by: Xunlei Pang Signed-off-by: Daniel Bristot de Oliveira [peterz: replaced dl_is_constrained with dl_is_implicit] Signed-off-by: Peter Zijlstra (Intel) Cc: Juri Lelli Cc: Linus Torvalds Cc: Luca Abeni Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Romulo Silva de Oliveira Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tommaso Cucinotta Link: http://lkml.kernel.org/r/5c800ab3a74a168a84ee5f3f84d12a02e11383be.1495803804.git.bristot@redhat.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3113c828483b..1f0f427e0292 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -422,6 +422,7 @@ struct sched_dl_entity { u64 dl_deadline; /* Relative deadline of each instance */ u64 dl_period; /* Separation of two instances (period) */ u64 dl_bw; /* dl_runtime / dl_period */ + u64 dl_density; /* dl_runtime / dl_deadline */ /* * Actual scheduling parameters. Initialized with the values above, -- cgit v1.2.3 From f5694788ad8da5da41b501f3d6d2ae22379c4ef9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 19 Sep 2016 12:15:37 +0200 Subject: rt_mutex: Add lockdep annotations Now that (PI) futexes have their own private RT-mutex interface and implementation we can easily add lockdep annotations to the existing RT-mutex interface. 
Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/rtmutex.h | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 1abba5ce2a2f..44fd002f7cd5 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -37,6 +37,9 @@ struct rt_mutex { int line; void *magic; #endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif }; struct rt_mutex_waiter; @@ -58,19 +61,33 @@ struct hrtimer_sleeper; #ifdef CONFIG_DEBUG_RT_MUTEXES # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ , .name = #mutexname, .file = __FILE__, .line = __LINE__ -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__) + +# define rt_mutex_init(mutex) \ +do { \ + static struct lock_class_key __key; \ + __rt_mutex_init(mutex, __func__, &__key); \ +} while (0) + extern void rt_mutex_debug_task_free(struct task_struct *tsk); #else # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL) +# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL, NULL) # define rt_mutex_debug_task_free(t) do { } while (0) #endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC +#define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) \ + , .dep_map = { .name = #mutexname } +#else +#define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) +#endif + #define __RT_MUTEX_INITIALIZER(mutexname) \ { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ , .waiters = RB_ROOT \ , .owner = NULL \ - __DEBUG_RT_MUTEX_INITIALIZER(mutexname)} + __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ + __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)} #define DEFINE_RT_MUTEX(mutexname) \ struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) @@ -86,7 +103,7 @@ static inline int rt_mutex_is_locked(struct rt_mutex *lock) return lock->owner != NULL; } 
-extern void __rt_mutex_init(struct rt_mutex *lock, const char *name); +extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key); extern void rt_mutex_destroy(struct rt_mutex *lock); extern void rt_mutex_lock(struct rt_mutex *lock); -- cgit v1.2.3 From 71399aa5d68bb3ed8c4caf8bfd71faae39555876 Mon Sep 17 00:00:00 2001 From: Benson Leung Date: Mon, 8 May 2017 15:02:48 -0700 Subject: power: supply: Add Apple Brick ID power supply type Apple currently supports three very common USB chargers: https://www.apple.com/power-adapters/ These chargers implement a proprietary Apple method for advertising 1A, 2.1A, and 2.4A at 5V called "Brick ID". In addition, 3rd parties implement the same charging method in many charging accessories that work with iOS devices. Devices that have charger detection chips such as the Pericom PI3USB9281, e.g. Google Chromebook Pixel 2015, are capable of detecting these chargers, so let's add a type to facilitate passing that info up to userspace. This adds a separate power supply type for Apple's proprietary "Brick ID" charging method. 
Signed-off-by: Benson Leung Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 3965503315ef..4bd34051995e 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -159,13 +159,14 @@ enum power_supply_type { POWER_SUPPLY_TYPE_BATTERY, POWER_SUPPLY_TYPE_UPS, POWER_SUPPLY_TYPE_MAINS, - POWER_SUPPLY_TYPE_USB, /* Standard Downstream Port */ - POWER_SUPPLY_TYPE_USB_DCP, /* Dedicated Charging Port */ - POWER_SUPPLY_TYPE_USB_CDP, /* Charging Downstream Port */ - POWER_SUPPLY_TYPE_USB_ACA, /* Accessory Charger Adapters */ - POWER_SUPPLY_TYPE_USB_TYPE_C, /* Type C Port */ - POWER_SUPPLY_TYPE_USB_PD, /* Power Delivery Port */ - POWER_SUPPLY_TYPE_USB_PD_DRP, /* PD Dual Role Port */ + POWER_SUPPLY_TYPE_USB, /* Standard Downstream Port */ + POWER_SUPPLY_TYPE_USB_DCP, /* Dedicated Charging Port */ + POWER_SUPPLY_TYPE_USB_CDP, /* Charging Downstream Port */ + POWER_SUPPLY_TYPE_USB_ACA, /* Accessory Charger Adapters */ + POWER_SUPPLY_TYPE_USB_TYPE_C, /* Type C Port */ + POWER_SUPPLY_TYPE_USB_PD, /* Power Delivery Port */ + POWER_SUPPLY_TYPE_USB_PD_DRP, /* PD Dual Role Port */ + POWER_SUPPLY_TYPE_APPLE_BRICK_ID, /* Apple Charging Method */ }; enum power_supply_notifier_events { -- cgit v1.2.3 From 0c90e9c6b5549825e410b6589ad6c4478f81ebad Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 12 Mar 2017 11:35:23 +0200 Subject: net/mlx5: Update flow table commands layout Update struct mlx5_ifc_create(modify)_flow_table_bits according to the last device specification. 
Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 46 +++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 56e96f6a0a45..ec308657af3b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -6627,6 +6627,24 @@ struct mlx5_ifc_create_flow_table_out_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_flow_table_context_bits { + u8 encap_en[0x1]; + u8 decap_en[0x1]; + u8 reserved_at_2[0x2]; + u8 table_miss_action[0x4]; + u8 level[0x8]; + u8 reserved_at_10[0x8]; + u8 log_size[0x8]; + + u8 reserved_at_20[0x8]; + u8 table_miss_id[0x18]; + + u8 reserved_at_40[0x8]; + u8 lag_master_next_table_id[0x18]; + + u8 reserved_at_60[0xe0]; +}; + struct mlx5_ifc_create_flow_table_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; @@ -6645,21 +6663,7 @@ struct mlx5_ifc_create_flow_table_in_bits { u8 reserved_at_a0[0x20]; - u8 encap_en[0x1]; - u8 decap_en[0x1]; - u8 reserved_at_c2[0x2]; - u8 table_miss_mode[0x4]; - u8 level[0x8]; - u8 reserved_at_d0[0x8]; - u8 log_size[0x8]; - - u8 reserved_at_e0[0x8]; - u8 table_miss_id[0x18]; - - u8 reserved_at_100[0x8]; - u8 lag_master_next_table_id[0x18]; - - u8 reserved_at_120[0x80]; + struct mlx5_ifc_flow_table_context_bits flow_table_context; }; struct mlx5_ifc_create_flow_group_out_bits { @@ -8277,17 +8281,7 @@ struct mlx5_ifc_modify_flow_table_in_bits { u8 reserved_at_a0[0x8]; u8 table_id[0x18]; - u8 reserved_at_c0[0x4]; - u8 table_miss_mode[0x4]; - u8 reserved_at_c8[0x18]; - - u8 reserved_at_e0[0x8]; - u8 table_miss_id[0x18]; - - u8 reserved_at_100[0x8]; - u8 lag_master_next_table_id[0x18]; - - u8 reserved_at_120[0x80]; + struct mlx5_ifc_flow_table_context_bits flow_table_context; }; struct mlx5_ifc_ets_tcn_config_reg_bits { -- cgit v1.2.3 From 5b4793f817452e478442684e6bba85bddb5a9345 Mon Sep 17 00:00:00 2001 From: Eran Ben 
Elisha Date: Mon, 13 Feb 2017 14:00:59 +0200 Subject: net/mlx5e: Add support for reading connector type from PTYS Read port connector type from the firmware instead of caching it in the driver metadata. Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 7 +++++-- include/linux/mlx5/port.h | 13 +++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ec308657af3b..32b044e953d2 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -7295,7 +7295,8 @@ struct mlx5_ifc_ptys_reg_bits { u8 ib_link_width_oper[0x10]; u8 ib_proto_oper[0x10]; - u8 reserved_at_160[0x20]; + u8 reserved_at_160[0x1c]; + u8 connector_type[0x4]; u8 eth_proto_lp_advertise[0x20]; @@ -7698,8 +7699,10 @@ struct mlx5_ifc_peir_reg_bits { }; struct mlx5_ifc_pcam_enhanced_features_bits { - u8 reserved_at_0[0x7e]; + u8 reserved_at_0[0x7c]; + u8 ptys_connector_type[0x1]; + u8 reserved_at_7d[0x1]; u8 ppcnt_discard_group[0x1]; u8 ppcnt_statistical_group[0x1]; }; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index e527732fb31b..c57d4b7de3a8 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -92,6 +92,19 @@ enum mlx5e_link_mode { MLX5E_LINK_MODES_NUMBER, }; +enum mlx5e_connector_type { + MLX5E_PORT_UNKNOWN = 0, + MLX5E_PORT_NONE = 1, + MLX5E_PORT_TP = 2, + MLX5E_PORT_AUI = 3, + MLX5E_PORT_BNC = 4, + MLX5E_PORT_MII = 5, + MLX5E_PORT_FIBRE = 6, + MLX5E_PORT_DA = 7, + MLX5E_PORT_OTHER = 8, + MLX5E_CONNECTOR_TYPE_NUMBER, +}; + #define MLX5E_PROT_MASK(link_mode) (1 << link_mode) #define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF -- cgit v1.2.3 From aaac082dac0a8ac6b00509c7ae2fa8280f966652 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 6 Jun 2017 23:59:34 -0700 Subject: HID: serialize hid_hw_open and hid_hw_close The HID transport drivers either re-implement exactly the same 
logic (usbhid, i2c-hid) or forget to implement it (usbhid) which causes issues when the same device is accessed via multiple interfaces (for example input device through evdev and also hidraw). Let's move the locking logic into HID core to make sure the serialized behavior is always enforced. Also let's uninline and move hid_hw_start() and hid_hw_stop() into hid-core as hid_hw_start() is somewhat large and I do not believe we get any benefit from these two being inline. Signed-off-by: Dmitry Torokhov Reviewed-by: Andy Shevchenko Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid.h | 72 +++++++---------------------------------------------- 1 file changed, 9 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 5be325d890d9..5501eb64dbc4 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -520,7 +521,10 @@ struct hid_device { /* device report descriptor */ struct semaphore driver_input_lock; /* protects the current driver */ struct device dev; /* device */ struct hid_driver *driver; + struct hid_ll_driver *ll_driver; + struct mutex ll_open_lock; + unsigned int ll_open_count; #ifdef CONFIG_HID_BATTERY_STRENGTH /* @@ -937,69 +941,11 @@ static inline int __must_check hid_parse(struct hid_device *hdev) return hid_open_report(hdev); } -/** - * hid_hw_start - start underlaying HW - * - * @hdev: hid device - * @connect_mask: which outputs to connect, see HID_CONNECT_* - * - * Call this in probe function *after* hid_parse. This will setup HW buffers - * and start the device (if not deffered to device open). hid_hw_stop must be - * called if this was successful. 
- */ -static inline int __must_check hid_hw_start(struct hid_device *hdev, - unsigned int connect_mask) -{ - int ret = hdev->ll_driver->start(hdev); - if (ret || !connect_mask) - return ret; - ret = hid_connect(hdev, connect_mask); - if (ret) - hdev->ll_driver->stop(hdev); - return ret; -} - -/** - * hid_hw_stop - stop underlaying HW - * - * @hdev: hid device - * - * This is usually called from remove function or from probe when something - * failed and hid_hw_start was called already. - */ -static inline void hid_hw_stop(struct hid_device *hdev) -{ - hid_disconnect(hdev); - hdev->ll_driver->stop(hdev); -} - -/** - * hid_hw_open - signal underlaying HW to start delivering events - * - * @hdev: hid device - * - * Tell underlying HW to start delivering events from the device. - * This function should be called sometime after successful call - * to hid_hiw_start(). - */ -static inline int __must_check hid_hw_open(struct hid_device *hdev) -{ - return hdev->ll_driver->open(hdev); -} - -/** - * hid_hw_close - signal underlaying HW to stop delivering events - * - * @hdev: hid device - * - * This function indicates that we are not interested in the events - * from this device anymore. Delivery of events may or may not stop, - * depending on the number of users still outstanding. - */ -static inline void hid_hw_close(struct hid_device *hdev) -{ - hdev->ll_driver->close(hdev); -} +int __must_check hid_hw_start(struct hid_device *hdev, + unsigned int connect_mask); +void hid_hw_stop(struct hid_device *hdev); +int __must_check hid_hw_open(struct hid_device *hdev); +void hid_hw_close(struct hid_device *hdev); /** * hid_hw_power - requests underlying HW to go into given power mode -- cgit v1.2.3 From 283a21da1239d8db7fdf6d9077feed73a6efffa2 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 6 Jun 2017 23:59:38 -0700 Subject: HID: remove no longer used hid->open field Now that all users have migrated to use hid->ll_open_count, we can remove hid->open field. 
Signed-off-by: Dmitry Torokhov Reviewed-by: Andy Shevchenko Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 5501eb64dbc4..72e8ac667771 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -548,7 +548,6 @@ struct hid_device { /* device report descriptor */ void *hiddev; /* The hiddev structure */ void *hidraw; - int open; /* is the device open by anyone? */ char name[128]; /* Device name */ char phys[64]; /* Device physical location */ char uniq[64]; /* Device unique identifier (serial #) */ -- cgit v1.2.3 From a5fcf8a6c968ed8e312ff0b2a55d4c62d821eabb Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 6 Jun 2017 17:00:16 +0200 Subject: net: propagate tc filter chain index down the ndo_setup_tc call We need to push the chain index down to the drivers, so they have the information to which chain the rule belongs. For now, no driver supports multichain offload, so only chain 0 is supported. This is needed to prevent chain squashes during offload for now. Later this will be used to implement multichain offload. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c50c9218e31e..524c7776ce96 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -972,7 +972,7 @@ struct xfrmdev_ops { * with PF and querying it may introduce a theoretical security risk. 
* int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); - * int (*ndo_setup_tc)(struct net_device *dev, u32 handle, + * int (*ndo_setup_tc)(struct net_device *dev, u32 handle, u32 chain_index, * __be16 protocol, struct tc_to_netdev *tc); * Called to setup any 'tc' scheduler, classifier or action on @dev. * This is always called from the stack with the rtnl lock held and netif @@ -1222,7 +1222,7 @@ struct net_device_ops { struct net_device *dev, int vf, bool setting); int (*ndo_setup_tc)(struct net_device *dev, - u32 handle, + u32 handle, u32 chain_index, __be16 protocol, struct tc_to_netdev *tc); #if IS_ENABLED(CONFIG_FCOE) -- cgit v1.2.3 From c08b1f45d7d193b3e6dcbbf30d403cb49b667b8c Mon Sep 17 00:00:00 2001 From: Liam Breck Date: Wed, 7 Jun 2017 11:37:51 -0700 Subject: power: supply: core: Add power_supply_battery_info and API power_supply_get_battery_info() reads battery data from devicetree. struct power_supply_battery_info provides battery data to drivers. Its fields correspond to elements in enum power_supply_property. Drivers may surface battery data in sysfs via corresponding POWER_SUPPLY_PROP_* fields. Signed-off-by: Matt Ranostay Signed-off-by: Liam Breck Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 4bd34051995e..34345d716286 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -289,6 +289,25 @@ struct power_supply_info { int use_for_apm; }; +/* + * This is the recommended struct to manage static battery parameters, + * populated by power_supply_get_battery_info(). Most platform drivers should + * use these for consistency. + * Its field names must correspond to elements in enum power_supply_property. + * The default field value is -EINVAL. 
+ * Power supply class itself doesn't use this. + */ + +struct power_supply_battery_info { + int energy_full_design_uwh; /* microWatt-hours */ + int charge_full_design_uah; /* microAmp-hours */ + int voltage_min_design_uv; /* microVolts */ + int precharge_current_ua; /* microAmps */ + int charge_term_current_ua; /* microAmps */ + int constant_charge_current_max_ua; /* microAmps */ + int constant_charge_voltage_max_uv; /* microVolts */ +}; + extern struct atomic_notifier_head power_supply_notifier; extern int power_supply_reg_notifier(struct notifier_block *nb); extern void power_supply_unreg_notifier(struct notifier_block *nb); @@ -307,6 +326,9 @@ static inline struct power_supply * devm_power_supply_get_by_phandle(struct device *dev, const char *property) { return NULL; } #endif /* CONFIG_OF */ + +extern int power_supply_get_battery_info(struct power_supply *psy, + struct power_supply_battery_info *info); extern void power_supply_changed(struct power_supply *psy); extern int power_supply_am_i_supplied(struct power_supply *psy); extern int power_supply_set_battery_charged(struct power_supply *psy); -- cgit v1.2.3 From 413de34ab93edc80ef710c54ceb0987b8496aef3 Mon Sep 17 00:00:00 2001 From: Liam Breck Date: Wed, 7 Jun 2017 11:37:52 -0700 Subject: power: supply: core: Add power_supply_prop_precharge Battery chargers use POWER_SUPPLY_PROP_PRECHARGE_CURRENT Clarify related item POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT Signed-off-by: Liam Breck Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 34345d716286..de89066b72b1 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -146,6 +146,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_TIME_TO_FULL_AVG, POWER_SUPPLY_PROP_TYPE, /* use power_supply.type instead */ POWER_SUPPLY_PROP_SCOPE, + POWER_SUPPLY_PROP_PRECHARGE_CURRENT, 
POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT, POWER_SUPPLY_PROP_CALIBRATE, /* Properties of type `const char *' */ @@ -382,6 +383,8 @@ static inline bool power_supply_is_amp_property(enum power_supply_property psp) case POWER_SUPPLY_PROP_CHARGE_NOW: case POWER_SUPPLY_PROP_CHARGE_AVG: case POWER_SUPPLY_PROP_CHARGE_COUNTER: + case POWER_SUPPLY_PROP_PRECHARGE_CURRENT: + case POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT: case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT: case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX: case POWER_SUPPLY_PROP_CURRENT_MAX: -- cgit v1.2.3 From 14073f6614f62dc7862c83575b042424599cc867 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Wed, 7 Jun 2017 11:37:54 -0700 Subject: power: supply: bq27xxx: Add bulk transfer bus methods Declare bus.write/read_bulk/write_bulk(). Add I2C write/read_bulk/write_bulk() to implement the above. Add bq27xxx_write/read_block/write_block() helpers to call the above. Signed-off-by: Matt Ranostay Signed-off-by: Liam Breck Acked-by: "Andrew F. Davis" Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index b312bcef53da..c3369fa605f9 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -40,6 +40,9 @@ struct bq27xxx_platform_data { struct bq27xxx_device_info; struct bq27xxx_access_methods { int (*read)(struct bq27xxx_device_info *di, u8 reg, bool single); + int (*write)(struct bq27xxx_device_info *di, u8 reg, int value, bool single); + int (*read_bulk)(struct bq27xxx_device_info *di, u8 reg, u8 *data, int len); + int (*write_bulk)(struct bq27xxx_device_info *di, u8 reg, u8 *data, int len); }; struct bq27xxx_reg_cache { -- cgit v1.2.3 From 6016ffc3874d3a1ddf41518481da54b4714717af Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Wed, 19 Apr 2017 16:20:07 -0700 Subject: atomics: Add header comment to spin_unlock_wait() There is material describing the ordering guarantees provided by spin_unlock_wait(), but it is not necessarily easy to find. This commit therefore adds a docbook header comment to this function informally describing its semantics. Signed-off-by: Paul E. McKenney Acked-by: Peter Zijlstra --- include/linux/spinlock.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 59248dcc6ef3..d9510e8522d4 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -369,6 +369,26 @@ static __always_inline int spin_trylock_irq(spinlock_t *lock) raw_spin_trylock_irqsave(spinlock_check(lock), flags); \ }) +/** + * spin_unlock_wait - Interpose between successive critical sections + * @lock: the spinlock whose critical sections are to be interposed. + * + * Semantically this is equivalent to a spin_lock() immediately + * followed by a spin_unlock(). However, most architectures have + * more efficient implementations in which the spin_unlock_wait() + * cannot block concurrent lock acquisition, and in some cases + * where spin_unlock_wait() does not write to the lock variable. + * Nevertheless, spin_unlock_wait() can have high overhead, so if + * you feel the need to use it, please check to see if there is + * a better way to get your job done. + * + * The ordering guarantees provided by spin_unlock_wait() are: + * + * 1. All accesses preceding the spin_unlock_wait() happen before + * any accesses in later critical sections for this same lock. + * 2. All accesses following the spin_unlock_wait() happen after + * any accesses in earlier critical sections for this same lock. 
+ */ static __always_inline void spin_unlock_wait(spinlock_t *lock) { raw_spin_unlock_wait(&lock->rlock); -- cgit v1.2.3 From 3ddf20c953520203c42dbed1f091ed52080e1cd2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 21 Apr 2017 13:33:20 -0700 Subject: srcu: Shrink Tiny SRCU a bit more This commit rearranges Tiny SRCU's srcu_struct structure, substitutes u8 for bool, and shrinks counters down to short. Signed-off-by: Paul E. McKenney --- include/linux/srcutiny.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 42311ee0334f..b8859179b001 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -27,15 +27,15 @@ #include struct srcu_struct { - int srcu_lock_nesting[2]; /* srcu_read_lock() nesting depth. */ + short srcu_lock_nesting[2]; /* srcu_read_lock() nesting depth. */ + short srcu_idx; /* Current reader array element. */ + u8 srcu_gp_running; /* GP workqueue running? */ + u8 srcu_gp_waiting; /* GP waiting for readers? */ struct swait_queue_head srcu_wq; /* Last srcu_read_unlock() wakes GP. */ unsigned long srcu_gp_seq; /* GP seq # for callback tagging. */ struct rcu_segcblist srcu_cblist; /* Pending SRCU callbacks. */ - int srcu_idx; /* Current reader array element. */ - bool srcu_gp_running; /* GP workqueue running? */ - bool srcu_gp_waiting; /* GP waiting for readers? */ struct work_struct srcu_work; /* For driving grace periods. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; -- cgit v1.2.3 From 59d80fd8351b7b9a5dc7bbfa8bc4ca19f6ff3dad Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 28 Apr 2017 10:20:28 -0700 Subject: rcu: Print out rcupdate.c non-default boot-time settings This commit adds a rcupdate_announce_bootup_oddness() function to print out non-default values of significant kernel boot parameter settings to aid in debugging. Signed-off-by: Paul E. 
McKenney --- include/linux/rcupdate.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index e1e5d002fdb9..393e461d3ea8 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -76,6 +76,7 @@ bool rcu_gp_is_normal(void); /* Internal RCU use. */ bool rcu_gp_is_expedited(void); /* Internal RCU use. */ void rcu_expedite_gp(void); void rcu_unexpedite_gp(void); +void rcupdate_announce_bootup_oddness(void); #endif /* #else #ifdef CONFIG_TINY_RCU */ enum rcutorture_type { -- cgit v1.2.3 From 07f6e64bf2ab98cad0d9c595659209858e7bff83 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 28 Apr 2017 13:53:04 -0700 Subject: srcu: Make SRCU be once again optional Commit d160a727c40e ("srcu: Make SRCU be built by default") was made in response to build errors, which were caused by code that included srcu.h despite !SRCU. However, srcutiny.o is almost 2K of code, which is not insignificant for those attempting to run the Linux kernel on IoT devices. This commit therefore makes SRCU be once again optional, and adjusts srcu.h to allow error-free inclusion in !SRCU kernel builds. Signed-off-by: Paul E. McKenney Acked-by: Nicolas Pitre --- include/linux/srcu.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 4c1d5f7e62c4..ea356d800675 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -62,8 +62,13 @@ int init_srcu_struct(struct srcu_struct *sp); #include #elif defined(CONFIG_CLASSIC_SRCU) #include -#else +#elif defined(CONFIG_SRCU) #error "Unknown SRCU implementation specified to kernel configuration" +#else + +/* Dummy definition for things like notifiers. Actual use gets link error. */ +struct srcu_struct { }; + #endif /** -- cgit v1.2.3 From d4efe6c5ad91f9a1f2f1d66b7fbfc87e320b2abc Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Fri, 28 Apr 2017 14:16:16 -0700 Subject: srcu: Shrink Tiny SRCU a bit In Tiny SRCU, __srcu_read_lock() is a trivial function, outweighed by its EXPORT_SYMBOL_GPL(), and on many architectures, its call sequence. This commit therefore moves it to srcutiny.h so that it can be inlined. Signed-off-by: Paul E. McKenney --- include/linux/srcutiny.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index b8859179b001..b6edd9c8fdce 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -63,6 +63,21 @@ void srcu_drive_gp(struct work_struct *wp); void synchronize_srcu(struct srcu_struct *sp); +/* + * Counts the new reader in the appropriate per-CPU element of the + * srcu_struct. Can be invoked from irq/bh handlers, but the matching + * __srcu_read_unlock() must be in the same handler instance. Returns an + * index that must be passed to the matching srcu_read_unlock(). 
+ */ +static inline int __srcu_read_lock(struct srcu_struct *sp) +{ + int idx; + + idx = READ_ONCE(sp->srcu_idx); + WRITE_ONCE(sp->srcu_lock_nesting[idx], sp->srcu_lock_nesting[idx] + 1); + return idx; +} + static inline void synchronize_srcu_expedited(struct srcu_struct *sp) { synchronize_srcu(sp); -- cgit v1.2.3 From 0670c9b3588f163cfcfcd8ea532f321ec004e6ad Mon Sep 17 00:00:00 2001 From: Liam Breck Date: Wed, 7 Jun 2017 11:37:55 -0700 Subject: power: supply: bq27xxx: Add chip data memory read/write support Add these to enable read/write of chip data memory RAM/NVM/flash: bq27xxx_battery_seal() bq27xxx_battery_unseal() bq27xxx_battery_set_cfgupdate() bq27xxx_battery_soft_reset() bq27xxx_battery_read_dm_block() bq27xxx_battery_write_dm_block() bq27xxx_battery_checksum_dm_block() Signed-off-by: Matt Ranostay Signed-off-by: Liam Breck Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index c3369fa605f9..b1defb86e9b6 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -64,6 +64,7 @@ struct bq27xxx_device_info { int id; enum bq27xxx_chip chip; const char *name; + u32 unseal_key; struct bq27xxx_access_methods bus; struct bq27xxx_reg_cache cache; int charge_design_full; -- cgit v1.2.3 From ccce440956c79343ab3aa1269a4cf57f9cce030f Mon Sep 17 00:00:00 2001 From: Liam Breck Date: Wed, 7 Jun 2017 11:37:56 -0700 Subject: power: supply: bq27xxx: Add power_supply_battery_info support Previously there was no way to configure these chips in the event that the defaults didn't match the battery in question. 
For chips with RAM data memory (and also those with flash/NVM data memory if CONFIG_BATTERY_BQ27XXX_DT_UPDATES_NVM is defined and the user has not set module param dt_monitored_battery_updates_nvm=0) we now call power_supply_get_battery_info(), check its values, and write battery properties to chip data memory if there is a dm_regs table for the chip. Signed-off-by: Matt Ranostay Signed-off-by: Liam Breck Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index b1defb86e9b6..11e11685dd1d 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -63,7 +63,9 @@ struct bq27xxx_device_info { struct device *dev; int id; enum bq27xxx_chip chip; + bool ram_chip; const char *name; + struct bq27xxx_dm_reg *dm_regs; u32 unseal_key; struct bq27xxx_access_methods bus; struct bq27xxx_reg_cache cache; -- cgit v1.2.3 From f1bea8793d939e594afb3407c26d9cec8792d42f Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 26 May 2017 23:51:29 -0700 Subject: power: reset: reboot-mode: Make include file global Move the reboot-mode.h include file into include/linux to allow drivers outside drivers/power/reset to implement reboot-mode. 
Signed-off-by: Bjorn Andersson Signed-off-by: Sebastian Reichel --- include/linux/reboot-mode.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 include/linux/reboot-mode.h (limited to 'include/linux') diff --git a/include/linux/reboot-mode.h b/include/linux/reboot-mode.h new file mode 100644 index 000000000000..75f7fe5c881f --- /dev/null +++ b/include/linux/reboot-mode.h @@ -0,0 +1,18 @@ +#ifndef __REBOOT_MODE_H__ +#define __REBOOT_MODE_H__ + +struct reboot_mode_driver { + struct device *dev; + struct list_head head; + int (*write)(struct reboot_mode_driver *reboot, unsigned int magic); + struct notifier_block reboot_notifier; +}; + +int reboot_mode_register(struct reboot_mode_driver *reboot); +int reboot_mode_unregister(struct reboot_mode_driver *reboot); +int devm_reboot_mode_register(struct device *dev, + struct reboot_mode_driver *reboot); +void devm_reboot_mode_unregister(struct device *dev, + struct reboot_mode_driver *reboot); + +#endif -- cgit v1.2.3 From 6623ec7c4dbe18a5a2878e2d888be70d08a91826 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sun, 7 May 2017 05:53:37 -0700 Subject: securityfs: add the ability to support symlinks Signed-off-by: John Johansen Reviewed-by: Seth Arnold Acked-by: Kees Cook --- include/linux/security.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index af675b576645..caf8b64d8b5c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1651,6 +1651,10 @@ extern struct dentry *securityfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); extern struct dentry *securityfs_create_dir(const char *name, struct dentry *parent); +struct dentry *securityfs_create_symlink(const char *name, + struct dentry *parent, + const char *target, + const struct inode_operations *iops); extern void securityfs_remove(struct dentry *dentry); 
#else /* CONFIG_SECURITYFS */ @@ -1670,6 +1674,14 @@ static inline struct dentry *securityfs_create_file(const char *name, return ERR_PTR(-ENODEV); } +static inline struct dentry *securityfs_create_symlink(const char *name, + struct dentry *parent, + const char *target, + const struct inode_operations *iops) +{ + return ERR_PTR(-ENODEV); +} + static inline void securityfs_remove(struct dentry *dentry) {} -- cgit v1.2.3 From aaaad0bfac019bb7701f92ebc1b31b4f85e47b55 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 2 May 2017 09:39:09 -0700 Subject: rcu: Flag need for rcu_node_tree.h and rcu_segcblist.h visibility The rcu_node_tree.h and rcu_segcblist.h header files in the include/linux directory might appear at first sight to be internal to the RCU implementation. However, the definitions in these files are needed to determine the size of TREE SRCU's srcu_struct structure, so they must be externally visible, which is why they live in include/linux. This commit adds comments to this effect to those files. Signed-off-by: Paul E. McKenney --- include/linux/rcu_node_tree.h | 4 ++++ include/linux/rcu_segcblist.h | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcu_node_tree.h b/include/linux/rcu_node_tree.h index 4b766b61e1a0..426cee67f0e2 100644 --- a/include/linux/rcu_node_tree.h +++ b/include/linux/rcu_node_tree.h @@ -7,6 +7,10 @@ * unlimited scalability while maintaining a constant level of contention * on the root node. * + * This seemingly RCU-private file must be available to SRCU users + * because the size of the TREE SRCU srcu_struct structure depends + * on these definitions. 
+ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index ba4d2621d9ca..c3ad00e63556 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -1,6 +1,10 @@ /* * RCU segmented callback lists * + * This seemingly RCU-private file must be available to SRCU users + * because the size of the TREE SRCU srcu_struct structure depends + * on these definitions. + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or -- cgit v1.2.3 From a68a2bb28bbf7a6dd4672a25bd87fd1b5db4fa7d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 08:34:57 -0700 Subject: rcu: Move docbook comments out of rcupdate.h The include/linux/rcupdate.h file is included by more than 200 files, so shrinking it should provide some build-time benefits. This commit therefore moves several docbook comments from rcupdate.h to kernel/rcu/update.c, kernel/rcu/tree.c, and kernel/rcu/tree_plugin.h, thus reducing the number of times that the compiler has to scan these comments. This likely provides only a small benefit, but every little bit helps. This commit also fixes a malformed bulleted list noted by the 0day Test Robot. Reported-by: Ingo Molnar Signed-off-by: Paul E. 
McKenney --- include/linux/rcupdate.h | 117 ++--------------------------------------------- 1 file changed, 3 insertions(+), 114 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 393e461d3ea8..7a206f039fc2 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -140,115 +140,14 @@ void do_trace_rcu_torture_read(const char *rcutorturename, /* Exported common interfaces */ #ifdef CONFIG_PREEMPT_RCU - -/** - * call_rcu() - Queue an RCU callback for invocation after a grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual callback function to be invoked after the grace period - * - * The callback function will be invoked some time after a full grace - * period elapses, in other words after all pre-existing RCU read-side - * critical sections have completed. However, the callback function - * might well execute concurrently with RCU read-side critical sections - * that started after call_rcu() was invoked. RCU read-side critical - * sections are delimited by rcu_read_lock() and rcu_read_unlock(), - * and may be nested. - * - * Note that all CPUs must agree that the grace period extended beyond - * all pre-existing RCU read-side critical section. On systems with more - * than one CPU, this means that when "func()" is invoked, each CPU is - * guaranteed to have executed a full memory barrier since the end of its - * last RCU read-side critical section whose beginning preceded the call - * to call_rcu(). It also means that each CPU executing an RCU read-side - * critical section that continues beyond the start of "func()" must have - * executed a memory barrier after the call_rcu() but before the beginning - * of that RCU read-side critical section. Note that these guarantees - * include CPUs that are offline, idle, or executing in user mode, as - * well as CPUs that are executing in the kernel. 
- * - * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the - * resulting RCU callback function "func()", then both CPU A and CPU B are - * guaranteed to execute a full memory barrier during the time interval - * between the call to call_rcu() and the invocation of "func()" -- even - * if CPU A and CPU B are the same CPU (but again only if the system has - * more than one CPU). - */ -void call_rcu(struct rcu_head *head, - rcu_callback_t func); - +void call_rcu(struct rcu_head *head, rcu_callback_t func); #else /* #ifdef CONFIG_PREEMPT_RCU */ - -/* In classic RCU, call_rcu() is just call_rcu_sched(). */ #define call_rcu call_rcu_sched - #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ -/** - * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual callback function to be invoked after the grace period - * - * The callback function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU - * read-side critical sections have completed. call_rcu_bh() assumes - * that the read-side critical sections end on completion of a softirq - * handler. This means that read-side critical sections in process - * context must not be interrupted by softirqs. This interface is to be - * used when most of the read-side critical sections are in softirq context. - * RCU read-side critical sections are delimited by : - * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context. - * OR - * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. - * These may be nested. - * - * See the description of call_rcu() for more detailed information on - * memory ordering guarantees. - */ -void call_rcu_bh(struct rcu_head *head, - rcu_callback_t func); - -/** - * call_rcu_sched() - Queue an RCU for invocation after sched grace period. - * @head: structure to be used for queueing the RCU updates. 
- * @func: actual callback function to be invoked after the grace period - * - * The callback function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU - * read-side critical sections have completed. call_rcu_sched() assumes - * that the read-side critical sections end on enabling of preemption - * or on voluntary preemption. - * RCU read-side critical sections are delimited by : - * - rcu_read_lock_sched() and rcu_read_unlock_sched(), - * OR - * anything that disables preemption. - * These may be nested. - * - * See the description of call_rcu() for more detailed information on - * memory ordering guarantees. - */ -void call_rcu_sched(struct rcu_head *head, - rcu_callback_t func); - +void call_rcu_bh(struct rcu_head *head, rcu_callback_t func); +void call_rcu_sched(struct rcu_head *head, rcu_callback_t func); void synchronize_sched(void); - -/** - * call_rcu_tasks() - Queue an RCU for invocation task-based grace period - * @head: structure to be used for queueing the RCU updates. - * @func: actual callback function to be invoked after the grace period - * - * The callback function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU - * read-side critical sections have completed. call_rcu_tasks() assumes - * that the read-side critical sections end at a voluntary context - * switch (not a preemption!), entry into idle, or transition to usermode - * execution. As such, there are no read-side primitives analogous to - * rcu_read_lock() and rcu_read_unlock() because this primitive is intended - * to determine that all tasks have passed through a safe state, not so - * much for data-strcuture synchronization. - * - * See the description of call_rcu() for more detailed information on - * memory ordering guarantees. 
- */ void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks(void); void rcu_barrier_tasks(void); @@ -474,18 +373,8 @@ extern struct lockdep_map rcu_bh_lock_map; extern struct lockdep_map rcu_sched_lock_map; extern struct lockdep_map rcu_callback_map; int debug_lockdep_rcu_enabled(void); - int rcu_read_lock_held(void); int rcu_read_lock_bh_held(void); - -/** - * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? - * - * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an - * RCU-sched read-side critical section. In absence of - * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side - * critical section unless it can prove otherwise. - */ int rcu_read_lock_sched_held(void); #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -- cgit v1.2.3 From 3caec62fbb313946b9be53720bbf2280bb19ec28 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 09:27:15 -0700 Subject: rcu: Move rcu_expedited and rcu_normal externs from rcupdate.h The rcu_expedited and rcu_normal variables are used only by sysctl and kernel/rcu/update.c, so it does not make sense to have their extern declarations in rcupdate.h. This commit therefore moves these extern declarations to update.c. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 7a206f039fc2..6e7e930c1610 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -46,13 +46,6 @@ #include #include -#include - -#ifndef CONFIG_TINY_RCU -extern int rcu_expedited; /* for sysctl */ -extern int rcu_normal; /* also for sysctl */ -#endif /* #ifndef CONFIG_TINY_RCU */ - #ifdef CONFIG_TINY_RCU /* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ static inline bool rcu_gp_is_normal(void) /* Internal RCU use. 
*/ -- cgit v1.2.3 From 25c36329a30c8cac090effe1fbae9bb916fa95fe Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 09:51:55 -0700 Subject: rcu: Move expediting-related access/control out of rcupdate.h The rcu_gp_is_normal(), rcu_gp_is_expedited(), rcu_expedite_gp(), and rcu_unexpedite_gp() functions are intended only for use within the RCU implementation itself -- the sysfs access is what should be used outside of RCU. This commit therefore moves the declarations for these functions to kernel/rcu/rcu.h, and also includes this file into kernel/rcu/rcutorture.c and kernel/rcu/rcuperf.c. This also has the beneficial effect of shrinking rcupdate.h a bit. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6e7e930c1610..049c62c59f1b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -46,32 +46,6 @@ #include #include -#ifdef CONFIG_TINY_RCU -/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ -static inline bool rcu_gp_is_normal(void) /* Internal RCU use. */ -{ - return true; -} -static inline bool rcu_gp_is_expedited(void) /* Internal RCU use. */ -{ - return false; -} - -static inline void rcu_expedite_gp(void) -{ -} - -static inline void rcu_unexpedite_gp(void) -{ -} -#else /* #ifdef CONFIG_TINY_RCU */ -bool rcu_gp_is_normal(void); /* Internal RCU use. */ -bool rcu_gp_is_expedited(void); /* Internal RCU use. */ -void rcu_expedite_gp(void); -void rcu_unexpedite_gp(void); -void rcupdate_announce_bootup_oddness(void); -#endif /* #else #ifdef CONFIG_TINY_RCU */ - enum rcutorture_type { RCU_FLAVOR, RCU_BH_FLAVOR, -- cgit v1.2.3 From cad7b3897279c869de61dc88133037b941f84233 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Wed, 3 May 2017 10:22:57 -0700 Subject: rcu: Move torture-related definitions from rcupdate.h to rcu.h The include/linux/rcupdate.h file contains a number of definitions that are used only to communicate between rcutorture, rcuperf, and the RCU code itself. There is no point in having these definitions exposed globally throughout the kernel, so this commit moves them to kernel/rcu/rcu.h. This change has the added benefit of shrinking rcupdate.h. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 52 --------------------------------------------- include/linux/rcutiny.h | 5 +++++ include/linux/rcutree.h | 1 + include/linux/srcuclassic.h | 14 ------------ include/linux/srcutiny.h | 12 ----------- include/linux/srcutree.h | 4 ---- 6 files changed, 6 insertions(+), 82 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 049c62c59f1b..7557499d8e70 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -46,58 +46,6 @@ #include #include -enum rcutorture_type { - RCU_FLAVOR, - RCU_BH_FLAVOR, - RCU_SCHED_FLAVOR, - RCU_TASKS_FLAVOR, - SRCU_FLAVOR, - INVALID_RCU_FLAVOR -}; - -#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) -void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, - unsigned long *gpnum, unsigned long *completed); -void rcutorture_record_test_transition(void); -void rcutorture_record_progress(unsigned long vernum); -void do_trace_rcu_torture_read(const char *rcutorturename, - struct rcu_head *rhp, - unsigned long secs, - unsigned long c_old, - unsigned long c); -bool rcu_irq_enter_disabled(void); -#else -static inline void rcutorture_get_gp_data(enum rcutorture_type test_type, - int *flags, - unsigned long *gpnum, - unsigned long *completed) -{ - *flags = 0; - *gpnum = 0; - *completed = 0; -} -static inline void rcutorture_record_test_transition(void) -{ -} -static inline void 
rcutorture_record_progress(unsigned long vernum) -{ -} -static inline bool rcu_irq_enter_disabled(void) -{ - return false; -} -#ifdef CONFIG_RCU_TRACE -void do_trace_rcu_torture_read(const char *rcutorturename, - struct rcu_head *rhp, - unsigned long secs, - unsigned long c_old, - unsigned long c); -#else -#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \ - do { } while (0) -#endif -#endif - #define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b)) #define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b)) #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 74d9c3a1feee..ade360e0d58c 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -202,6 +202,11 @@ static inline void rcu_irq_enter(void) { } +static inline bool rcu_irq_enter_disabled(void) +{ + return false; +} + static inline void rcu_irq_exit_irqson(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 0bacb6b2af69..28af91a19573 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -101,6 +101,7 @@ void rcu_irq_enter(void); void rcu_irq_exit(void); void rcu_irq_enter_irqson(void); void rcu_irq_exit_irqson(void); +bool rcu_irq_enter_disabled(void); void exit_rcu(void); diff --git a/include/linux/srcuclassic.h b/include/linux/srcuclassic.h index 5753f7322262..41cf99930f34 100644 --- a/include/linux/srcuclassic.h +++ b/include/linux/srcuclassic.h @@ -98,18 +98,4 @@ void synchronize_srcu_expedited(struct srcu_struct *sp); void srcu_barrier(struct srcu_struct *sp); unsigned long srcu_batches_completed(struct srcu_struct *sp); -static inline void srcutorture_get_gp_data(enum rcutorture_type test_type, - struct srcu_struct *sp, int *flags, - unsigned long *gpnum, - unsigned long *completed) -{ - if (test_type != SRCU_FLAVOR) - return; - *flags = 0; - *completed = sp->completed; - *gpnum = *completed; - if (sp->batch_queue.head || sp->batch_check0.head || 
sp->batch_check0.head) - (*gpnum)++; -} - #endif diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index b6edd9c8fdce..85bddce6a7a6 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -93,16 +93,4 @@ static inline unsigned long srcu_batches_completed(struct srcu_struct *sp) return 0; } -static inline void srcutorture_get_gp_data(enum rcutorture_type test_type, - struct srcu_struct *sp, int *flags, - unsigned long *gpnum, - unsigned long *completed) -{ - if (test_type != SRCU_FLAVOR) - return; - *flags = 0; - *completed = sp->srcu_gp_seq; - *gpnum = *completed; -} - #endif diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 32e86d85fd11..f4adfed17b51 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -143,8 +143,4 @@ void synchronize_srcu_expedited(struct srcu_struct *sp); void srcu_barrier(struct srcu_struct *sp); unsigned long srcu_batches_completed(struct srcu_struct *sp); -void srcutorture_get_gp_data(enum rcutorture_type test_type, - struct srcu_struct *sp, int *flags, - unsigned long *gpnum, unsigned long *completed); - #endif -- cgit v1.2.3 From c4cbf9f736f5bd0a53a5ea401d86376c86bf905e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 10:36:36 -0700 Subject: rcu: Remove UINT_CMP_GE() and UINT_CMP_LT() The UINT_CMP_GE() and UINT_CMP_LT() macros are not used, so this commit removes them. Reported-by: Ingo Molnar Signed-off-by: Paul E. 
McKenney --- include/linux/rcupdate.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 7557499d8e70..fa3f921e5874 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -46,8 +46,6 @@ #include #include -#define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b)) -#define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b)) #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) #define ulong2long(a) (*(long *)(&(a))) -- cgit v1.2.3 From d0df7a349133e10184d478ae1189e79e5c53615d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 10:52:10 -0700 Subject: rcu: Move rcupdate.h to new empty-function style This commit saves a few lines in include/linux/rcupdate.h by moving to single-line definitions for empty functions, instead of the old style where the two curly braces each get their own line. Reported-by: Ingo Molnar Signed-off-by: Paul E. 
McKenney --- include/linux/rcupdate.h | 39 +++++++++------------------------------ 1 file changed, 9 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index fa3f921e5874..415633076cb1 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -124,12 +124,8 @@ static inline void rcu_end_inkernel_boot(void) { } void rcu_sysrq_start(void); void rcu_sysrq_end(void); #else /* #ifdef CONFIG_RCU_STALL_COMMON */ -static inline void rcu_sysrq_start(void) -{ -} -static inline void rcu_sysrq_end(void) -{ -} +static inline void rcu_sysrq_start(void) { } +static inline void rcu_sysrq_end(void) { } #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */ #ifdef CONFIG_NO_HZ_FULL @@ -143,9 +139,7 @@ static inline void rcu_user_exit(void) { } #ifdef CONFIG_RCU_NOCB_CPU void rcu_init_nohz(void); #else /* #ifdef CONFIG_RCU_NOCB_CPU */ -static inline void rcu_init_nohz(void) -{ -} +static inline void rcu_init_nohz(void) { } #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ /** @@ -243,21 +237,10 @@ void destroy_rcu_head(struct rcu_head *head); void init_rcu_head_on_stack(struct rcu_head *head); void destroy_rcu_head_on_stack(struct rcu_head *head); #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ -static inline void init_rcu_head(struct rcu_head *head) -{ -} - -static inline void destroy_rcu_head(struct rcu_head *head) -{ -} - -static inline void init_rcu_head_on_stack(struct rcu_head *head) -{ -} - -static inline void destroy_rcu_head_on_stack(struct rcu_head *head) -{ -} +static inline void init_rcu_head(struct rcu_head *head) { } +static inline void destroy_rcu_head(struct rcu_head *head) { } +static inline void init_rcu_head_on_stack(struct rcu_head *head) { } +static inline void destroy_rcu_head_on_stack(struct rcu_head *head) { } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) @@ -334,9 +317,7 @@ static inline void rcu_preempt_sleep_check(void) 
"Illegal context switch in RCU read-side critical section"); } #else /* #ifdef CONFIG_PROVE_RCU */ -static inline void rcu_preempt_sleep_check(void) -{ -} +static inline void rcu_preempt_sleep_check(void) { } #endif /* #else #ifdef CONFIG_PROVE_RCU */ #define rcu_sleep_check() \ @@ -915,9 +896,7 @@ static inline bool rcu_sys_is_idle(void) return false; } -static inline void rcu_sysidle_force_exit(void) -{ -} +static inline void rcu_sysidle_force_exit(void) { } #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ -- cgit v1.2.3 From 791875d16e2f6e2e5b90328ccac643f512ac76c4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 11:06:05 -0700 Subject: rcu: Eliminate the unused __rcu_is_watching() function The __rcu_is_watching() function is currently not used, aside from to implement the rcu_is_watching() function. This commit therefore eliminates __rcu_is_watching(), which has the beneficial side-effect of shrinking include/linux/rcupdate.h a bit. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 4 ---- include/linux/rcutiny.h | 11 ----------- 2 files changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 415633076cb1..b4edfe0966c6 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -204,10 +204,6 @@ do { \ rcu_note_voluntary_context_switch(current); \ } while (0) -#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) -bool __rcu_is_watching(void); -#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */ - /* * Infrastructure to implement the synchronize_() primitives in * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. 
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index ade360e0d58c..5ed6934152a6 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -232,22 +232,11 @@ static inline void rcu_scheduler_starting(void) } #endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ -#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) - -static inline bool rcu_is_watching(void) -{ - return __rcu_is_watching(); -} - -#else /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ - static inline bool rcu_is_watching(void) { return true; } -#endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ - static inline void rcu_request_urgent_qs_task(struct task_struct *t) { } -- cgit v1.2.3 From 82118249d0ca4078d56d5e43172ada1567fdf946 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 11:13:24 -0700 Subject: rcu: Move the RCU_SCHEDULER_ definitions from rcupdate.h The RCU_SCHEDULER_INACTIVE, RCU_SCHEDULER_INIT, and RCU_SCHEDULER_RUNNING definitions are used only within RCU, so this commit moves them from include/linux/rcupdate.h to kernel/rcu/rcu.h. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b4edfe0966c6..9206a28a2d44 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -217,10 +217,6 @@ do { \ #error "Unknown RCU implementation specified to kernel configuration" #endif -#define RCU_SCHEDULER_INACTIVE 0 -#define RCU_SCHEDULER_INIT 1 -#define RCU_SCHEDULER_RUNNING 2 - /* * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic * initialization and destruction of rcu_head on the stack. rcu_head structures -- cgit v1.2.3 From 752de307b0ee47308bfc299de3a3ad623c16b4d8 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Wed, 3 May 2017 11:18:01 -0700 Subject: rcu: Remove linux/debugobjects.h from rcupdate.h The include/linux/rcupdate.h file does not actually need anything from linux/debugobjects.h, so this commit removes the inclusion. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9206a28a2d44..f105f0834bbe 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 3d54f7983f3e6ac9f444fa20970b1abc8f089b79 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 12:25:50 -0700 Subject: rcu: Move rcu_is_nocb_cpu() from rcupdate.h to rcu.h The rcu_is_nocb_cpu() function is used only internally to RCU. This commit therefore moves its declaration from include/linux/rcupdate.h to kernel/rcu/rcu.h. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f105f0834bbe..003427425e27 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -867,15 +867,6 @@ static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) } #endif /* #ifdef CONFIG_TINY_RCU */ -#if defined(CONFIG_RCU_NOCB_CPU_ALL) -static inline bool rcu_is_nocb_cpu(int cpu) { return true; } -#elif defined(CONFIG_RCU_NOCB_CPU) -bool rcu_is_nocb_cpu(int cpu); -#else -static inline bool rcu_is_nocb_cpu(int cpu) { return false; } -#endif - - /* Only for use by adaptive-ticks code. */ #ifdef CONFIG_NO_HZ_FULL_SYSIDLE bool rcu_sys_is_idle(void); -- cgit v1.2.3 From b8989b76052eedc99b09322efd6f68816f191a1a Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Wed, 3 May 2017 12:28:59 -0700 Subject: rcu: Move rcu_ftrace_dump() from rcupdate.h to rcu.h The rcu_ftrace_dump() function is used only internally to RCU. This commit therefore moves its declaration from include/linux/rcupdate.h to kernel/rcu/rcu.h. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 003427425e27..ad5e6934dcf3 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -883,18 +883,6 @@ static inline void rcu_sysidle_force_exit(void) { } #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ -/* - * Dump the ftrace buffer, but only one time per callsite per boot. - */ -#define rcu_ftrace_dump(oops_dump_mode) \ -do { \ - static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \ - \ - if (!atomic_read(&___rfd_beenhere) && \ - !atomic_xchg(&___rfd_beenhere, 1)) \ - ftrace_dump(oops_dump_mode); \ -} while (0) - /* * Place this after a lock-acquisition primitive to guarantee that * an UNLOCK+LOCK pair acts as a full barrier. This guarantee applies -- cgit v1.2.3 From 17a8c187310ccc5f5b65a7d8faf96fdc66c5fe3d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 12:32:55 -0700 Subject: rcu: move rcupdate.h to the new true/false-function style This commit saves a few lines in include/linux/rcupdate.h by moving to single-line definitions for functions that just return either true or false, instead of the old style where the two curly braces each get their own line. Signed-off-by: Paul E. 
McKenney --- include/linux/rcupdate.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ad5e6934dcf3..564096e6e141 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -237,10 +237,7 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head) { } #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) bool rcu_lockdep_current_cpu_online(void); #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ -static inline bool rcu_lockdep_current_cpu_online(void) -{ - return true; -} +static inline bool rcu_lockdep_current_cpu_online(void) { return true; } #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -872,14 +869,8 @@ static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) bool rcu_sys_is_idle(void); void rcu_sysidle_force_exit(void); #else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ - -static inline bool rcu_sys_is_idle(void) -{ - return false; -} - +static inline bool rcu_sys_is_idle(void) { return false; } static inline void rcu_sysidle_force_exit(void) { } - #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ -- cgit v1.2.3 From e3c8d51e1a58c73a557eb38a9a6afb4f704a3379 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 13:37:16 -0700 Subject: rcu: Move torture-related functions out of rcutiny.h and rcutree.h The various functions similar to rcu_batches_started(), the function show_rcu_gp_kthreads(), the various functions similar to rcu_force_quiescent_state(), and the variables rcutorture_testseq and rcutorture_vernum are used only within RCU. There is therefore no point in exporting them to the kernel at large from include/linux/rcutiny.h and include/linux/rcutree.h. This commit therefore moves all of these to kernel/rcu/rcu.h. Reported-by: Ingo Molnar Signed-off-by: Paul E. 
McKenney --- include/linux/rcutiny.h | 80 ------------------------------------------------- include/linux/rcutree.h | 16 ---------- 2 files changed, 96 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 5ed6934152a6..0d9270913686 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -106,86 +106,6 @@ static inline void rcu_virt_note_context_switch(int cpu) { } -/* - * Return the number of grace periods started. - */ -static inline unsigned long rcu_batches_started(void) -{ - return 0; -} - -/* - * Return the number of bottom-half grace periods started. - */ -static inline unsigned long rcu_batches_started_bh(void) -{ - return 0; -} - -/* - * Return the number of sched grace periods started. - */ -static inline unsigned long rcu_batches_started_sched(void) -{ - return 0; -} - -/* - * Return the number of grace periods completed. - */ -static inline unsigned long rcu_batches_completed(void) -{ - return 0; -} - -/* - * Return the number of bottom-half grace periods completed. - */ -static inline unsigned long rcu_batches_completed_bh(void) -{ - return 0; -} - -/* - * Return the number of sched grace periods completed. - */ -static inline unsigned long rcu_batches_completed_sched(void) -{ - return 0; -} - -/* - * Return the number of expedited grace periods completed. - */ -static inline unsigned long rcu_exp_batches_completed(void) -{ - return 0; -} - -/* - * Return the number of expedited sched grace periods completed. 
- */ -static inline unsigned long rcu_exp_batches_completed_sched(void) -{ - return 0; -} - -static inline void rcu_force_quiescent_state(void) -{ -} - -static inline void rcu_bh_force_quiescent_state(void) -{ -} - -static inline void rcu_sched_force_quiescent_state(void) -{ -} - -static inline void show_rcu_gp_kthreads(void) -{ -} - static inline void rcu_cpu_stall_reset(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 28af91a19573..43113323ca09 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -79,22 +79,6 @@ void cond_synchronize_rcu(unsigned long oldstate); unsigned long get_state_synchronize_sched(void); void cond_synchronize_sched(unsigned long oldstate); -extern unsigned long rcutorture_testseq; -extern unsigned long rcutorture_vernum; -unsigned long rcu_batches_started(void); -unsigned long rcu_batches_started_bh(void); -unsigned long rcu_batches_started_sched(void); -unsigned long rcu_batches_completed(void); -unsigned long rcu_batches_completed_bh(void); -unsigned long rcu_batches_completed_sched(void); -unsigned long rcu_exp_batches_completed(void); -unsigned long rcu_exp_batches_completed_sched(void); -void show_rcu_gp_kthreads(void); - -void rcu_force_quiescent_state(void); -void rcu_bh_force_quiescent_state(void); -void rcu_sched_force_quiescent_state(void); - void rcu_idle_enter(void); void rcu_idle_exit(void); void rcu_irq_enter(void); -- cgit v1.2.3 From fe21a27e8ca0937a5ac298de1f4b46382e9c5c88 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 13:45:51 -0700 Subject: rcu: Move rcu_request_urgent_qs_task() out of rcutiny.h and rcutree.h The rcu_request_urgent_qs_task() function is used only within RCU, so there is no point in exporting it to the rest of the kernel from include/linux/rcutiny.h and include/linux/rcutree.h. This commit therefore moves this function to kernel/rcu/rcu.h. Reported-by: Ingo Molnar Signed-off-by: Paul E.
McKenney --- include/linux/rcutiny.h | 4 ---- include/linux/rcutree.h | 3 --- 2 files changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 0d9270913686..f5067941bc27 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -157,10 +157,6 @@ static inline bool rcu_is_watching(void) return true; } -static inline void rcu_request_urgent_qs_task(struct task_struct *t) -{ -} - static inline void rcu_all_qs(void) { barrier(); /* Avoid RCU read-side critical sections leaking across. */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 43113323ca09..d6aa89d15d47 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -91,10 +91,7 @@ void exit_rcu(void); void rcu_scheduler_starting(void); extern int rcu_scheduler_active __read_mostly; - bool rcu_is_watching(void); -void rcu_request_urgent_qs_task(struct task_struct *t); - void rcu_all_qs(void); /* RCUtree hotplug events */ -- cgit v1.2.3 From 71c40fd0b5ceb300c6cb8753835d9d94a8bfc56f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 13:51:42 -0700 Subject: rcu: Move rcutiny.h to new empty/true/false-function style This commit saves a few lines in include/linux/rcutiny.h by moving to single-line definitions for empty functions, instead of the old style where the two curly braces each get their own line. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 71 +++++++++++-------------------------------------- 1 file changed, 16 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index f5067941bc27..2bfe48bc0e3b 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -33,10 +33,8 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp) return 0; } -static inline bool rcu_eqs_special_set(int cpu) -{ - return false; /* Never flag non-existent other CPUs! 
*/ -} +/* Never flag non-existent other CPUs! */ +static inline bool rcu_eqs_special_set(int cpu) { return false; } static inline unsigned long get_state_synchronize_rcu(void) { @@ -102,65 +100,28 @@ static inline void kfree_call_rcu(struct rcu_head *head, * Take advantage of the fact that there is only one CPU, which * allows us to ignore virtualization-based context switches. */ -static inline void rcu_virt_note_context_switch(int cpu) -{ -} - -static inline void rcu_cpu_stall_reset(void) -{ -} - -static inline void rcu_idle_enter(void) -{ -} - -static inline void rcu_idle_exit(void) -{ -} - -static inline void rcu_irq_enter(void) -{ -} - -static inline bool rcu_irq_enter_disabled(void) -{ - return false; -} - -static inline void rcu_irq_exit_irqson(void) -{ -} - -static inline void rcu_irq_enter_irqson(void) -{ -} - -static inline void rcu_irq_exit(void) -{ -} - -static inline void exit_rcu(void) -{ -} +static inline void rcu_virt_note_context_switch(int cpu) { } +static inline void rcu_cpu_stall_reset(void) { } +static inline void rcu_idle_enter(void) { } +static inline void rcu_idle_exit(void) { } +static inline void rcu_irq_enter(void) { } +static inline bool rcu_irq_enter_disabled(void) { return false; } +static inline void rcu_irq_exit_irqson(void) { } +static inline void rcu_irq_enter_irqson(void) { } +static inline void rcu_irq_exit(void) { } +static inline void exit_rcu(void) { } #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) extern int rcu_scheduler_active __read_mostly; void rcu_scheduler_starting(void); #else /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ -static inline void rcu_scheduler_starting(void) -{ -} +static inline void rcu_scheduler_starting(void) { } #endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ -static inline bool rcu_is_watching(void) -{ - return true; -} +static inline bool rcu_is_watching(void) { return true; } -static inline void rcu_all_qs(void) -{ - barrier(); /* 
Avoid RCU read-side critical sections leaking across. */ -} +/* Avoid RCU read-side critical sections leaking across. */ +static inline void rcu_all_qs(void) { barrier(); } /* RCUtree hotplug events */ #define rcutree_prepare_cpu NULL -- cgit v1.2.3 From 5a0465e17a18c467b712a816985b7b8dd8d10c16 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 4 May 2017 11:31:04 -0700 Subject: srcu: Shrink srcu.h by moving docbook and private function The call_srcu() docbook entry is currently in include/linux/srcu.h, which causes needless processing for each include point. This commit therefore moves this entry to kernel/rcu/srcutree.c, which the compiler reads only once. In addition, the srcu_batches_completed() function is used only within RCU and its torture-test suites. This commit therefore also moves this function's declaration from include/linux/srcutiny.h, include/linux/srcutree.h, and include/linux/srcuclassic.h to kernel/rcu/rcu.h. Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 20 -------------------- include/linux/srcuclassic.h | 1 - include/linux/srcutiny.h | 5 ----- include/linux/srcutree.h | 1 - 4 files changed, 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index ea356d800675..5f509018e6b5 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -65,32 +65,12 @@ int init_srcu_struct(struct srcu_struct *sp); #elif defined(CONFIG_SRCU) #error "Unknown SRCU implementation specified to kernel configuration" #else - /* Dummy definition for things like notifiers. Actual use gets link error. */ struct srcu_struct { }; - #endif -/** - * call_srcu() - Queue a callback for invocation after an SRCU grace period - * @sp: srcu_struct in queue the callback - * @head: structure to be used for queueing the SRCU callback. 
- * @func: function to be invoked after the SRCU grace period - * - * The callback function will be invoked some time after a full SRCU - * grace period elapses, in other words after all pre-existing SRCU - * read-side critical sections have completed. However, the callback - * function might well execute concurrently with other SRCU read-side - * critical sections that started after call_srcu() was invoked. SRCU - * read-side critical sections are delimited by srcu_read_lock() and - * srcu_read_unlock(), and may be nested. - * - * The callback will be invoked from process context, but must nevertheless - * be fast and must not block. - */ void call_srcu(struct srcu_struct *sp, struct rcu_head *head, void (*func)(struct rcu_head *head)); - void cleanup_srcu_struct(struct srcu_struct *sp); int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp); void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); diff --git a/include/linux/srcuclassic.h b/include/linux/srcuclassic.h index 41cf99930f34..67db4a36ef0d 100644 --- a/include/linux/srcuclassic.h +++ b/include/linux/srcuclassic.h @@ -96,6 +96,5 @@ void process_srcu(struct work_struct *work); void synchronize_srcu_expedited(struct srcu_struct *sp); void srcu_barrier(struct srcu_struct *sp); -unsigned long srcu_batches_completed(struct srcu_struct *sp); #endif diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 85bddce6a7a6..4c53e698c6e4 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -88,9 +88,4 @@ static inline void srcu_barrier(struct srcu_struct *sp) synchronize_srcu(sp); } -static inline unsigned long srcu_batches_completed(struct srcu_struct *sp) -{ - return 0; -} - #endif diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index f4adfed17b51..24e949bda12a 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -141,6 +141,5 @@ void process_srcu(struct work_struct *work); void synchronize_srcu_expedited(struct srcu_struct 
*sp); void srcu_barrier(struct srcu_struct *sp); -unsigned long srcu_batches_completed(struct srcu_struct *sp); #endif -- cgit v1.2.3 From 2464dd940e23bad227c387a40eec99f7aa02ed96 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 4 May 2017 14:29:16 -0700 Subject: srcu: Apply trivial callback lists to shrink Tiny SRCU The rcu_segcblist structure provides quite a bit of functionality, and Tiny SRCU needs almost none of it. So this commit replaces Tiny SRCU's uses of rcu_segcblist with a simple singly linked list with tail pointer. This change significantly reduces Tiny SRCU's memory footprint, more than making up for the growth caused by the creation of rcu_segcblist.c Signed-off-by: Paul E. McKenney --- include/linux/srcutiny.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 4c53e698c6e4..cfbfc540cafc 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -33,9 +33,8 @@ struct srcu_struct { u8 srcu_gp_waiting; /* GP waiting for readers? */ struct swait_queue_head srcu_wq; /* Last srcu_read_unlock() wakes GP. */ - unsigned long srcu_gp_seq; /* GP seq # for callback tagging. */ - struct rcu_segcblist srcu_cblist; - /* Pending SRCU callbacks. */ + struct rcu_head *srcu_cb_head; /* Pending callbacks: Head. */ + struct rcu_head **srcu_cb_tail; /* Pending callbacks: Tail. */ struct work_struct srcu_work; /* For driving grace periods. 
*/ #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; @@ -47,7 +46,7 @@ void srcu_drive_gp(struct work_struct *wp); #define __SRCU_STRUCT_INIT(name) \ { \ .srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq), \ - .srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist), \ + .srcu_cb_tail = &name.srcu_cb_head, \ .srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp), \ __SRCU_DEP_MAP_INIT(name) \ } -- cgit v1.2.3 From 0cb5133ab573d1c471cfcfa632b0260a5aad5303 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 31 May 2017 09:26:07 -0700 Subject: bcm47xx: Fix build regression Commit 0bc2d534708b ("rcu: Refactor #includes from include/linux/rcupdate.h") caused a build regression in an MTD partition driver: In file included from drivers/mtd/bcm47xxpart.c:12:0: include/linux/bcm47xx_nvram.h: In function 'bcm47xx_nvram_init_from_mem': include/linux/bcm47xx_nvram.h:27:10: error: 'ENOTSUPP' undeclared (first use in this function) The rcupdate.h file has no particular need for linux/errno.h, so this commit includes linux/errno.h into bcm47xx_nvram.h. Fixes: 0bc2d534708b ("rcu: Refactor #includes from include/linux/rcupdate.h") Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney --- include/linux/bcm47xx_nvram.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bcm47xx_nvram.h b/include/linux/bcm47xx_nvram.h index 2793652fbf66..a414a2b53e41 100644 --- a/include/linux/bcm47xx_nvram.h +++ b/include/linux/bcm47xx_nvram.h @@ -8,6 +8,7 @@ #ifndef __BCM47XX_NVRAM_H #define __BCM47XX_NVRAM_H +#include #include #include #include -- cgit v1.2.3 From 5f192ab027a5d865be24c817005d42eb96314dc2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 15:24:25 -0700 Subject: rcu: Refactor #includes from include/linux/rcupdate.h The list of #includes from include/linux/rcupdate.h has grown quite a bit, so it is time to trim it. 
This commit moves the #include of include/linux/ktime.h to include/linux/rcutiny.h, along with the Tiny-RCU-only function that was the only thing needing ktime.h. It then reconstructs the files included into include/linux/ktime.h based on what is actually needed, with significant help from the 0day Test Robot. This single change reduces the .i file footprint from rcupdate.h from 9018 lines to 7101 lines. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 22 ++++++---------------- include/linux/rcutiny.h | 8 +++++++- 2 files changed, 13 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 564096e6e141..ee40d7eba741 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -34,16 +34,14 @@ #define __LINUX_RCUPDATE_H #include -#include -#include -#include -#include -#include -#include -#include #include -#include +#include #include +#include +#include +#include +#include +#include #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) @@ -856,14 +854,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define kfree_rcu(ptr, rcu_head) \ __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) -#ifdef CONFIG_TINY_RCU -static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) -{ - *nextevt = KTIME_MAX; - return 0; -} -#endif /* #ifdef CONFIG_TINY_RCU */ - /* Only for use by adaptive-ticks code.
*/ #ifdef CONFIG_NO_HZ_FULL_SYSIDLE bool rcu_sys_is_idle(void); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 2bfe48bc0e3b..c869785f16bd 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -25,7 +25,7 @@ #ifndef __LINUX_TINY_H #define __LINUX_TINY_H -#include +#include struct rcu_dynticks; static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp) @@ -96,6 +96,12 @@ static inline void kfree_call_rcu(struct rcu_head *head, rcu_note_voluntary_context_switch_lite(current); \ } while (0) +static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) +{ + *nextevt = KTIME_MAX; + return 0; +} + /* * Take advantage of the fact that there is only one CPU, which * allows us to ignore virtualization-based context switches. -- cgit v1.2.3 From a3883df3935e10caa8297719d85fa8eaff7cabbd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 9 May 2017 15:00:14 -0700 Subject: srcu: Use rnp->lock wrappers to replace explicit memory barriers This commit uses TREE RCU's rnp->lock wrappers to replace a few explicit memory barriers. This change also has the advantage of making SRCU's memory-ordering properties be implemented in roughly the same way as they are in Tree RCU. Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 24e949bda12a..42973f787e7e 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -40,7 +40,7 @@ struct srcu_data { unsigned long srcu_unlock_count[2]; /* Unlocks per CPU. */ /* Update-side state. */ - spinlock_t lock ____cacheline_internodealigned_in_smp; + raw_spinlock_t __private lock ____cacheline_internodealigned_in_smp; struct rcu_segcblist srcu_cblist; /* List of callbacks.*/ unsigned long srcu_gp_seq_needed; /* Furthest future GP needed. */ unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. 
*/ @@ -58,7 +58,7 @@ struct srcu_data { * Node in SRCU combining tree, similar in function to rcu_data. */ struct srcu_node { - spinlock_t lock; + raw_spinlock_t __private lock; unsigned long srcu_have_cbs[4]; /* GP seq for children */ /* having CBs, but only */ /* is > ->srcu_gq_seq. */ @@ -78,7 +78,7 @@ struct srcu_struct { struct srcu_node *level[RCU_NUM_LVLS + 1]; /* First node at each level. */ struct mutex srcu_cb_mutex; /* Serialize CB preparation. */ - spinlock_t gp_lock; /* protect ->srcu_cblist */ + raw_spinlock_t __private lock; /* Protect counters */ struct mutex srcu_gp_mutex; /* Serialize GP work. */ unsigned int srcu_idx; /* Current rdr array element. */ unsigned long srcu_gp_seq; /* Grace-period seq #. */ @@ -109,7 +109,7 @@ void process_srcu(struct work_struct *work); #define __SRCU_STRUCT_INIT(name) \ { \ .sda = &name##_srcu_data, \ - .gp_lock = __SPIN_LOCK_UNLOCKED(name.gp_lock), \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ .srcu_gp_seq_needed = 0 - 1, \ __SRCU_DEP_MAP_INIT(name) \ } -- cgit v1.2.3 From fe5ac724d81a3c7803e60c2232718f212f3f38d4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 11 May 2017 11:26:22 -0700 Subject: rcu: Remove nohz_full full-system-idle state machine The NO_HZ_FULL_SYSIDLE full-system-idle capability was added in 2013 by commit 0edd1b1784cb ("nohz_full: Add full-system-idle state machine"), but has not been used. This commit therefore removes it. If it turns out to be needed later, this commit can always be reverted. Signed-off-by: Paul E. 
McKenney Cc: Frederic Weisbecker Cc: Rik van Riel Cc: Ingo Molnar Acked-by: Linus Torvalds --- include/linux/rcupdate.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ee40d7eba741..7f24a5e673f5 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -854,15 +854,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define kfree_rcu(ptr, rcu_head) \ __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) -/* Only for use by adaptive-ticks code. */ -#ifdef CONFIG_NO_HZ_FULL_SYSIDLE -bool rcu_sys_is_idle(void); -void rcu_sysidle_force_exit(void); -#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ -static inline bool rcu_sys_is_idle(void) { return false; } -static inline void rcu_sysidle_force_exit(void) { } -#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ - /* * Place this after a lock-acquisition primitive to guarantee that -- cgit v1.2.3 From d2b1654f91f9e928011fbea7138854ee2044f470 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 11 May 2017 12:01:50 -0700 Subject: rcu: Remove #ifdef moving rcu_end_inkernel_boot from rcupdate.h This commit removes a #ifdef and saves a few lines of code by moving the rcu_end_inkernel_boot() function from include/linux/rcupdate.h to include/linux/rcutiny.h (for TINY_RCU) and to include/linux/rcutree.h (for TREE_RCU). Signed-off-by: Paul E. 
McKenney --- include/linux/rcupdate.h | 6 ------ include/linux/rcutiny.h | 2 +- include/linux/rcutree.h | 1 + 3 files changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 7f24a5e673f5..f816fc72b51e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -111,12 +111,6 @@ void rcu_check_callbacks(int user); void rcu_report_dead(unsigned int cpu); void rcu_cpu_starting(unsigned int cpu); -#ifndef CONFIG_TINY_RCU -void rcu_end_inkernel_boot(void); -#else /* #ifndef CONFIG_TINY_RCU */ -static inline void rcu_end_inkernel_boot(void) { } -#endif /* #ifndef CONFIG_TINY_RCU */ - #ifdef CONFIG_RCU_STALL_COMMON void rcu_sysrq_start(void); void rcu_sysrq_end(void); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index c869785f16bd..5becbbccb998 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -123,7 +123,7 @@ void rcu_scheduler_starting(void); #else /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ static inline void rcu_scheduler_starting(void) { } #endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ - +static inline void rcu_end_inkernel_boot(void) { } static inline bool rcu_is_watching(void) { return true; } /* Avoid RCU read-side critical sections leaking across. */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index d6aa89d15d47..37d6fd3b7ff8 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -91,6 +91,7 @@ void exit_rcu(void); void rcu_scheduler_starting(void); extern int rcu_scheduler_active __read_mostly; +void rcu_end_inkernel_boot(void); bool rcu_is_watching(void); void rcu_all_qs(void); -- cgit v1.2.3 From 41a2901e7d220875752a8c870e0b53288a578c20 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Fri, 12 May 2017 15:56:35 -0700 Subject: rcu: Remove SPARSE_RCU_POINTER Kconfig option The sparse-based checking for non-RCU accesses to RCU-protected pointers has been around for a very long time, and it is now the only type of sparse-based checking that is optional. This commit therefore makes it unconditional. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney Cc: Fengguang Wu --- include/linux/compiler.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index f8110051188f..707242fdbb89 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -17,11 +17,7 @@ # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) # define __percpu __attribute__((noderef, address_space(3))) -#ifdef CONFIG_SPARSE_RCU_POINTER # define __rcu __attribute__((noderef, address_space(4))) -#else /* CONFIG_SPARSE_RCU_POINTER */ -# define __rcu -#endif /* CONFIG_SPARSE_RCU_POINTER */ # define __private __attribute__((noderef)) extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); -- cgit v1.2.3 From bd8cc5a062f41e334596edbe823e2fa0adddd1b7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 15 May 2017 14:57:01 -0700 Subject: srcu: Remove Classic SRCU Classic SRCU was only ever intended to be a fallback in case of issues with Tree/Tiny SRCU, and the latter two are doing quite well in testing. This commit therefore removes Classic SRCU. Signed-off-by: Paul E. 
McKenney --- include/linux/srcu.h | 2 - include/linux/srcuclassic.h | 100 -------------------------------------------- 2 files changed, 102 deletions(-) delete mode 100644 include/linux/srcuclassic.h (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 5f509018e6b5..39af9bc0f653 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -60,8 +60,6 @@ int init_srcu_struct(struct srcu_struct *sp); #include #elif defined(CONFIG_TREE_SRCU) #include -#elif defined(CONFIG_CLASSIC_SRCU) -#include #elif defined(CONFIG_SRCU) #error "Unknown SRCU implementation specified to kernel configuration" #else diff --git a/include/linux/srcuclassic.h b/include/linux/srcuclassic.h deleted file mode 100644 index 67db4a36ef0d..000000000000 --- a/include/linux/srcuclassic.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Sleepable Read-Copy Update mechanism for mutual exclusion, - * classic v4.11 variant. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * Copyright (C) IBM Corporation, 2017 - * - * Author: Paul McKenney - */ - -#ifndef _LINUX_SRCU_CLASSIC_H -#define _LINUX_SRCU_CLASSIC_H - -struct srcu_array { - unsigned long lock_count[2]; - unsigned long unlock_count[2]; -}; - -struct rcu_batch { - struct rcu_head *head, **tail; -}; - -#define RCU_BATCH_INIT(name) { NULL, &(name.head) } - -struct srcu_struct { - unsigned long completed; - struct srcu_array __percpu *per_cpu_ref; - spinlock_t queue_lock; /* protect ->batch_queue, ->running */ - bool running; - /* callbacks just queued */ - struct rcu_batch batch_queue; - /* callbacks try to do the first check_zero */ - struct rcu_batch batch_check0; - /* callbacks done with the first check_zero and the flip */ - struct rcu_batch batch_check1; - struct rcu_batch batch_done; - struct delayed_work work; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -}; - -void process_srcu(struct work_struct *work); - -#define __SRCU_STRUCT_INIT(name) \ - { \ - .completed = -300, \ - .per_cpu_ref = &name##_srcu_array, \ - .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ - .running = false, \ - .batch_queue = RCU_BATCH_INIT(name.batch_queue), \ - .batch_check0 = RCU_BATCH_INIT(name.batch_check0), \ - .batch_check1 = RCU_BATCH_INIT(name.batch_check1), \ - .batch_done = RCU_BATCH_INIT(name.batch_done), \ - .work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\ - __SRCU_DEP_MAP_INIT(name) \ - } - -/* - * Define and initialize a srcu struct at build time. - * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it. - * - * Note that although DEFINE_STATIC_SRCU() hides the name from other - * files, the per-CPU variable rules nevertheless require that the - * chosen name be globally unique. These rules also prohibit use of - * DEFINE_STATIC_SRCU() within a function. If these rules are too - * restrictive, declare the srcu_struct manually. 
For example, in - * each file: - * - * static struct srcu_struct my_srcu; - * - * Then, before the first use of each my_srcu, manually initialize it: - * - * init_srcu_struct(&my_srcu); - * - * See include/linux/percpu-defs.h for the rules on per-CPU variables. - */ -#define __DEFINE_SRCU(name, is_static) \ - static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\ - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) -#define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) -#define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) - -void synchronize_srcu_expedited(struct srcu_struct *sp); -void srcu_barrier(struct srcu_struct *sp); - -#endif -- cgit v1.2.3 From ecbaa83ee84cdf592c2891ca4c205b23d6b79a6f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 8 Jun 2017 10:12:42 +0200 Subject: driver core: remove class_attrs from struct class This field is no longer used or needed (use class_groups instead), so it can be removed along with the driver core functionality that created and removed these files. Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 9a902ae33932..5b725b943cf2 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -365,7 +365,6 @@ int subsys_virtual_register(struct bus_type *subsys, * struct class - device classes * @name: Name of the class. * @owner: The module owner. - * @class_attrs: Default attributes of this class. * @class_groups: Default attributes of this class. * @dev_groups: Default attributes of the devices that belong to the class. * @dev_kobj: The kobject that represents this class and links it into the hierarchy. 
@@ -394,7 +393,6 @@ struct class { const char *name; struct module *owner; - struct class_attribute *class_attrs; const struct attribute_group **class_groups; const struct attribute_group **dev_groups; struct kobject *dev_kobj; -- cgit v1.2.3 From 9f4ac349bd60ef463450a00aa5e19c67f5ad12e2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 6 Jun 2017 14:17:02 +0200 Subject: sh: superhyway: use dev_groups and not dev_attrs for bus_type The dev_attrs field has long been "deprecated" and is finally being removed, so move the driver to use the "correct" dev_groups field instead for struct bus_type. Cc: Yoshinori Sato Cc: Rich Felker Cc: Greg Kroah-Hartman Cc: Signed-off-by: Greg Kroah-Hartman --- include/linux/superhyway.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/superhyway.h b/include/linux/superhyway.h index 17ea468fa362..8d3376775813 100644 --- a/include/linux/superhyway.h +++ b/include/linux/superhyway.h @@ -101,7 +101,7 @@ int superhyway_add_device(unsigned long base, struct superhyway_device *, struct int superhyway_add_devices(struct superhyway_bus *bus, struct superhyway_device **devices, int nr_devices); /* drivers/sh/superhyway/superhyway-sysfs.c */ -extern struct device_attribute superhyway_dev_attrs[]; +extern const struct attribute_group *superhyway_dev_groups[]; #endif /* __LINUX_SUPERHYWAY_H */ -- cgit v1.2.3 From f42fe520e449bda213f035478ddc9cf99e9082ac Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 5 Jun 2017 14:15:06 -0600 Subject: coresight: use const for device_node structures Almost all low-level functions from open firmware already use const to qualify device_node structures, so add const to the device_node parameters of the of_coresight related functions.
Signed-off-by: Leo Yan Reviewed-by: Stephen Boyd Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- include/linux/coresight.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 035c16c9a505..bf0aa50880be 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -263,11 +263,12 @@ static inline int coresight_timeout(void __iomem *addr, u32 offset, #endif #ifdef CONFIG_OF -extern struct coresight_platform_data *of_get_coresight_platform_data( - struct device *dev, struct device_node *node); +extern struct coresight_platform_data * +of_get_coresight_platform_data(struct device *dev, + const struct device_node *node); #else static inline struct coresight_platform_data *of_get_coresight_platform_data( - struct device *dev, struct device_node *node) { return NULL; } + struct device *dev, const struct device_node *node) { return NULL; } #endif #ifdef CONFIG_PID_NS -- cgit v1.2.3 From c56cdd7a5c836db7834256f09692112afee9eb3f Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 5 Jun 2017 14:15:15 -0600 Subject: coresight: refactor with function of_coresight_get_cpu This is a refactor that adds the function of_coresight_get_cpu(), which is used to retrieve the CPU id for a coresight component. It can then be used as a common function in multiple places.
Suggested-by: Mathieu Poirier Signed-off-by: Leo Yan Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- include/linux/coresight.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index bf0aa50880be..d950dad5056a 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -263,10 +263,13 @@ static inline int coresight_timeout(void __iomem *addr, u32 offset, #endif #ifdef CONFIG_OF +extern int of_coresight_get_cpu(const struct device_node *node); extern struct coresight_platform_data * of_get_coresight_platform_data(struct device *dev, const struct device_node *node); #else +static inline int of_coresight_get_cpu(const struct device_node *node) +{ return 0; } static inline struct coresight_platform_data *of_get_coresight_platform_data( struct device *dev, const struct device_node *node) { return NULL; } #endif -- cgit v1.2.3 From 0cbaa44841db3d06d2a21ae4ab679882033f3dbe Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 6 Jun 2017 16:08:39 -0500 Subject: lib: Add crc4 module Add a little helper for crc4 calculations. This works 4-bits-at-a-time, using a simple table approach. We will need this in the FSI core code, as well as any master implementations that need to calculate CRCs in software. 
Signed-off-by: Jeremy Kerr Signed-off-by: Chris Bostic Signed-off-by: Joel Stanley Signed-off-by: Greg Kroah-Hartman --- include/linux/crc4.h | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 include/linux/crc4.h (limited to 'include/linux') diff --git a/include/linux/crc4.h b/include/linux/crc4.h new file mode 100644 index 000000000000..8f739f1d794f --- /dev/null +++ b/include/linux/crc4.h @@ -0,0 +1,8 @@ +#ifndef _LINUX_CRC4_H +#define _LINUX_CRC4_H + +#include + +extern uint8_t crc4(uint8_t c, uint64_t x, int bits); + +#endif /* _LINUX_CRC4_H */ -- cgit v1.2.3 From f7ade2a603cfd205a6d7afb9d96ac7975f666dd6 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 6 Jun 2017 16:08:44 -0500 Subject: drivers/fsi: scan slaves & register devices Now that we have fsi_slave devices, scan each for endpoints, and register them on the fsi bus. Includes contributions from Christopher Bostic . Signed-off-by: Jeremy Kerr Signed-off-by: Christopher Bostic Signed-off-by: Joel Stanley Signed-off-by: Greg Kroah-Hartman --- include/linux/fsi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsi.h b/include/linux/fsi.h index 273cbf6400ea..efa55ba6cb39 100644 --- a/include/linux/fsi.h +++ b/include/linux/fsi.h @@ -21,6 +21,10 @@ struct fsi_device { struct device dev; u8 engine_type; u8 version; + u8 unit; + struct fsi_slave *slave; + uint32_t addr; + uint32_t size; }; struct fsi_device_id { -- cgit v1.2.3 From 4efe37f4c4efcb73562e4634cb6c262b08ab6451 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 6 Jun 2017 16:08:45 -0500 Subject: drivers/fsi: Add device read/write/peek API This change introduces the fsi device API: simple read, write and peek accessors for the devices' address spaces. Includes contributions from Christopher Bostic and Edward A. James . Signed-off-by: Edward A. 
James Signed-off-by: Jeremy Kerr Signed-off-by: Christopher Bostic Signed-off-by: Joel Stanley Signed-off-by: Greg Kroah-Hartman --- include/linux/fsi.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsi.h b/include/linux/fsi.h index efa55ba6cb39..66bce4851ff6 100644 --- a/include/linux/fsi.h +++ b/include/linux/fsi.h @@ -27,6 +27,12 @@ struct fsi_device { uint32_t size; }; +extern int fsi_device_read(struct fsi_device *dev, uint32_t addr, + void *val, size_t size); +extern int fsi_device_write(struct fsi_device *dev, uint32_t addr, + const void *val, size_t size); +extern int fsi_device_peek(struct fsi_device *dev, void *val); + struct fsi_device_id { u8 engine_type; u8 version; @@ -40,7 +46,6 @@ struct fsi_device_id { #define FSI_DEVICE_VERSIONED(t, v) \ .engine_type = (t), .version = (v), - struct fsi_driver { struct device_driver drv; const struct fsi_device_id *id_table; -- cgit v1.2.3 From 356d8009a5a4569f17a3508b50a347bdf4d5b337 Mon Sep 17 00:00:00 2001 From: Christopher Bostic Date: Tue, 6 Jun 2017 16:08:48 -0500 Subject: drivers/fsi: Add client driver register utilities Add driver_register and driver_unregister wrappers for FSI. Signed-off-by: Christopher Bostic Signed-off-by: Joel Stanley Signed-off-by: Greg Kroah-Hartman --- include/linux/fsi.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsi.h b/include/linux/fsi.h index 66bce4851ff6..34f1e9aea725 100644 --- a/include/linux/fsi.h +++ b/include/linux/fsi.h @@ -54,6 +54,18 @@ struct fsi_driver { #define to_fsi_dev(devp) container_of(devp, struct fsi_device, dev) #define to_fsi_drv(drvp) container_of(drvp, struct fsi_driver, drv) +extern int fsi_driver_register(struct fsi_driver *fsi_drv); +extern void fsi_driver_unregister(struct fsi_driver *fsi_drv); + +/* module_fsi_driver() - Helper macro for drivers that don't do + * anything special in module init/exit. 
This eliminates a lot of + * boilerplate. Each module may only use this macro once, and + * calling it replaces module_init() and module_exit() + */ +#define module_fsi_driver(__fsi_driver) \ + module_driver(__fsi_driver, fsi_driver_register, \ + fsi_driver_unregister) + extern struct bus_type fsi_bus_type; #endif /* LINUX_FSI_H */ -- cgit v1.2.3 From da36cadf89a75a730302a4df114cb930b1becc39 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 6 Jun 2017 16:08:50 -0500 Subject: drivers/fsi: expose direct-access slave API Allow drivers to access the slave address ranges. Signed-off-by: Jeremy Kerr Signed-off-by: Joel Stanley Signed-off-by: Christopher Bostic Signed-off-by: Greg Kroah-Hartman --- include/linux/fsi.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsi.h b/include/linux/fsi.h index 34f1e9aea725..141fd38d061f 100644 --- a/include/linux/fsi.h +++ b/include/linux/fsi.h @@ -66,6 +66,18 @@ extern void fsi_driver_unregister(struct fsi_driver *fsi_drv); module_driver(__fsi_driver, fsi_driver_register, \ fsi_driver_unregister) +/* direct slave API */ +extern int fsi_slave_claim_range(struct fsi_slave *slave, + uint32_t addr, uint32_t size); +extern void fsi_slave_release_range(struct fsi_slave *slave, + uint32_t addr, uint32_t size); +extern int fsi_slave_read(struct fsi_slave *slave, uint32_t addr, + void *val, size_t size); +extern int fsi_slave_write(struct fsi_slave *slave, uint32_t addr, + const void *val, size_t size); + + + extern struct bus_type fsi_bus_type; #endif /* LINUX_FSI_H */ -- cgit v1.2.3 From de0d6dbdbdb23ddb85f10d54a516e794f9a873e0 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Mon, 5 Jun 2017 08:52:08 -0500 Subject: w1: Add subsystem kernel public interface Like other subsystems we should be able to define slave devices outside of the w1 directory. 
To do this we move public facing interface definitions to include/linux/w1.h and rename the internal definition file to w1_internal.h. As w1_family.h and w1_int.h contained almost entirely public driver interface definitions we simply removed these files and moved the remaining definitions into w1_internal.h. With this we can now start to move slave devices out of w1/slaves and into the subsystem based on the function they implement, again like other drivers. Signed-off-by: Andrew F. Davis Reviewed-by: Sebastian Reichel Acked-by: Evgeniy Polyakov Signed-off-by: Greg Kroah-Hartman --- include/linux/w1.h | 320 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 320 insertions(+) create mode 100644 include/linux/w1.h (limited to 'include/linux') diff --git a/include/linux/w1.h b/include/linux/w1.h new file mode 100644 index 000000000000..90cbe7e65059 --- /dev/null +++ b/include/linux/w1.h @@ -0,0 +1,320 @@ +/* + * Copyright (c) 2004 Evgeniy Polyakov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef __LINUX_W1_H +#define __LINUX_W1_H + +#include + +/** + * struct w1_reg_num - broken out slave device id + * + * @family: identifies the type of device + * @id: along with family is the unique device id + * @crc: checksum of the other bytes + */ +struct w1_reg_num { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u64 family:8, + id:48, + crc:8; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u64 crc:8, + id:48, + family:8; +#else +#error "Please fix " +#endif +}; + +#ifdef __KERNEL__ + +#define W1_MAXNAMELEN 32 + +#define W1_SEARCH 0xF0 +#define W1_ALARM_SEARCH 0xEC +#define W1_CONVERT_TEMP 0x44 +#define W1_SKIP_ROM 0xCC +#define W1_COPY_SCRATCHPAD 0x48 +#define W1_WRITE_SCRATCHPAD 0x4E +#define W1_READ_SCRATCHPAD 0xBE +#define W1_READ_ROM 0x33 +#define W1_READ_PSUPPLY 0xB4 +#define W1_MATCH_ROM 0x55 +#define W1_RESUME_CMD 0xA5 + +/** + * struct w1_slave - holds a single slave device on the bus + * + * @owner: Points to the one wire "wire" kernel module. + * @name: Device id is ascii. 
+ * @w1_slave_entry: data for the linked list + * @reg_num: the slave id in binary + * @refcnt: reference count, delete when 0 + * @flags: bit flags for W1_SLAVE_ACTIVE W1_SLAVE_DETACH + * @ttl: decrement per search this slave isn't found, detach at 0 + * @master: bus which this slave is on + * @family: module for device family type + * @family_data: pointer for use by the family module + * @dev: kernel device identifier + * + */ +struct w1_slave { + struct module *owner; + unsigned char name[W1_MAXNAMELEN]; + struct list_head w1_slave_entry; + struct w1_reg_num reg_num; + atomic_t refcnt; + int ttl; + unsigned long flags; + + struct w1_master *master; + struct w1_family *family; + void *family_data; + struct device dev; +}; + +typedef void (*w1_slave_found_callback)(struct w1_master *, u64); + +/** + * struct w1_bus_master - operations available on a bus master + * + * @data: the first parameter in all the functions below + * + * @read_bit: Sample the line level @return the level read (0 or 1) + * + * @write_bit: Sets the line level + * + * @touch_bit: the lowest-level function for devices that really support the + * 1-wire protocol. + * touch_bit(0) = write-0 cycle + * touch_bit(1) = write-1 / read cycle + * @return the bit read (0 or 1) + * + * @read_byte: Reads a byte. Same as 8 touch_bit(1) calls. + * @return the byte read + * + * @write_byte: Writes a byte. Same as 8 touch_bit(x) calls. + * + * @read_block: Same as a series of read_byte() calls + * @return the number of bytes read + * + * @write_block: Same as a series of write_byte() calls + * + * @triplet: Combines two reads and a smart write for ROM searches + * @return bit0=Id bit1=comp_id bit2=dir_taken + * + * @reset_bus: long write-0 with a read for the presence pulse detection + * @return -1=Error, 0=Device present, 1=No device present + * + * @set_pullup: Put out a strong pull-up pulse of the specified duration.
+ * @return -1=Error, 0=completed + * + * @search: Really nice hardware can handle the different types of ROM search + * w1_master* is passed to the slave found callback. + * u8 is search_type, W1_SEARCH or W1_ALARM_SEARCH + * + * Note: read_bit and write_bit are very low level functions and should only + * be used with hardware that doesn't really support 1-wire operations, + * like a parallel/serial port. + * Either define read_bit and write_bit OR define, at minimum, touch_bit and + * reset_bus. + * + */ +struct w1_bus_master { + void *data; + + u8 (*read_bit)(void *); + + void (*write_bit)(void *, u8); + + u8 (*touch_bit)(void *, u8); + + u8 (*read_byte)(void *); + + void (*write_byte)(void *, u8); + + u8 (*read_block)(void *, u8 *, int); + + void (*write_block)(void *, const u8 *, int); + + u8 (*triplet)(void *, u8); + + u8 (*reset_bus)(void *); + + u8 (*set_pullup)(void *, int); + + void (*search)(void *, struct w1_master *, + u8, w1_slave_found_callback); +}; + +/** + * enum w1_master_flags - bitfields used in w1_master.flags + * @W1_ABORT_SEARCH: abort searching early on shutdown + * @W1_WARN_MAX_COUNT: limit warning when the maximum count is reached + */ +enum w1_master_flags { + W1_ABORT_SEARCH = 0, + W1_WARN_MAX_COUNT = 1, +}; + +/** + * struct w1_master - one per bus master + * @w1_master_entry: master linked list + * @owner: module owner + * @name: dynamically allocated bus name + * @list_mutex: protect slist and async_list + * @slist: linked list of slaves + * @async_list: linked list of netlink commands to execute + * @max_slave_count: maximum number of slaves to search for at a time + * @slave_count: current number of slaves known + * @attempts: number of searches run + * @slave_ttl: number of searches before a slave is timed out + * @initialized: prevent init/removal race conditions + * @id: w1 bus number + * @search_count: number of automatic searches to run, -1 unlimited + * @search_id: allows continuing a search + * @refcnt: reference count + *
@priv: private data storage + * @enable_pullup: allows a strong pullup + * @pullup_duration: time for the next strong pullup + * @flags: one of w1_master_flags + * @thread: thread for bus search and netlink commands + * @mutex: protect most of w1_master + * @bus_mutex: protect against concurrent bus access + * @driver: sysfs driver + * @dev: sysfs device + * @bus_master: io operations available + * @seq: sequence number used for netlink broadcasts + */ +struct w1_master { + struct list_head w1_master_entry; + struct module *owner; + unsigned char name[W1_MAXNAMELEN]; + /* list_mutex protects just slist and async_list so slaves can be + * searched for and async commands added while the master has + * w1_master.mutex locked and is operating on the bus. + * lock order w1_mlock, w1_master.mutex, w1_master.list_mutex + */ + struct mutex list_mutex; + struct list_head slist; + struct list_head async_list; + int max_slave_count, slave_count; + unsigned long attempts; + int slave_ttl; + int initialized; + u32 id; + int search_count; + /* id to start searching on, to continue a search or 0 to restart */ + u64 search_id; + + atomic_t refcnt; + + void *priv; + + /** 5V strong pullup enabled flag, 1 enabled, zero disabled. */ + int enable_pullup; + /** 5V strong pullup duration in milliseconds, zero disabled.
*/ + int pullup_duration; + + long flags; + + struct task_struct *thread; + struct mutex mutex; + struct mutex bus_mutex; + + struct device_driver *driver; + struct device dev; + + struct w1_bus_master *bus_master; + + u32 seq; +}; + +int w1_add_master_device(struct w1_bus_master *master); +void w1_remove_master_device(struct w1_bus_master *master); + +/** + * struct w1_family_ops - operations for a family type + * @add_slave: add_slave + * @remove_slave: remove_slave + * @groups: sysfs group + */ +struct w1_family_ops { + int (*add_slave)(struct w1_slave *sl); + void (*remove_slave)(struct w1_slave *sl); + const struct attribute_group **groups; +}; + +/** + * struct w1_family - reference counted family structure. + * @family_entry: family linked list + * @fid: 8 bit family identifier + * @fops: operations for this family + * @refcnt: reference counter + */ +struct w1_family { + struct list_head family_entry; + u8 fid; + + struct w1_family_ops *fops; + + atomic_t refcnt; +}; + +int w1_register_family(struct w1_family *family); +void w1_unregister_family(struct w1_family *family); + +/** + * module_w1_family() - Helper macro for registering 1-Wire families + * @__w1_family: w1_family struct + * + * Helper macro for 1-Wire families which do not do anything special in module + * init/exit. This eliminates a lot of boilerplate.
Each module may only + * use this macro once, and calling it replaces module_init() and module_exit() + */ +#define module_w1_family(__w1_family) \ + module_driver(__w1_family, w1_register_family, \ + w1_unregister_family) + +u8 w1_triplet(struct w1_master *dev, int bdir); +void w1_write_8(struct w1_master *, u8); +u8 w1_read_8(struct w1_master *); +int w1_reset_bus(struct w1_master *); +u8 w1_calc_crc8(u8 *, int); +void w1_write_block(struct w1_master *, const u8 *, int); +void w1_touch_block(struct w1_master *, u8 *, int); +u8 w1_read_block(struct w1_master *, u8 *, int); +int w1_reset_select_slave(struct w1_slave *sl); +int w1_reset_resume_command(struct w1_master *); +void w1_next_pullup(struct w1_master *, int); + +static inline struct w1_slave* dev_to_w1_slave(struct device *dev) +{ + return container_of(dev, struct w1_slave, dev); +} + +static inline struct w1_slave* kobj_to_w1_slave(struct kobject *kobj) +{ + return dev_to_w1_slave(container_of(kobj, struct device, kobj)); +} + +static inline struct w1_master* dev_to_w1_master(struct device *dev) +{ + return container_of(dev, struct w1_master, dev); +} + +#endif /* __KERNEL__ */ + +#endif /* __LINUX_W1_H */ -- cgit v1.2.3 From 4055351cdbb44e8646ff67b346c80097e1d2c04c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Jun 2017 09:37:58 +0200 Subject: fs: remove the unused error argument to dio_end_io() Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 803e5a9b2654..4388ab58843d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2843,7 +2843,7 @@ enum { DIO_SKIP_DIO_COUNT = 0x08, }; -void dio_end_io(struct bio *bio, int error); +void dio_end_io(struct bio *bio); ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct block_device *bdev, struct iov_iter *iter, -- cgit 
v1.2.3 From 1be5690984588953e759af0a4c6ddac182a1806c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Jun 2017 09:38:03 +0200 Subject: dm: change ->end_io calling convention Turn the error parameter into a pointer so that target drivers can change the value, and make sure only DM_ENDIO_* values are returned from the methods. Signed-off-by: Christoph Hellwig Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/device-mapper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index f4c639c0c362..dec227acc13b 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -72,7 +72,7 @@ typedef void (*dm_release_clone_request_fn) (struct request *clone); * 2 : The target wants to push back the io */ typedef int (*dm_endio_fn) (struct dm_target *ti, - struct bio *bio, int error); + struct bio *bio, int *error); typedef int (*dm_request_endio_fn) (struct dm_target *ti, struct request *clone, int error, union map_info *map_context); -- cgit v1.2.3 From 2a842acab109f40f0d7d10b38e9ca88390628996 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Jun 2017 09:38:04 +0200 Subject: block: introduce new block status code type Currently we use normal Linux errno values in the block layer, and while we accept any error a few have overloaded magic meanings. This patch instead introduces a new blk_status_t value that holds block layer specific status codes and explicitly explains their meaning. Helpers to convert from and to the previous special meanings are provided for now, but I suspect we want to get rid of them in the long run - those drivers that have an errno input (e.g.
networking) usually get errnos that don't know about the special block layer overloads, and similarly returning them to userspace will usually return something that strictly speaking isn't correct for file system operations, but that's left as an exercise for later. For now the set of errors is a very limited set that closely corresponds to the previous overloaded errno values, but there is some low-hanging fruit to improve it. blk_status_t (ab)uses the sparse __bitwise annotations to allow for sparse typechecking, so that we can easily catch places passing the wrong values. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++-- include/linux/blk_types.h | 16 ++++++++++++++++ include/linux/blkdev.h | 21 ++++++++++++--------- include/linux/device-mapper.h | 2 +- include/linux/ide.h | 6 +++--- 5 files changed, 34 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index fcd641032f8d..0cf6735046d3 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -230,8 +230,8 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) int blk_mq_request_started(struct request *rq); void blk_mq_start_request(struct request *rq); -void blk_mq_end_request(struct request *rq, int error); -void __blk_mq_end_request(struct request *rq, int error); +void blk_mq_end_request(struct request *rq, blk_status_t error); +void __blk_mq_end_request(struct request *rq, blk_status_t error); void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 61339bc44400..59378939a8cd 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -17,6 +17,22 @@ struct io_context; struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); +/* + * Block error status values.
See block/blk-core:blk_errors for the details. + */ +typedef u8 __bitwise blk_status_t; +#define BLK_STS_OK 0 +#define BLK_STS_NOTSUPP ((__force blk_status_t)1) +#define BLK_STS_TIMEOUT ((__force blk_status_t)2) +#define BLK_STS_NOSPC ((__force blk_status_t)3) +#define BLK_STS_TRANSPORT ((__force blk_status_t)4) +#define BLK_STS_TARGET ((__force blk_status_t)5) +#define BLK_STS_NEXUS ((__force blk_status_t)6) +#define BLK_STS_MEDIUM ((__force blk_status_t)7) +#define BLK_STS_PROTECTION ((__force blk_status_t)8) +#define BLK_STS_RESOURCE ((__force blk_status_t)9) +#define BLK_STS_IOERR ((__force blk_status_t)10) + struct blk_issue_stat { u64 stat; }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 019f18c65098..2a8871638453 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -55,7 +55,7 @@ struct blk_stat_callback; */ #define BLKCG_MAX_POLS 3 -typedef void (rq_end_io_fn)(struct request *, int); +typedef void (rq_end_io_fn)(struct request *, blk_status_t); #define BLK_RL_SYNCFULL (1U << 0) #define BLK_RL_ASYNCFULL (1U << 1) @@ -940,7 +940,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, int (*bio_ctr)(struct bio *, struct bio *, void *), void *data); extern void blk_rq_unprep_clone(struct request *rq); -extern int blk_insert_cloned_request(struct request_queue *q, +extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern int blk_rq_append_bio(struct request *rq, struct bio *bio); extern void blk_delay_queue(struct request_queue *, unsigned long); @@ -980,6 +980,9 @@ extern void blk_execute_rq(struct request_queue *, struct gendisk *, extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); +int blk_status_to_errno(blk_status_t status); +blk_status_t errno_to_blk_status(int errno); + bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie); static inline struct request_queue *bdev_get_queue(struct 
block_device *bdev) @@ -1112,16 +1115,16 @@ extern struct request *blk_fetch_request(struct request_queue *q); * blk_end_request() for parts of the original function. * This prevents code duplication in drivers. */ -extern bool blk_update_request(struct request *rq, int error, +extern bool blk_update_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); -extern void blk_finish_request(struct request *rq, int error); -extern bool blk_end_request(struct request *rq, int error, +extern void blk_finish_request(struct request *rq, blk_status_t error); +extern bool blk_end_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); -extern void blk_end_request_all(struct request *rq, int error); -extern bool __blk_end_request(struct request *rq, int error, +extern void blk_end_request_all(struct request *rq, blk_status_t error); +extern bool __blk_end_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); -extern void __blk_end_request_all(struct request *rq, int error); -extern bool __blk_end_request_cur(struct request *rq, int error); +extern void __blk_end_request_all(struct request *rq, blk_status_t error); +extern bool __blk_end_request_cur(struct request *rq, blk_status_t error); extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index dec227acc13b..5de5c53251ec 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -74,7 +74,7 @@ typedef void (*dm_release_clone_request_fn) (struct request *clone); typedef int (*dm_endio_fn) (struct dm_target *ti, struct bio *bio, int *error); typedef int (*dm_request_endio_fn) (struct dm_target *ti, - struct request *clone, int error, + struct request *clone, blk_status_t error, union map_info *map_context); typedef void (*dm_presuspend_fn) (struct dm_target *ti); diff --git a/include/linux/ide.h b/include/linux/ide.h index 
6980ca322074..dc152e4b7f73 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -671,7 +671,7 @@ struct ide_port_ops { void (*init_dev)(ide_drive_t *); void (*set_pio_mode)(struct hwif_s *, ide_drive_t *); void (*set_dma_mode)(struct hwif_s *, ide_drive_t *); - int (*reset_poll)(ide_drive_t *); + blk_status_t (*reset_poll)(ide_drive_t *); void (*pre_reset)(ide_drive_t *); void (*resetproc)(ide_drive_t *); void (*maskproc)(ide_drive_t *, int); @@ -1092,7 +1092,7 @@ int generic_ide_ioctl(ide_drive_t *, struct block_device *, unsigned, unsigned l extern int ide_vlb_clk; extern int ide_pci_clk; -int ide_end_rq(ide_drive_t *, struct request *, int, unsigned int); +int ide_end_rq(ide_drive_t *, struct request *, blk_status_t, unsigned int); void ide_kill_rq(ide_drive_t *, struct request *); void __ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int); @@ -1123,7 +1123,7 @@ extern int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting, int arg); void ide_complete_cmd(ide_drive_t *, struct ide_cmd *, u8, u8); -int ide_complete_rq(ide_drive_t *, int, unsigned int); +int ide_complete_rq(ide_drive_t *, blk_status_t, unsigned int); void ide_tf_readback(ide_drive_t *drive, struct ide_cmd *cmd); void ide_tf_dump(const char *, struct ide_cmd *); -- cgit v1.2.3 From fc17b6534eb8395f0b3133eb31d87deec32c642b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Jun 2017 09:38:05 +0200 Subject: blk-mq: switch ->queue_rq return value to blk_status_t Use the same values for use for request completion errors as the return value from ->queue_rq. BLK_STS_RESOURCE is special cased to cause a requeue, and all the others are completed as-is. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 0cf6735046d3..b144b7b0e104 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -87,7 +87,8 @@ struct blk_mq_queue_data { bool last; }; -typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); +typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *, + const struct blk_mq_queue_data *); typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); @@ -155,10 +156,6 @@ struct blk_mq_ops { }; enum { - BLK_MQ_RQ_QUEUE_OK = 0, /* queued fine */ - BLK_MQ_RQ_QUEUE_BUSY = 1, /* requeue IO for later */ - BLK_MQ_RQ_QUEUE_ERROR = 2, /* end IO with error */ - BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_TAG_SHARED = 1 << 1, BLK_MQ_F_SG_MERGE = 1 << 2, -- cgit v1.2.3 From 4e4cbee93d56137ebff722be022cae5f70ef84fb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Jun 2017 09:38:06 +0200 Subject: block: switch bios to blk_status_t Replace bi_error with a new bi_status to allow for a clear conversion. Note that device mapper overloaded bi_error with a private value, which we'll have to keep around at least for now and thus propagate to a proper blk_status_t value.
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- include/linux/blk_types.h | 5 ++++- include/linux/blkdev.h | 2 +- include/linux/device-mapper.h | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index d1b04b0e99cf..9455aada1399 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -414,7 +414,7 @@ extern void bio_endio(struct bio *); static inline void bio_io_error(struct bio *bio) { - bio->bi_error = -EIO; + bio->bi_status = BLK_STS_IOERR; bio_endio(bio); } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 59378939a8cd..dcd45b15a3a5 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -33,6 +33,9 @@ typedef u8 __bitwise blk_status_t; #define BLK_STS_RESOURCE ((__force blk_status_t)9) #define BLK_STS_IOERR ((__force blk_status_t)10) +/* hack for device mapper, don't use elsewhere: */ +#define BLK_STS_DM_REQUEUE ((__force blk_status_t)11) + struct blk_issue_stat { u64 stat; }; @@ -44,7 +47,7 @@ struct blk_issue_stat { struct bio { struct bio *bi_next; /* request queue link */ struct block_device *bi_bdev; - int bi_error; + blk_status_t bi_status; unsigned int bi_opf; /* bottom bits req flags, * top bits REQ_OP. Use * accessors. 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2a8871638453..76b6df862a12 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1782,7 +1782,7 @@ struct blk_integrity_iter { const char *disk_name; }; -typedef int (integrity_processing_fn) (struct blk_integrity_iter *); +typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *); struct blk_integrity_profile { integrity_processing_fn *generate_fn; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 5de5c53251ec..456da5017b32 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -72,7 +72,7 @@ typedef void (*dm_release_clone_request_fn) (struct request *clone); * 2 : The target wants to push back the io */ typedef int (*dm_endio_fn) (struct dm_target *ti, - struct bio *bio, int *error); + struct bio *bio, blk_status_t *error); typedef int (*dm_request_endio_fn) (struct dm_target *ti, struct request *clone, blk_status_t error, union map_info *map_context); -- cgit v1.2.3 From 0aed55af88345b5d673240f90e671d79662fb01e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 12:22:50 -0700 Subject: x86, uaccess: introduce copy_from_iter_flushcache for pmem / cache-bypass operations The pmem driver has a need to transfer data with a persistent memory destination and be able to rely on the fact that the destination writes are not cached. It is sufficient for the writes to be flushed to a cpu-store-buffer (non-temporal / "movnt" in x86 terms), as we expect userspace to call fsync() to ensure data-writes have reached a power-fail-safe zone in the platform. The fsync() triggers a REQ_FUA or REQ_FLUSH to the pmem driver which will turn around and fence previous writes with an "sfence". Implement a __copy_from_user_inatomic_flushcache, memcpy_page_flushcache, and memcpy_flushcache, that guarantee that the destination buffer is not dirty in the cpu cache on completion. 
The new copy_from_iter_flushcache and sub-routines will be used to replace the "pmem api" (include/linux/pmem.h + arch/x86/include/asm/pmem.h). The availability of copy_from_iter_flushcache() and memcpy_flushcache() are gated by the CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE config symbol, and fallback to copy_from_iter_nocache() and plain memcpy() otherwise. This is meant to satisfy the concern from Linus that if a driver wants to do something beyond the normal nocache semantics it should be something private to that driver [1], and Al's concern that anything uaccess related belongs with the rest of the uaccess code [2]. The first consumer of this interface is a new 'copy_from_iter' dax operation so that pmem can inject cache maintenance operations without imposing this overhead on other dax-capable drivers. [1]: https://lists.01.org/pipermail/linux-nvdimm/2017-January/008364.html [2]: https://lists.01.org/pipermail/linux-nvdimm/2017-April/009942.html Cc: Cc: Jan Kara Cc: Jeff Moyer Cc: Ingo Molnar Cc: Christoph Hellwig Cc: Toshi Kani Cc: "H. 
Peter Anvin" Cc: Al Viro Cc: Thomas Gleixner Cc: Matthew Wilcox Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams --- include/linux/dax.h | 3 +++ include/linux/string.h | 6 ++++++ include/linux/uio.h | 15 +++++++++++++++ 3 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 5ec1f6c47716..bbe79ed90e2b 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -16,6 +16,9 @@ struct dax_operations { */ long (*direct_access)(struct dax_device *, pgoff_t, long, void **, pfn_t *); + /* copy_from_iter: dax-driver override for default copy_from_iter */ + size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, + struct iov_iter *); }; #if IS_ENABLED(CONFIG_DAX) diff --git a/include/linux/string.h b/include/linux/string.h index 537918f8a98e..7439d83eaa33 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -122,6 +122,12 @@ static inline __must_check int memcpy_mcsafe(void *dst, const void *src, return 0; } #endif +#ifndef __HAVE_ARCH_MEMCPY_FLUSHCACHE +static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt) +{ + memcpy(dst, src, cnt); +} +#endif void *memchr_inv(const void *s, int c, size_t n); char *strreplace(char *s, char old, char new); diff --git a/include/linux/uio.h b/include/linux/uio.h index f2d36a3d3005..55cd54a0e941 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -95,6 +95,21 @@ size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i); size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i); size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i); +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE +/* + * Note, users like pmem that depend on the stricter semantics of + * copy_from_iter_flushcache() than copy_from_iter_nocache() must check for + * IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that 
the + * destination is flushed from the cache on return. + */ +size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); +#else +static inline size_t copy_from_iter_flushcache(void *addr, size_t bytes, + struct iov_iter *i) +{ + return copy_from_iter_nocache(addr, bytes, i); +} +#endif bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i); size_t iov_iter_zero(size_t bytes, struct iov_iter *); unsigned long iov_iter_alignment(const struct iov_iter *i); -- cgit v1.2.3 From 7e026c8c0a4200da86bc51edeaad79dcdccf78ca Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 12:57:56 -0700 Subject: dm: add ->copy_from_iter() dax operation support Allow device-mapper to route copy_from_iter operations to the per-target implementation. In order for the device stacking to work we need a dax_dev and a pgoff relative to that device. This gives each layer of the stack the information it needs to look up the operation pointer for the next level. This conceptually allows for an array of mixed device drivers with varying copy_from_iter implementations. 
Reviewed-by: Toshi Kani Reviewed-by: Mike Snitzer Signed-off-by: Dan Williams --- include/linux/dax.h | 2 ++ include/linux/device-mapper.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index bbe79ed90e2b..28e398f8c59e 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -78,6 +78,8 @@ void kill_dax(struct dax_device *dax_dev); void *dax_get_private(struct dax_device *dax_dev); long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); +size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, + size_t bytes, struct iov_iter *i); /* * We use lowest available bit in exceptional entry for locking, one bit for diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index f4c639c0c362..11c8a0a92f9c 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -132,6 +132,8 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); */ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); +typedef size_t (*dm_dax_copy_from_iter_fn)(struct dm_target *ti, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i); #define PAGE_SECTORS (PAGE_SIZE / 512) void dm_error(const char *message); @@ -181,6 +183,7 @@ struct target_type { dm_iterate_devices_fn iterate_devices; dm_io_hints_fn io_hints; dm_dax_direct_access_fn direct_access; + dm_dax_copy_from_iter_fn dax_copy_from_iter; /* For internal device-mapper use. */ struct list_head list; -- cgit v1.2.3 From 3ad7d2468f79fc13215eb941f766a692d34b1381 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Thu, 8 Jun 2017 13:12:14 -0700 Subject: Ipvlan should return an error when an address is already in use. The ipvlan code already knows how to detect when a duplicate address is about to be assigned to an ipvlan device. 
However, that failure is not propagated outward and leads to a silent failure. Introduce a validation step at ip address creation time and allow device drivers to register to validate the incoming ip addresses. The ipvlan code is the first consumer. If it detects an address in use, we can return an error to the user before beginning to commit the new ifa in the networking code. This can be especially useful if it is necessary to provision many ipvlans in containers. The provisioning software (or operator) can use this to detect situations where an ip address is unexpectedly in use. Signed-off-by: Krister Johansen Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index a2e9d6ea1349..e7c04c4e4bcd 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -150,8 +150,15 @@ struct in_ifaddr { unsigned long ifa_tstamp; /* updated timestamp */ }; +struct in_validator_info { + __be32 ivi_addr; + struct in_device *ivi_dev; +}; + int register_inetaddr_notifier(struct notifier_block *nb); int unregister_inetaddr_notifier(struct notifier_block *nb); +int register_inetaddr_validator_notifier(struct notifier_block *nb); +int unregister_inetaddr_validator_notifier(struct notifier_block *nb); void inet_netconf_notify_devconf(struct net *net, int event, int type, int ifindex, struct ipv4_devconf *devconf); -- cgit v1.2.3 From 7c7973b2ae277c6e89dceda2246fff2472c8ffdb Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Fri, 9 Jun 2017 17:13:18 +0300 Subject: qed: LL2 to use packed information for tx First step in revising the LL2 interface, this declares qed_ll2_tx_pkt_info as part of the ll2 interface, and uses it for transmission instead of receiving lots of parameters. Signed-off-by: Yuval Mintz Signed-off-by: David S.
Miller --- include/linux/qed/qed_ll2_if.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index 4fb4666ea879..78d9ed6c6b6f 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -43,6 +43,18 @@ #include #include +enum qed_ll2_roce_flavor_type { + QED_LL2_ROCE, + QED_LL2_RROCE, + MAX_QED_LL2_ROCE_FLAVOR_TYPE +}; + +enum qed_ll2_tx_dest { + QED_LL2_TX_DEST_NW, /* Light L2 TX Destination to the Network */ + QED_LL2_TX_DEST_LB, /* Light L2 TX Destination to the Loopback */ + QED_LL2_TX_DEST_MAX +}; + struct qed_ll2_stats { u64 gsi_invalid_hdr; u64 gsi_invalid_pkt_length; @@ -67,6 +79,21 @@ struct qed_ll2_stats { u64 sent_bcast_pkts; }; +struct qed_ll2_tx_pkt_info { + void *cookie; + dma_addr_t first_frag; + enum qed_ll2_tx_dest tx_dest; + enum qed_ll2_roce_flavor_type qed_roce_flavor; + u16 vlan; + u16 l4_hdr_offset_w; /* from start of packet */ + u16 first_frag_len; + u8 num_of_bds; + u8 bd_flags; + bool enable_ip_cksum; + bool enable_l4_cksum; + bool calc_ip_len; +}; + #define QED_LL2_UNUSED_HANDLE (0xff) struct qed_ll2_cb_ops { -- cgit v1.2.3 From 68be910cd2fa3f58587438af7ce3def6e03731fa Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Fri, 9 Jun 2017 17:13:19 +0300 Subject: qed: Revise ll2 Rx completion This introduces qed_ll2_comp_rx_data as a public struct and moves handling of Rx packets in LL2 into using it. Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_ll2_if.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index 78d9ed6c6b6f..056ac007dd12 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -79,6 +79,32 @@ struct qed_ll2_stats { u64 sent_bcast_pkts; }; +struct qed_ll2_comp_rx_data { + void *cookie; + dma_addr_t rx_buf_addr; + u16 parse_flags; + u16 vlan; + bool b_last_packet; + u8 connection_handle; + + union { + u16 packet_length; + u16 data_length; + } length; + + u32 opaque_data_0; + u32 opaque_data_1; + + /* GSI only */ + u32 gid_dst[4]; + u16 qp_id; + + union { + u8 placement_offset; + u8 data_length_error; + } u; +}; + struct qed_ll2_tx_pkt_info { void *cookie; dma_addr_t first_frag; -- cgit v1.2.3 From 13c547717231aad7e1635004ae3f698e5e78d6d1 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Fri, 9 Jun 2017 17:13:20 +0300 Subject: qed: Cleaner seperation of LL2 inputs A LL2 connection [qed_ll2_info] has a sub-structure of type qed_ll2_conn that contain various inputs for ll2 acquisition, but the connection also utilizes a couple of other inputs. Restructure the input structure to include all the inputs and refactor the code necessary to populate those. Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_ll2_if.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index 056ac007dd12..a1f63eca430a 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -43,6 +43,17 @@ #include #include +enum qed_ll2_conn_type { + QED_LL2_TYPE_FCOE, + QED_LL2_TYPE_ISCSI, + QED_LL2_TYPE_TEST, + QED_LL2_TYPE_ISCSI_OOO, + QED_LL2_TYPE_RESERVED2, + QED_LL2_TYPE_ROCE, + QED_LL2_TYPE_RESERVED3, + MAX_QED_LL2_RX_CONN_TYPE +}; + enum qed_ll2_roce_flavor_type { QED_LL2_ROCE, QED_LL2_RROCE, @@ -55,6 +66,12 @@ enum qed_ll2_tx_dest { QED_LL2_TX_DEST_MAX }; +enum qed_ll2_error_handle { + QED_LL2_DROP_PACKET, + QED_LL2_DO_NOTHING, + QED_LL2_ASSERT, +}; + struct qed_ll2_stats { u64 gsi_invalid_hdr; u64 gsi_invalid_pkt_length; @@ -105,6 +122,28 @@ struct qed_ll2_comp_rx_data { } u; }; +struct qed_ll2_acquire_data_inputs { + enum qed_ll2_conn_type conn_type; + u16 mtu; + u16 rx_num_desc; + u16 rx_num_ooo_buffers; + u8 rx_drop_ttl0_flg; + u8 rx_vlan_removal_en; + u16 tx_num_desc; + u8 tx_max_bds_per_packet; + u8 tx_tc; + enum qed_ll2_tx_dest tx_dest; + enum qed_ll2_error_handle ai_err_packet_too_big; + enum qed_ll2_error_handle ai_err_no_buf; + u8 gsi_enable; +}; + +struct qed_ll2_acquire_data { + struct qed_ll2_acquire_data_inputs input; + /* Output container for LL2 connection's handle */ + u8 *p_connection_handle; +}; + struct qed_ll2_tx_pkt_info { void *cookie; dma_addr_t first_frag; -- cgit v1.2.3 From 0518c12f1f79dc2f2020836974c577404e42ae89 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Fri, 9 Jun 2017 17:13:22 +0300 Subject: qed*: LL2 callback operations LL2 today is interrupt driven - when tx/rx completion arrives [or any other indication], qed needs to operate on the connection and pass the information to the protocol-driver [or internal qed consumer]. 
Since we have several flavors of ll2 employed by the driver, each handler needs to do an if-else to determine the right functionality to use based on the connection type. In order to make things more scalable [given that we're going to add additional types of ll2 flavors] move the infrastructure into using a callback-based approach - the callbacks would be provided as part of the connection's initialization parameters. Signed-off-by: Michal Kalderon Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_ll2_if.h | 36 +++++++++++++++++++ include/linux/qed/qed_roce_if.h | 80 +++++++++++++++-------------------------- 2 files changed, 64 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index a1f63eca430a..5958b45eb699 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -122,6 +122,40 @@ struct qed_ll2_comp_rx_data { } u; }; +typedef +void (*qed_ll2_complete_rx_packet_cb)(void *cxt, + struct qed_ll2_comp_rx_data *data); + +typedef +void (*qed_ll2_release_rx_packet_cb)(void *cxt, + u8 connection_handle, + void *cookie, + dma_addr_t rx_buf_addr, + bool b_last_packet); + +typedef +void (*qed_ll2_complete_tx_packet_cb)(void *cxt, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, + bool b_last_packet); + +typedef +void (*qed_ll2_release_tx_packet_cb)(void *cxt, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, bool b_last_packet); + +struct qed_ll2_cbs { + qed_ll2_complete_rx_packet_cb rx_comp_cb; + qed_ll2_release_rx_packet_cb rx_release_cb; + qed_ll2_complete_tx_packet_cb tx_comp_cb; + qed_ll2_release_tx_packet_cb tx_release_cb; + void *cookie; +}; + struct qed_ll2_acquire_data_inputs { enum qed_ll2_conn_type conn_type; u16 mtu; @@ -140,6 +174,8 @@ struct qed_ll2_acquire_data_inputs { struct qed_ll2_acquire_data { struct
qed_ll2_acquire_data_inputs input; + const struct qed_ll2_cbs *cbs; + /* Output container for LL2 connection's handle */ u8 *p_connection_handle; }; diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h index cbb2ff0ce4bc..8e70f5ee05af 100644 --- a/include/linux/qed/qed_roce_if.h +++ b/include/linux/qed/qed_roce_if.h @@ -34,8 +34,6 @@ #include #include #include -#include -#include #include #include #include @@ -491,42 +489,6 @@ struct qed_roce_ll2_packet { enum qed_roce_ll2_tx_dest tx_dest; }; -struct qed_roce_ll2_tx_params { - int reserved; -}; - -struct qed_roce_ll2_rx_params { - u16 vlan_id; - u8 smac[ETH_ALEN]; - int rc; -}; - -struct qed_roce_ll2_cbs { - void (*tx_cb)(void *pdev, struct qed_roce_ll2_packet *pkt); - - void (*rx_cb)(void *pdev, struct qed_roce_ll2_packet *pkt, - struct qed_roce_ll2_rx_params *params); -}; - -struct qed_roce_ll2_params { - u16 max_rx_buffers; - u16 max_tx_buffers; - u16 mtu; - u8 mac_address[ETH_ALEN]; - struct qed_roce_ll2_cbs cbs; - void *cb_cookie; -}; - -struct qed_roce_ll2_info { - u8 handle; - struct qed_roce_ll2_cbs cbs; - u8 mac_address[ETH_ALEN]; - void *cb_cookie; - - /* Lock to protect ll2 */ - struct mutex lock; -}; - enum qed_rdma_type { QED_RDMA_TYPE_ROCE, }; @@ -579,26 +541,40 @@ struct qed_rdma_ops { int (*rdma_query_qp)(void *rdma_cxt, struct qed_rdma_qp *qp, struct qed_rdma_query_qp_out_params *oparams); int (*rdma_destroy_qp)(void *rdma_cxt, struct qed_rdma_qp *qp); + int (*rdma_register_tid)(void *rdma_cxt, struct qed_rdma_register_tid_in_params *iparams); + int (*rdma_deregister_tid)(void *rdma_cxt, u32 itid); int (*rdma_alloc_tid)(void *rdma_cxt, u32 *itid); void (*rdma_free_tid)(void *rdma_cxt, u32 itid); - int (*roce_ll2_start)(struct qed_dev *cdev, - struct qed_roce_ll2_params *params); - int (*roce_ll2_stop)(struct qed_dev *cdev); - int (*roce_ll2_tx)(struct qed_dev *cdev, - struct qed_roce_ll2_packet *packet, - struct qed_roce_ll2_tx_params *params); - int 
(*roce_ll2_post_rx_buffer)(struct qed_dev *cdev, - struct qed_roce_ll2_buffer *buf, - u64 cookie, u8 notify_fw); - int (*roce_ll2_set_mac_filter)(struct qed_dev *cdev, - u8 *old_mac_address, - u8 *new_mac_address); - int (*roce_ll2_stats)(struct qed_dev *cdev, - struct qed_ll2_stats *stats); + + int (*ll2_acquire_connection)(void *rdma_cxt, + struct qed_ll2_acquire_data *data); + + int (*ll2_establish_connection)(void *rdma_cxt, u8 connection_handle); + int (*ll2_terminate_connection)(void *rdma_cxt, u8 connection_handle); + void (*ll2_release_connection)(void *rdma_cxt, u8 connection_handle); + + int (*ll2_prepare_tx_packet)(void *rdma_cxt, + u8 connection_handle, + struct qed_ll2_tx_pkt_info *pkt, + bool notify_fw); + + int (*ll2_set_fragment_of_tx_packet)(void *rdma_cxt, + u8 connection_handle, + dma_addr_t addr, + u16 nbytes); + int (*ll2_post_rx_buffer)(void *rdma_cxt, u8 connection_handle, + dma_addr_t addr, u16 buf_len, void *cookie, + u8 notify_fw); + int (*ll2_get_stats)(void *rdma_cxt, + u8 connection_handle, + struct qed_ll2_stats *p_stats); + int (*ll2_set_mac_filter)(struct qed_dev *cdev, + u8 *old_mac_address, u8 *new_mac_address); + }; const struct qed_rdma_ops *qed_get_rdma_ops(void); -- cgit v1.2.3 From 0b4d3452b8b4a5309b4445b900e3cec022cca95a Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Mon, 5 Jun 2017 11:45:04 -0400 Subject: security/selinux: allow security_sb_clone_mnt_opts to enable/disable native labeling behavior When an NFSv4 client performs a mount operation, it first mounts the NFSv4 root and then does path walk to the exported path and performs a submount on that, cloning the security mount options from the root's superblock to the submount's superblock in the process. Unless the NFS server has an explicit fsid=0 export with the "security_label" option, the NFSv4 root superblock will not have SBLABEL_MNT set, and neither will the submount superblock after cloning the security mount options. 
As a result, setxattr's of security labels over NFSv4.2 will fail. In a similar fashion, NFSv4.2 mounts mounted with the context= mount option will not show the correct labels because the nfs_server->caps flags of the cloned superblock will still have NFS_CAP_SECURITY_LABEL set. Allowing the NFSv4 client to enable or disable SECURITY_LSM_NATIVE_LABELS behavior will ensure that the SBLABEL_MNT flag has the correct value when the client traverses from an exported path without the "security_label" option to one with the "security_label" option and vice versa. Similarly, checking to see if SECURITY_LSM_NATIVE_LABELS is set upon return from security_sb_clone_mnt_opts() and clearing NFS_CAP_SECURITY_LABEL if necessary will allow the correct labels to be displayed for NFSv4.2 mounts mounted with the context= mount option. Resolves: https://github.com/SELinuxProject/selinux-kernel/issues/35 Signed-off-by: Scott Mayhew Reviewed-by: Stephen Smalley Tested-by: Stephen Smalley Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 4 +++- include/linux/security.h | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 68d91e423bca..3cc9d77c7527 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1409,7 +1409,9 @@ union security_list_options { unsigned long kern_flags, unsigned long *set_kern_flags); int (*sb_clone_mnt_opts)(const struct super_block *oldsb, - struct super_block *newsb); + struct super_block *newsb, + unsigned long kern_flags, + unsigned long *set_kern_flags); int (*sb_parse_opts_str)(char *options, struct security_mnt_opts *opts); int (*dentry_init_security)(struct dentry *dentry, int mode, const struct qstr *name, void **ctx, diff --git a/include/linux/security.h b/include/linux/security.h index 549cb828a888..b44e954815ce 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -249,7 +249,9 @@ int 
security_sb_set_mnt_opts(struct super_block *sb, unsigned long kern_flags, unsigned long *set_kern_flags); int security_sb_clone_mnt_opts(const struct super_block *oldsb, - struct super_block *newsb); + struct super_block *newsb, + unsigned long kern_flags, + unsigned long *set_kern_flags); int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts); int security_dentry_init_security(struct dentry *dentry, int mode, const struct qstr *name, void **ctx, @@ -605,7 +607,9 @@ static inline int security_sb_set_mnt_opts(struct super_block *sb, } static inline int security_sb_clone_mnt_opts(const struct super_block *oldsb, - struct super_block *newsb) + struct super_block *newsb, + unsigned long kern_flags, + unsigned long *set_kern_flags) { return 0; } -- cgit v1.2.3 From 1e1fc133483ef3b56c20bf3cd9241146c41042f8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 30 May 2017 00:29:38 -0400 Subject: compat_{get,put}_bitmap(): use unsafe_{get,put}_user() unroll the inner loops, while we are at it Signed-off-by: Al Viro --- include/linux/compat.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 1c5f3152cbb5..94ceb0348a25 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -388,8 +388,7 @@ asmlinkage long compat_sys_wait4(compat_pid_t pid, #define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t)) -#define BITS_TO_COMPAT_LONGS(bits) \ - (((bits)+BITS_PER_COMPAT_LONG-1)/BITS_PER_COMPAT_LONG) +#define BITS_TO_COMPAT_LONGS(bits) DIV_ROUND_UP(bits, BITS_PER_COMPAT_LONG) long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask, unsigned long bitmap_size); -- cgit v1.2.3 From ca2406ed58fef3f7c8ef6470cba807bfc3415605 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 31 May 2017 04:22:44 -0400 Subject: times(2): move compat to native Signed-off-by: Al Viro --- include/linux/time.h | 3 --- 1 file changed, 3 deletions(-) (limited to 
'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index c0543f5f25de..f769ea88250d 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -171,9 +171,6 @@ extern int do_getitimer(int which, struct itimerval *value); extern long do_utimes(int dfd, const char __user *filename, struct timespec *times, int flags); -struct tms; -extern void do_sys_times(struct tms *); - /* * Similar to the struct tm in userspace , but it needs to be here so * that the kernel source is self contained. -- cgit v1.2.3 From 1b3c872c8342803d0fcd8042e4e007d173191db6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 31 May 2017 04:46:17 -0400 Subject: rt_sigtimedwait(): move compat to native Signed-off-by: Al Viro --- include/linux/signal.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index 1f5a16620693..231603ac20a3 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -246,8 +246,6 @@ extern int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p, bool group); extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p); extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); -extern int do_sigtimedwait(const sigset_t *, siginfo_t *, - const struct timespec *); extern int sigprocmask(int, sigset_t *, sigset_t *); extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); -- cgit v1.2.3 From 2c8f8afa7f92acb07641bf95b940d384ed1d0294 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 7 Jun 2017 20:52:10 +0900 Subject: mtd: nand: add generic helpers to check, match, maximize ECC settings Driver are responsible for setting up ECC parameters correctly. 
Those include: - Check if ECC parameters specified (usually by DT) are valid - Meet the chip's ECC requirement - Maximize ECC strength if NAND_ECC_MAXIMIZE flag is set The logic can be generalized by factoring out common code. This commit adds 3 helpers to the NAND framework: nand_check_ecc_caps - Check if preset step_size and strength are valid nand_match_ecc_req - Match the chip's requirement nand_maximize_ecc - Maximize the ECC strength To use the helpers above, a driver needs to provide: - Data array of supported ECC step size and strength - A hook that calculates ECC bytes from the combination of step_size and strength. By using those helpers, code duplication among drivers will be reduced. Signed-off-by: Masahiro Yamada Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 8b3607bde1b5..568f53e812cd 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -481,6 +481,30 @@ static inline void nand_hw_control_init(struct nand_hw_control *nfc) init_waitqueue_head(&nfc->wq); } +/** + * struct nand_ecc_step_info - ECC step information of ECC engine + * @stepsize: data bytes per ECC step + * @strengths: array of supported strengths + * @nstrengths: number of supported strengths + */ +struct nand_ecc_step_info { + int stepsize; + const int *strengths; + int nstrengths; +}; + +/** + * struct nand_ecc_caps - capability of ECC engine + * @stepinfos: array of ECC step information + * @nstepinfos: number of ECC step information + * @calc_ecc_bytes: driver's hook to calculate ECC bytes per step + */ +struct nand_ecc_caps { + const struct nand_ecc_step_info *stepinfos; + int nstepinfos; + int (*calc_ecc_bytes)(int step_size, int strength); +}; + /** * struct nand_ecc_ctrl - Control structure for ECC * @mode: ECC mode @@ -1246,6 +1270,15 @@ int nand_check_erased_ecc_chunk(void *data, 
int datalen, void *extraoob, int extraooblen, int threshold); +int nand_check_ecc_caps(struct nand_chip *chip, + const struct nand_ecc_caps *caps, int oobavail); + +int nand_match_ecc_req(struct nand_chip *chip, + const struct nand_ecc_caps *caps, int oobavail); + +int nand_maximize_ecc(struct nand_chip *chip, + const struct nand_ecc_caps *caps, int oobavail); + /* Default write_oob implementation */ int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page); -- cgit v1.2.3 From a03c60178c181767ecfb26fb311a88742d228118 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 7 Jun 2017 20:52:11 +0900 Subject: mtd: nand: add a shorthand to generate nand_ecc_caps structure struct nand_ecc_caps was designed as flexible as possible to support multiple stepsizes (like sunxi_nand.c). So, we need to write multiple arrays even for the simplest case. I guess many controllers support a single stepsize, so here is a shorthand macro for the case. It allows to describe like ... NAND_ECC_CAPS_SINGLE(denali_pci_ecc_caps, denali_calc_ecc_bytes, 512, 8, 15); ... 
instead of static const int denali_pci_ecc_strengths[] = {8, 15}; static const struct nand_ecc_step_info denali_pci_ecc_stepinfo = { .stepsize = 512, .strengths = denali_pci_ecc_strengths, .nstrengths = ARRAY_SIZE(denali_pci_ecc_strengths), }; static const struct nand_ecc_caps denali_pci_ecc_caps = { .stepinfos = &denali_pci_ecc_stepinfo, .nstepinfos = 1, .calc_ecc_bytes = denali_calc_ecc_bytes, }; Signed-off-by: Masahiro Yamada Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 568f53e812cd..dc8fbc033442 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -505,6 +505,20 @@ struct nand_ecc_caps { int (*calc_ecc_bytes)(int step_size, int strength); }; +/* a shorthand to generate struct nand_ecc_caps with only one ECC stepsize */ +#define NAND_ECC_CAPS_SINGLE(__name, __calc, __step, ...) \ +static const int __name##_strengths[] = { __VA_ARGS__ }; \ +static const struct nand_ecc_step_info __name##_stepinfo = { \ + .stepsize = __step, \ + .strengths = __name##_strengths, \ + .nstrengths = ARRAY_SIZE(__name##_strengths), \ +}; \ +static const struct nand_ecc_caps __name = { \ + .stepinfos = &__name##_stepinfo, \ + .nstepinfos = 1, \ + .calc_ecc_bytes = __calc, \ +} + /** * struct nand_ecc_ctrl - Control structure for ECC * @mode: ECC mode -- cgit v1.2.3 From 6576ff740f5c1e6705d33b9d6a922526f20fa0dc Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 27 Apr 2017 17:30:08 +0200 Subject: iio: adc: twl4030: Drop twl4030_get_madc_conversion() Drop legacy twl4030_get_madc_conversion() method. It has been used by drivers to get madc data before its conversion to the IIO API. There are no users in the mainline kernel anymore.
Signed-off-by: Sebastian Reichel Acked-by: Wolfram Sang Signed-off-by: Jonathan Cameron --- include/linux/i2c/twl4030-madc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c/twl4030-madc.h b/include/linux/i2c/twl4030-madc.h index 1c0134dd3271..0c919ebb31e0 100644 --- a/include/linux/i2c/twl4030-madc.h +++ b/include/linux/i2c/twl4030-madc.h @@ -143,5 +143,4 @@ struct twl4030_madc_user_parms { }; int twl4030_madc_conversion(struct twl4030_madc_request *conv); -int twl4030_get_madc_conversion(int channel_no); #endif -- cgit v1.2.3 From 42ab9278eb3abfe8b500abe6681c8549c408ec8f Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 27 Apr 2017 17:30:09 +0200 Subject: iio: adc: twl4030: Unexport twl4030_madc_conversion() All madc users have been converted to IIO API, so drop the legacy API. The function is still used inside of the driver. Signed-off-by: Sebastian Reichel Acked-by: Wolfram Sang Signed-off-by: Jonathan Cameron --- include/linux/i2c/twl4030-madc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c/twl4030-madc.h b/include/linux/i2c/twl4030-madc.h index 0c919ebb31e0..be9260e261ac 100644 --- a/include/linux/i2c/twl4030-madc.h +++ b/include/linux/i2c/twl4030-madc.h @@ -141,6 +141,4 @@ struct twl4030_madc_user_parms { int status; u16 result; }; - -int twl4030_madc_conversion(struct twl4030_madc_request *conv); #endif -- cgit v1.2.3 From 1adf5a3c0d60ad76a873e90a25a456fd51971774 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 27 Apr 2017 17:30:10 +0200 Subject: iio: adc: twl4030: Drop struct twl4030_madc_user_parms This struct is no longer used by anything in the kernel. 
Signed-off-by: Sebastian Reichel Acked-by: Wolfram Sang Signed-off-by: Jonathan Cameron --- include/linux/i2c/twl4030-madc.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c/twl4030-madc.h b/include/linux/i2c/twl4030-madc.h index be9260e261ac..f395700fb933 100644 --- a/include/linux/i2c/twl4030-madc.h +++ b/include/linux/i2c/twl4030-madc.h @@ -135,10 +135,4 @@ enum sample_type { #define TWL4030_REG_GPBR1 0x0c #define TWL4030_GPBR1_MADC_HFCLK_EN (1 << 7) -struct twl4030_madc_user_parms { - int channel; - int average; - int status; - u16 result; -}; #endif -- cgit v1.2.3 From 842eb60b044454f186b4e1da87a4333a9827b62f Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 27 Apr 2017 17:30:11 +0200 Subject: iio: adc: twl4030: Remove twl4030_madc_request.func_cb This functionality is not used by the IIO subsystem. Due to removal of legacy API it can also be removed. Signed-off-by: Sebastian Reichel Acked-by: Wolfram Sang Signed-off-by: Jonathan Cameron --- include/linux/i2c/twl4030-madc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c/twl4030-madc.h b/include/linux/i2c/twl4030-madc.h index f395700fb933..34e94747b61e 100644 --- a/include/linux/i2c/twl4030-madc.h +++ b/include/linux/i2c/twl4030-madc.h @@ -51,7 +51,6 @@ struct twl4030_madc_request { bool result_pending; bool raw; int rbuf[TWL4030_MADC_MAX_CHANNELS]; - void (*func_cb)(int len, int channels, int *buf); }; enum conversion_methods { -- cgit v1.2.3 From 7af1a06776da9e943126c2dd740a18710ecd66df Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 27 Apr 2017 17:30:12 +0200 Subject: iio: adc: twl4030: Fold twl4030-madc.h into driver twl4030-madc.h is no longer used by anything outside of the iio driver, so it can be merged into the driver. 
Signed-off-by: Sebastian Reichel Acked-by: Wolfram Sang Signed-off-by: Jonathan Cameron --- include/linux/i2c/twl4030-madc.h | 137 --------------------------------------- 1 file changed, 137 deletions(-) delete mode 100644 include/linux/i2c/twl4030-madc.h (limited to 'include/linux') diff --git a/include/linux/i2c/twl4030-madc.h b/include/linux/i2c/twl4030-madc.h deleted file mode 100644 index 34e94747b61e..000000000000 --- a/include/linux/i2c/twl4030-madc.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * twl4030_madc.h - Header for TWL4030 MADC - * - * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/ - * J Keerthy - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA - * - */ - -#ifndef _TWL4030_MADC_H -#define _TWL4030_MADC_H - -struct twl4030_madc_conversion_method { - u8 sel; - u8 avg; - u8 rbase; - u8 ctrl; -}; - -#define TWL4030_MADC_MAX_CHANNELS 16 - - -/* - * twl4030_madc_request- madc request packet for channel conversion - * @channels: 16 bit bitmap for individual channels - * @do_avgP: sample the input channel for 4 consecutive cycles - * @method: RT, SW1, SW2 - * @type: Polling or interrupt based method - * @raw: Return raw value, do not convert it - */ - -struct twl4030_madc_request { - unsigned long channels; - bool do_avg; - u16 method; - u16 type; - bool active; - bool result_pending; - bool raw; - int rbuf[TWL4030_MADC_MAX_CHANNELS]; -}; - -enum conversion_methods { - TWL4030_MADC_RT, - TWL4030_MADC_SW1, - TWL4030_MADC_SW2, - TWL4030_MADC_NUM_METHODS -}; - -enum sample_type { - TWL4030_MADC_WAIT, - TWL4030_MADC_IRQ_ONESHOT, - TWL4030_MADC_IRQ_REARM -}; - -#define TWL4030_MADC_CTRL1 0x00 -#define TWL4030_MADC_CTRL2 0x01 - -#define TWL4030_MADC_RTSELECT_LSB 0x02 -#define TWL4030_MADC_SW1SELECT_LSB 0x06 -#define TWL4030_MADC_SW2SELECT_LSB 0x0A - -#define TWL4030_MADC_RTAVERAGE_LSB 0x04 -#define TWL4030_MADC_SW1AVERAGE_LSB 0x08 -#define TWL4030_MADC_SW2AVERAGE_LSB 0x0C - -#define TWL4030_MADC_CTRL_SW1 0x12 -#define TWL4030_MADC_CTRL_SW2 0x13 - -#define TWL4030_MADC_RTCH0_LSB 0x17 -#define TWL4030_MADC_GPCH0_LSB 0x37 - -#define TWL4030_MADC_MADCON (1 << 0) /* MADC power on */ -#define TWL4030_MADC_BUSY (1 << 0) /* MADC busy */ -/* MADC conversion completion */ -#define TWL4030_MADC_EOC_SW (1 << 1) -/* MADC SWx start conversion */ -#define TWL4030_MADC_SW_START (1 << 5) -#define TWL4030_MADC_ADCIN0 (1 << 0) -#define TWL4030_MADC_ADCIN1 (1 << 1) -#define TWL4030_MADC_ADCIN2 (1 << 2) -#define 
TWL4030_MADC_ADCIN3 (1 << 3) -#define TWL4030_MADC_ADCIN4 (1 << 4) -#define TWL4030_MADC_ADCIN5 (1 << 5) -#define TWL4030_MADC_ADCIN6 (1 << 6) -#define TWL4030_MADC_ADCIN7 (1 << 7) -#define TWL4030_MADC_ADCIN8 (1 << 8) -#define TWL4030_MADC_ADCIN9 (1 << 9) -#define TWL4030_MADC_ADCIN10 (1 << 10) -#define TWL4030_MADC_ADCIN11 (1 << 11) -#define TWL4030_MADC_ADCIN12 (1 << 12) -#define TWL4030_MADC_ADCIN13 (1 << 13) -#define TWL4030_MADC_ADCIN14 (1 << 14) -#define TWL4030_MADC_ADCIN15 (1 << 15) - -/* Fixed channels */ -#define TWL4030_MADC_BTEMP TWL4030_MADC_ADCIN1 -#define TWL4030_MADC_VBUS TWL4030_MADC_ADCIN8 -#define TWL4030_MADC_VBKB TWL4030_MADC_ADCIN9 -#define TWL4030_MADC_ICHG TWL4030_MADC_ADCIN10 -#define TWL4030_MADC_VCHG TWL4030_MADC_ADCIN11 -#define TWL4030_MADC_VBAT TWL4030_MADC_ADCIN12 - -/* Step size and prescaler ratio */ -#define TEMP_STEP_SIZE 147 -#define TEMP_PSR_R 100 -#define CURR_STEP_SIZE 147 -#define CURR_PSR_R1 44 -#define CURR_PSR_R2 88 - -#define TWL4030_BCI_BCICTL1 0x23 -#define TWL4030_BCI_CGAIN 0x020 -#define TWL4030_BCI_MESBAT (1 << 1) -#define TWL4030_BCI_TYPEN (1 << 4) -#define TWL4030_BCI_ITHEN (1 << 3) - -#define REG_BCICTL2 0x024 -#define TWL4030_BCI_ITHSENS 0x007 - -/* Register and bits for GPBR1 register */ -#define TWL4030_REG_GPBR1 0x0c -#define TWL4030_GPBR1_MADC_HFCLK_EN (1 << 7) - -#endif -- cgit v1.2.3 From d89e119a088ec83881eda5645307ae252ecea33a Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Thu, 27 Apr 2017 15:29:15 +0200 Subject: iio: add hardware triggered operating mode Devices, like stm32 timer, could be triggered by hardware events which are not buffer or software events. However it could be necessary to validate the triggers like it is done for buffer or event triggered modes. This patch add a new INDIO_HARDWARE_TRIGGERED operating mode for this kind of devices and allow this mode to register trigger consumer. 
Signed-off-by: Benjamin Gaignard Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 3f5ea2e9a39e..d68bec297a45 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -352,10 +352,16 @@ unsigned int iio_get_time_res(const struct iio_dev *indio_dev); #define INDIO_BUFFER_SOFTWARE 0x04 #define INDIO_BUFFER_HARDWARE 0x08 #define INDIO_EVENT_TRIGGERED 0x10 +#define INDIO_HARDWARE_TRIGGERED 0x20 #define INDIO_ALL_BUFFER_MODES \ (INDIO_BUFFER_TRIGGERED | INDIO_BUFFER_HARDWARE | INDIO_BUFFER_SOFTWARE) +#define INDIO_ALL_TRIGGERED_MODES \ + (INDIO_BUFFER_TRIGGERED \ + | INDIO_EVENT_TRIGGERED \ + | INDIO_HARDWARE_TRIGGERED) + #define INDIO_MAX_RAW_ELEMENTS 4 struct iio_trigger; /* forward declaration */ -- cgit v1.2.3 From 9010624cc5b446d3377dde4753eecbaf945f3a41 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 21 May 2017 22:34:39 +0200 Subject: hwmon: (ads1015) move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. 
Signed-off-by: Wolfram Sang Signed-off-by: Guenter Roeck --- include/linux/i2c/ads1015.h | 36 ----------------------------------- include/linux/platform_data/ads1015.h | 36 +++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 36 deletions(-) delete mode 100644 include/linux/i2c/ads1015.h create mode 100644 include/linux/platform_data/ads1015.h (limited to 'include/linux') diff --git a/include/linux/i2c/ads1015.h b/include/linux/i2c/ads1015.h deleted file mode 100644 index d5aa2a045669..000000000000 --- a/include/linux/i2c/ads1015.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Platform Data for ADS1015 12-bit 4-input ADC - * (C) Copyright 2010 - * Dirk Eibach, Guntermann & Drunck GmbH - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef LINUX_ADS1015_H -#define LINUX_ADS1015_H - -#define ADS1015_CHANNELS 8 - -struct ads1015_channel_data { - bool enabled; - unsigned int pga; - unsigned int data_rate; -}; - -struct ads1015_platform_data { - struct ads1015_channel_data channel_data[ADS1015_CHANNELS]; -}; - -#endif /* LINUX_ADS1015_H */ diff --git a/include/linux/platform_data/ads1015.h b/include/linux/platform_data/ads1015.h new file mode 100644 index 000000000000..d5aa2a045669 --- /dev/null +++ b/include/linux/platform_data/ads1015.h @@ -0,0 +1,36 @@ +/* + * Platform Data for ADS1015 12-bit 4-input ADC + * (C) Copyright 2010 + * Dirk Eibach, Guntermann & Drunck GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef LINUX_ADS1015_H +#define LINUX_ADS1015_H + +#define ADS1015_CHANNELS 8 + +struct ads1015_channel_data { + bool enabled; + unsigned int pga; + unsigned int data_rate; +}; + +struct ads1015_platform_data { + struct ads1015_channel_data channel_data[ADS1015_CHANNELS]; +}; + +#endif /* LINUX_ADS1015_H */ -- cgit v1.2.3 From 570999f306fc0375a533a1906ff35fffe289e36b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 21 May 2017 22:34:40 +0200 Subject: hwmon: (ds620) move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. 
Signed-off-by: Wolfram Sang Signed-off-by: Guenter Roeck --- include/linux/i2c/ds620.h | 21 --------------------- include/linux/platform_data/ds620.h | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 21 deletions(-) delete mode 100644 include/linux/i2c/ds620.h create mode 100644 include/linux/platform_data/ds620.h (limited to 'include/linux') diff --git a/include/linux/i2c/ds620.h b/include/linux/i2c/ds620.h deleted file mode 100644 index 736bb87ac0fc..000000000000 --- a/include/linux/i2c/ds620.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef _LINUX_DS620_H -#define _LINUX_DS620_H - -#include -#include - -/* platform data for the DS620 temperature sensor and thermostat */ - -struct ds620_platform_data { - /* - * Thermostat output pin PO mode: - * 0 = always low (default) - * 1 = PO_LOW - * 2 = PO_HIGH - * - * (see Documentation/hwmon/ds620) - */ - int pomode; -}; - -#endif /* _LINUX_DS620_H */ diff --git a/include/linux/platform_data/ds620.h b/include/linux/platform_data/ds620.h new file mode 100644 index 000000000000..736bb87ac0fc --- /dev/null +++ b/include/linux/platform_data/ds620.h @@ -0,0 +1,21 @@ +#ifndef _LINUX_DS620_H +#define _LINUX_DS620_H + +#include +#include + +/* platform data for the DS620 temperature sensor and thermostat */ + +struct ds620_platform_data { + /* + * Thermostat output pin PO mode: + * 0 = always low (default) + * 1 = PO_LOW + * 2 = PO_HIGH + * + * (see Documentation/hwmon/ds620) + */ + int pomode; +}; + +#endif /* _LINUX_DS620_H */ -- cgit v1.2.3 From 8116e8dd564fcbadf9f3697dcd0c3d37c049fb45 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 21 May 2017 22:34:41 +0200 Subject: hwmon: (ltc4245) move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. 
Signed-off-by: Wolfram Sang Signed-off-by: Guenter Roeck --- include/linux/i2c/ltc4245.h | 21 --------------------- include/linux/platform_data/ltc4245.h | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 21 deletions(-) delete mode 100644 include/linux/i2c/ltc4245.h create mode 100644 include/linux/platform_data/ltc4245.h (limited to 'include/linux') diff --git a/include/linux/i2c/ltc4245.h b/include/linux/i2c/ltc4245.h deleted file mode 100644 index 56bda4be0016..000000000000 --- a/include/linux/i2c/ltc4245.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Platform Data for LTC4245 hardware monitor chip - * - * Copyright (c) 2010 Ira W. Snyder - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#ifndef LINUX_LTC4245_H -#define LINUX_LTC4245_H - -#include - -struct ltc4245_platform_data { - bool use_extra_gpios; -}; - -#endif /* LINUX_LTC4245_H */ diff --git a/include/linux/platform_data/ltc4245.h b/include/linux/platform_data/ltc4245.h new file mode 100644 index 000000000000..56bda4be0016 --- /dev/null +++ b/include/linux/platform_data/ltc4245.h @@ -0,0 +1,21 @@ +/* + * Platform Data for LTC4245 hardware monitor chip + * + * Copyright (c) 2010 Ira W. Snyder + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#ifndef LINUX_LTC4245_H +#define LINUX_LTC4245_H + +#include + +struct ltc4245_platform_data { + bool use_extra_gpios; +}; + +#endif /* LINUX_LTC4245_H */ -- cgit v1.2.3 From 0c9fe1614126171c70e0dc0b0635d45a327ac82a Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 21 May 2017 22:34:42 +0200 Subject: hwmon: (max6639) move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. Signed-off-by: Wolfram Sang Signed-off-by: Guenter Roeck --- include/linux/i2c/max6639.h | 14 -------------- include/linux/platform_data/max6639.h | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 14 deletions(-) delete mode 100644 include/linux/i2c/max6639.h create mode 100644 include/linux/platform_data/max6639.h (limited to 'include/linux') diff --git a/include/linux/i2c/max6639.h b/include/linux/i2c/max6639.h deleted file mode 100644 index 6011c42034da..000000000000 --- a/include/linux/i2c/max6639.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _LINUX_MAX6639_H -#define _LINUX_MAX6639_H - -#include - -/* platform data for the MAX6639 temperature sensor and fan control */ - -struct max6639_platform_data { - bool pwm_polarity; /* Polarity low (0) or high (1, default) */ - int ppr; /* Pulses per rotation 1..4 (default == 2) */ - int rpm_range; /* 2000, 4000 (default), 8000 or 16000 */ -}; - -#endif /* _LINUX_MAX6639_H */ diff --git a/include/linux/platform_data/max6639.h b/include/linux/platform_data/max6639.h new file mode 100644 index 000000000000..6011c42034da --- /dev/null +++ b/include/linux/platform_data/max6639.h @@ -0,0 +1,14 @@ +#ifndef _LINUX_MAX6639_H +#define _LINUX_MAX6639_H + +#include + +/* platform data for the MAX6639 temperature sensor and fan control */ + +struct max6639_platform_data { + bool pwm_polarity; /* Polarity low (0) or high (1, default) */ + int ppr; /* Pulses per rotation 1..4 (default == 2) */ + int rpm_range; /* 2000, 4000 (default), 8000 or 16000 */ +}; + +#endif /* 
_LINUX_MAX6639_H */ -- cgit v1.2.3 From 4ba1bb12cf21f4ee4681aee939c4d9d82d6f49f2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 21 May 2017 22:34:43 +0200 Subject: hwmon: (pmbus) move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. Signed-off-by: Wolfram Sang Signed-off-by: Guenter Roeck --- include/linux/i2c/pmbus.h | 49 ----------------------------------------------- include/linux/pmbus.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 49 deletions(-) delete mode 100644 include/linux/i2c/pmbus.h create mode 100644 include/linux/pmbus.h (limited to 'include/linux') diff --git a/include/linux/i2c/pmbus.h b/include/linux/i2c/pmbus.h deleted file mode 100644 index ee3c2aba2a8e..000000000000 --- a/include/linux/i2c/pmbus.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Hardware monitoring driver for PMBus devices - * - * Copyright (c) 2010, 2011 Ericsson AB. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef _PMBUS_H_ -#define _PMBUS_H_ - -/* flags */ - -/* - * PMBUS_SKIP_STATUS_CHECK - * - * During register detection, skip checking the status register for - * communication or command errors. 
- * - * Some PMBus chips respond with valid data when trying to read an unsupported - * register. For such chips, checking the status register is mandatory when - * trying to determine if a chip register exists or not. - * Other PMBus chips don't support the STATUS_CML register, or report - * communication errors for no explicable reason. For such chips, checking - * the status register must be disabled. - */ -#define PMBUS_SKIP_STATUS_CHECK (1 << 0) - -struct pmbus_platform_data { - u32 flags; /* Device specific flags */ - - /* regulator support */ - int num_regulators; - struct regulator_init_data *reg_init_data; -}; - -#endif /* _PMBUS_H_ */ diff --git a/include/linux/pmbus.h b/include/linux/pmbus.h new file mode 100644 index 000000000000..ee3c2aba2a8e --- /dev/null +++ b/include/linux/pmbus.h @@ -0,0 +1,49 @@ +/* + * Hardware monitoring driver for PMBus devices + * + * Copyright (c) 2010, 2011 Ericsson AB. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _PMBUS_H_ +#define _PMBUS_H_ + +/* flags */ + +/* + * PMBUS_SKIP_STATUS_CHECK + * + * During register detection, skip checking the status register for + * communication or command errors. + * + * Some PMBus chips respond with valid data when trying to read an unsupported + * register. 
For such chips, checking the status register is mandatory when + * trying to determine if a chip register exists or not. + * Other PMBus chips don't support the STATUS_CML register, or report + * communication errors for no explicable reason. For such chips, checking + * the status register must be disabled. + */ +#define PMBUS_SKIP_STATUS_CHECK (1 << 0) + +struct pmbus_platform_data { + u32 flags; /* Device specific flags */ + + /* regulator support */ + int num_regulators; + struct regulator_init_data *reg_init_data; +}; + +#endif /* _PMBUS_H_ */ -- cgit v1.2.3 From 8e0931022e12e45bab9afe01e830d697d9c8e73d Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 26 May 2017 15:30:34 +0200 Subject: Revert "clockevents: Add a clkevt-of mechanism like clksrc-of" After discussing it, this feature is dropped as it is not considered adequate: https://patchwork.kernel.org/patch/9639317/ There is no user of this macro yet, so there is no impact on the drivers. This reverts commit 376bc27150f180d9f5eddec6a14117780177589d. 
Cc: Mark Rutland Signed-off-by: Daniel Lezcano --- include/linux/clockchips.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index acc9ce05e5f0..a116926598fd 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -223,13 +223,4 @@ static inline void tick_setup_hrtimer_broadcast(void) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ -#define CLOCKEVENT_OF_DECLARE(name, compat, fn) \ - OF_DECLARE_1_RET(clkevt, name, compat, fn) - -#ifdef CONFIG_CLKEVT_PROBE -extern int clockevent_probe(void); -#else -static inline int clockevent_probe(void) { return 0; } -#endif - #endif /* _LINUX_CLOCKCHIPS_H */ -- cgit v1.2.3 From 8be381a131c29c4737aed44e7e5f90cb77bb4a7e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 19 May 2017 10:35:10 +0200 Subject: soc: renesas: Rework Kconfig and Makefile logic The goals are to: - Allow precise control over and automatic selection of which (sub)drivers are used for which SoC, - Allow adding support for new SoCs easily, - Allow compile-testing of all (sub)drivers, - Keep driver selection logic in the subsystem-specific Kconfig, independent from the architecture-specific Kconfig (i.e. no "select" from arch/arm64/Kconfig.platforms), to avoid dependencies. This is implemented by: - Introducing Kconfig symbols for all drivers and sub-drivers, - Introducing the Kconfig symbol SOC_RENESAS, which is enabled automatically when building for a Renesas ARM platform, and which enables all required drivers without interaction of the user, based on SoC-specific ARCH_* symbols, - Allowing the user to enable any Kconfig symbol manually if COMPILE_TEST is enabled, - Using the new Kconfig symbols instead of the ARCH_* symbols to control compilation in the Makefile, - Always entering drivers/soc/renesas/ during the build. 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- include/linux/soc/renesas/rcar-rst.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/renesas/rcar-rst.h b/include/linux/soc/renesas/rcar-rst.h index 787e7ad53d45..2c231f2280a6 100644 --- a/include/linux/soc/renesas/rcar-rst.h +++ b/include/linux/soc/renesas/rcar-rst.h @@ -1,8 +1,7 @@ #ifndef __LINUX_SOC_RENESAS_RCAR_RST_H__ #define __LINUX_SOC_RENESAS_RCAR_RST_H__ -#if defined(CONFIG_ARCH_RCAR_GEN1) || defined(CONFIG_ARCH_RCAR_GEN2) || \ - defined(CONFIG_ARCH_R8A7795) || defined(CONFIG_ARCH_R8A7796) +#ifdef CONFIG_RST_RCAR int rcar_rst_read_mode_pins(u32 *mode); #else static inline int rcar_rst_read_mode_pins(u32 *mode) { return -ENODEV; } -- cgit v1.2.3 From 3889a803e1da9bd7cd10d6504bf281ee7e55dfd6 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 12 Jun 2017 11:23:41 +0200 Subject: net: factor out a helper to decrement the skb refcount The same code is replicated in 3 different places; move it to a common helper. Signed-off-by: Paolo Abeni Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d460a4cbda1c..decce3655a48 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -867,6 +867,19 @@ static inline unsigned int skb_napi_id(const struct sk_buff *skb) #endif } +/* decrement the reference count and return true if we can free the skb */ +static inline bool skb_unref(struct sk_buff *skb) +{ + if (unlikely(!skb)) + return false; + if (likely(atomic_read(&skb->users) == 1)) + smp_rmb(); + else if (likely(!atomic_dec_and_test(&skb->users))) + return false; + + return true; +} + void kfree_skb(struct sk_buff *skb); void kfree_skb_list(struct sk_buff *segs); void skb_tx_error(struct sk_buff *skb); -- cgit v1.2.3 From 0a463c78d25b9464b77311d9dda297550a2d6aa5 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 12 Jun 2017 11:23:42 +0200 Subject: udp: avoid a cache miss on dequeue Since UDP no longer uses sk->destructor, we can completely clear the skb head state before enqueuing. Amend and use skb_release_head_state() for that. All head states share a single cacheline, which is not normally used/accessed on dequeue. We can entirely avoid accessing this cacheline by implementing, and using in the UDP code, a specialized skb free helper which ignores the skb head state. This saves a cacheline miss at skb deallocation time. v1 -> v2: replaced secpath_reset() with skb_release_head_state() Signed-off-by: Paolo Abeni Acked-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index decce3655a48..d66d4feaac86 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -880,10 +880,12 @@ static inline bool skb_unref(struct sk_buff *skb) return true; } +void skb_release_head_state(struct sk_buff *skb); void kfree_skb(struct sk_buff *skb); void kfree_skb_list(struct sk_buff *segs); void skb_tx_error(struct sk_buff *skb); void consume_skb(struct sk_buff *skb); +void consume_stateless_skb(struct sk_buff *skb); void __kfree_skb(struct sk_buff *skb); extern struct kmem_cache *skbuff_head_cache; -- cgit v1.2.3 From b46c73378c8436c3cd3fa19cead57a645adb0ed0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 23 Aug 2013 14:12:09 -0700 Subject: driver-core: remove struct bus_type.dev_attrs Now that all in-kernel users of bus_type.dev_attrs have been converted to use dev_groups instead, the dev_attrs field, and logic surrounding it, can be removed. Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 5b725b943cf2..b63cbf2528ef 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -66,7 +66,6 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *); * @name: The name of the bus. * @dev_name: Used for subsystems to enumerate devices like ("foo%u", dev->id). * @dev_root: Default device to use as the parent. - * @dev_attrs: Default attributes of the devices on the bus. * @bus_groups: Default attributes of the bus. * @dev_groups: Default attributes of the devices on the bus. * @drv_groups: Default attributes of the device drivers on the bus. 
@@ -112,7 +111,6 @@ struct bus_type { const char *name; const char *dev_name; struct device *dev_root; - struct device_attribute *dev_attrs; /* use dev_groups instead */ const struct attribute_group **bus_groups; const struct attribute_group **dev_groups; const struct attribute_group **drv_groups; -- cgit v1.2.3 From 2371cd90abe3fa1b88e15111abf2cc0a26db6e52 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 18 May 2017 09:18:12 -0700 Subject: scsi: storvsc: remove unnecessary channel inbound lock In the storvsc driver, inbound messages do not go through inbound lock. The only effect of this lock was to provide a barrier for connect and remove logic. Signed-off-by: Stephen Hemminger Signed-off-by: Martin K. Petersen --- include/linux/hyperv.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index e09fc8290c2f..b7d7bbec74e0 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -744,7 +744,6 @@ struct vmbus_channel { u32 ringbuffer_pagecount; struct hv_ring_buffer_info outbound; /* send to parent */ struct hv_ring_buffer_info inbound; /* receive from parent */ - spinlock_t inbound_lock; struct vmbus_close_msg close_msg; -- cgit v1.2.3 From 91b5ab628929d97357108594610e7c07be93e2fd Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Fri, 9 Jun 2017 13:08:42 +0100 Subject: cfg80211: support 4-way handshake offloading for WPA/WPA2-PSK Let drivers advertise support for station-mode 4-way handshake offloading with a new NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK flag. Extend use of NL80211_ATTR_PMK attribute indicating it might be passed as part of NL80211_CMD_CONNECT command, and contain the PSK (which is the PMK, hence the name.) The driver/device is assumed to handle the 4-way handshake by itself in this case (including key derivations, etc.), instead of relying on the supplicant. 
This patch is somewhat based on this one (by Vladimir Kondratiev): https://patchwork.kernel.org/patch/1309561/. Signed-off-by: Vladimir Kondratiev Signed-off-by: Eliad Peller Signed-off-by: Luca Coelho [arend.vanspriel@broadcom.com rebase dealing with existing ATTR_PMK] Signed-off-by: Arend van Spriel [reword NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK docs to indicate that this offload might be required] Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 69033353d0d1..e97ca3a9a67b 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2401,6 +2401,7 @@ enum ieee80211_sa_query_action { #define WLAN_MAX_KEY_LEN 32 #define WLAN_PMKID_LEN 16 +#define WLAN_PMK_LEN 32 #define WLAN_OUI_WFA 0x506f9a #define WLAN_OUI_TYPE_WFA_P2P 9 -- cgit v1.2.3 From 3a00df5707b6af715e78c26569800e0c2eb615fe Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Fri, 9 Jun 2017 13:08:43 +0100 Subject: cfg80211: support 4-way handshake offloading for 802.1X Add API for setting the PMK to the driver. For FT support, allow setting also the PMK-R0 Name. This can be used by drivers that support 4-Way handshake offload while IEEE802.1X authentication is managed by upper layers. 
Signed-off-by: Avraham Stern Signed-off-by: Johannes Berg [arend.vanspriel@broadcom.com: add WANT_1X_4WAY_HS attribute] Signed-off-by: Arend van Spriel [reword NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X docs a bit to say that the device may require it] Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e97ca3a9a67b..34e1bcd2d7ff 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2400,8 +2400,11 @@ enum ieee80211_sa_query_action { #define WLAN_MAX_KEY_LEN 32 +#define WLAN_PMK_NAME_LEN 16 #define WLAN_PMKID_LEN 16 +#define WLAN_PMK_LEN_EAP_LEAP 16 #define WLAN_PMK_LEN 32 +#define WLAN_PMK_LEN_SUITE_B_192 48 #define WLAN_OUI_WFA 0x506f9a #define WLAN_OUI_TYPE_WFA_P2P 9 -- cgit v1.2.3 From 7cf916bd639bd26db7214f2205bccdb4b9306256 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 13 Jun 2017 16:01:13 +1000 Subject: usb: Fix typo in the definition of Endpoint[out]Request The current definition is wrong. This breaks my upcoming Aspeed virtual hub driver. 
Signed-off-by: Benjamin Herrenschmidt Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/hcd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 50398b69ca44..a1f03ebfde47 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -565,9 +565,9 @@ extern void usb_ep0_reinit(struct usb_device *); ((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8) #define EndpointRequest \ - ((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8) + ((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_ENDPOINT)<<8) #define EndpointOutRequest \ - ((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8) + ((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_ENDPOINT)<<8) /* class requests from the USB 2.0 hub spec, table 11-15 */ #define HUB_CLASS_REQ(dir, type, request) ((((dir) | (type)) << 8) | (request)) -- cgit v1.2.3 From 4e75e1d7dac9d7c95c57eceb451d01f2afcc8626 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 6 Jun 2017 17:59:00 +0200 Subject: driver core: add helper to reuse a device-tree node Add a helper function to be used when reusing the device-tree node of another device. It is fairly common for drivers to reuse the device-tree node of a parent (or other ancestor) device when creating class or bus devices (e.g. gpio chips, i2c adapters, iio chips, spi masters, serdev, phys, usb root hubs). But reusing a device-tree node may cause problems if the new device is later probed as for example driver core would currently attempt to reinitialise an already active associated pinmux configuration. Other potential issues include the platform-bus code unconditionally dropping the device-tree node reference in its device destructor, reinitialisation of other bus-managed resources such as clocks, and the recently added DMA-setup in driver core. 
Note that for most examples above this is currently not an issue as the devices are never probed, but this is a problem for the USB bus which has recently gained device-tree support. This was discovered and worked-around in a rather ad-hoc fashion by commit dc5878abf49c ("usb: core: move root hub's device node assignment after it is added to bus") by not setting the of_node pointer until after the root-hub device has been registered. Instead we can allow devices to reuse a device-tree node by setting a flag in their struct device that can be used by core, bus and driver code to avoid resources from being over-allocated. Note that the helper also grabs an extra reference to the device node, which specifically balances the unconditional put in the platform-device destructor. Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 9ef518af5515..60ab00b13095 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -879,6 +879,8 @@ struct dev_links_info { * * @offline_disabled: If set, the device is permanently online. * @offline: Set after successful invocation of bus type's .offline(). + * @of_node_reused: Set if the device-tree node is shared with an ancestor + * device. * * At the lowest level, every device in a Linux system is represented by an * instance of struct device. 
The device structure contains the information @@ -966,6 +968,7 @@ struct device { bool offline_disabled:1; bool offline:1; + bool of_node_reused:1; }; static inline struct device *kobj_to_dev(struct kobject *kobj) @@ -1144,6 +1147,7 @@ extern int device_offline(struct device *dev); extern int device_online(struct device *dev); extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode); extern void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode); +void device_set_of_node_from_dev(struct device *dev, const struct device *dev2); static inline int dev_num_vf(struct device *dev) { -- cgit v1.2.3 From 39673e1995381b09a63cc7e9d0aea7cf871cb359 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Jan 2017 15:36:28 +0100 Subject: nvme.h: add struct nvme_host_mem_buf_desc and HMB flags Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn --- include/linux/nvme.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index e400a69fa1d3..180a2fdbcaef 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -587,6 +587,11 @@ struct nvme_feat_auto_pst { __le64 entries[32]; }; +enum { + NVME_HOST_MEM_ENABLE = (1 << 0), + NVME_HOST_MEM_RETURN = (1 << 1), +}; + /* Admin commands */ enum nvme_admin_opcode { @@ -671,6 +676,12 @@ struct nvme_features { __u32 rsvd12[4]; }; +struct nvme_host_mem_buf_desc { + __le64 addr; + __le32 size; + __u32 rsvd; +}; + struct nvme_create_cq { __u8 opcode; __u8 flags; -- cgit v1.2.3 From b85cf7348ab50e2042b732e19031b1d22eedc741 Mon Sep 17 00:00:00 2001 From: Arnav Dawn Date: Fri, 12 May 2017 17:12:03 +0200 Subject: nvme.h: add dword 12 - 15 fields to struct nvme_features Signed-off-by: Arnav Dawn [hch: split from a larger patch, new changelog] Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Johannes 
Thumshirn --- include/linux/nvme.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 180a2fdbcaef..51ca4771be2c 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -673,7 +673,10 @@ struct nvme_features { union nvme_data_ptr dptr; __le32 fid; __le32 dword11; - __u32 rsvd12[4]; + __le32 dword12; + __le32 dword13; + __le32 dword14; + __le32 dword15; }; struct nvme_host_mem_buf_desc { -- cgit v1.2.3 From 7b08579f5c2a5c0e94b4a9f298339c2fdf0fcc89 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 6 Jun 2017 12:54:41 +0200 Subject: tty: drop unused alt_speed from tty_struct Drop the now unused alt_speed field from struct tty_struct. Setting an alt_speed using the ASYNC_SPD flags has been deprecated since v2.1.69, and has been broken for all tty drivers but serial-core since v3.10 and commit 6865ff222cca ("TTY: do not warn about setting speed via SPD_*") without anyone noticing. Note that serial-core still supports changing speed using TIOCSSERIAL and SPD flags (including "alt-speeds"), but also warns about it being deprecated since pre-git. 
Signed-off-by: Johan Hovold Reviewed-by: Andy Shevchenko Reviewed-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index eccb4ec30a8a..585cf2b5ca94 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -316,7 +316,6 @@ struct tty_struct { struct tty_struct *link; struct fasync_struct *fasync; - int alt_speed; /* For magic substitution of 38400 bps */ wait_queue_head_t write_wait; wait_queue_head_t read_wait; struct work_struct hangup_work; -- cgit v1.2.3 From be36e000bc2d28512721c6e09c3df920b1bfad5e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Jun 2017 12:18:02 +0200 Subject: ARM: at91: fix at91_suspend_entering_slow_clock link error When CONFIG_ARCH_AT91 is enabled, but none of the specific SoC support is in use, some at91 specific drivers fail to link: drivers/tty/serial/atmel_serial.o: In function `atmel_serial_suspend': atmel_serial.c:(.text.atmel_serial_suspend+0x1e): undefined reference to `at91_suspend_entering_slow_clock' drivers/usb/host/ohci-at91.o: In function `ohci_hcd_at91_drv_suspend': ohci-at91.c:(.text.ohci_hcd_at91_drv_suspend+0x12): undefined reference to `at91_suspend_entering_slow_clock' drivers/usb/gadget/udc/at91_udc.o: In function `at91udc_suspend': at91_udc.c:(.text.at91udc_suspend+0x26): undefined reference to `at91_suspend_entering_slow_clock' This changes the at91_suspend_entering_slow_clock hack once more, adding an alternative inline implementation that is used exactly in those cases that don't provide the normal implementation. 
Fixes: c1892c2379d2 ("ARM: at91: handle CONFIG_PM for armv7m configurations") Signed-off-by: Arnd Bergmann Signed-off-by: Alexandre Belloni --- include/linux/platform_data/atmel.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h index 3c8825b67298..7b6dce7d6d33 100644 --- a/include/linux/platform_data/atmel.h +++ b/include/linux/platform_data/atmel.h @@ -52,6 +52,13 @@ struct atmel_uart_data { }; /* FIXME: this needs a better location, but gets stuff building again */ +#ifdef CONFIG_ATMEL_PM extern int at91_suspend_entering_slow_clock(void); +#else +static inline int at91_suspend_entering_slow_clock(void) +{ + return 0; +} +#endif #endif /* __ATMEL_H__ */ -- cgit v1.2.3 From 67fdfda4a99edea939a63bad1797d69dd8de00d6 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 6 Jun 2017 16:03:19 +0300 Subject: usb: gadget: core: introduce ->udc_set_speed() method Sometimes, the gadget driver we want to run has max_speed lower than what the UDC supports. In such situations, UDC might want to make sure we don't try to connect on speeds not supported by the gadget driver (e.g. super-speed capable dwc3 with high-speed capable g_midi) because that will just fail. In order to make sure this situation never happens, we introduce a new optional ->udc_set_speed() method which can be implemented by interested UDC drivers. 
Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 3ee5f2a7c0b4..1a4a4bacfae6 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -304,6 +304,7 @@ struct usb_gadget_ops { int (*udc_start)(struct usb_gadget *, struct usb_gadget_driver *); int (*udc_stop)(struct usb_gadget *); + void (*udc_set_speed)(struct usb_gadget *, enum usb_device_speed); struct usb_ep *(*match_ep)(struct usb_gadget *, struct usb_endpoint_descriptor *, struct usb_ss_ep_comp_descriptor *); -- cgit v1.2.3 From 408455245a48a1ecabd90133bae0baec3ec4cfb8 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 29 Mar 2017 18:34:50 +0200 Subject: PM / Domains: Allow overriding the ->xlate() callback Allow generic power domain providers to override the ->xlate() callback in case the default genpd_xlate_onecell() translation callback is not good enough. One potential use-case for this is to allow generic power domains to be specified by an ID rather than an index. 
Signed-off-by: Thierry Reding Acked-by: Ulf Hansson Signed-off-by: Thierry Reding --- include/linux/pm_domain.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index b7803a251044..41004d97cefa 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -206,9 +206,13 @@ static inline void pm_genpd_syscore_poweron(struct device *dev) {} /* OF PM domain providers */ struct of_device_id; +typedef struct generic_pm_domain *(*genpd_xlate_t)(struct of_phandle_args *args, + void *data); + struct genpd_onecell_data { struct generic_pm_domain **domains; unsigned int num_domains; + genpd_xlate_t xlate; }; #ifdef CONFIG_PM_GENERIC_DOMAINS_OF -- cgit v1.2.3 From 8a3d809373c6790b3958f74fa5640aedd4e804dd Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 13 Jun 2017 14:51:07 +0200 Subject: ARM: at91: remove atmel_nand_data Since AVR32 is gone and the driver rework, struct atmel_nand_data is not used anywhere. 
Acked-by: Boris Brezillon Signed-off-by: Alexandre Belloni --- include/linux/platform_data/atmel.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h index 7b6dce7d6d33..70c5c766628e 100644 --- a/include/linux/platform_data/atmel.h +++ b/include/linux/platform_data/atmel.h @@ -7,8 +7,6 @@ #ifndef __ATMEL_H__ #define __ATMEL_H__ -#include -#include #include /* Compact Flash */ @@ -23,25 +21,6 @@ struct at91_cf_data { #define AT91_IDE_SWAP_A0_A2 0x02 }; - /* NAND / SmartMedia */ -struct atmel_nand_data { - int enable_pin; /* chip enable */ - int det_pin; /* card detect */ - int rdy_pin; /* ready/busy */ - u8 rdy_pin_active_low; /* rdy_pin value is inverted */ - u8 ale; /* address line number connected to ALE */ - u8 cle; /* address line number connected to CLE */ - u8 bus_width_16; /* buswidth is 16 bit */ - u8 ecc_mode; /* ecc mode */ - u8 on_flash_bbt; /* bbt on flash */ - struct mtd_partition *parts; - unsigned int num_parts; - bool has_dma; /* support dma transfer */ - - /* default is false, only for at32ap7000 chip is true */ - bool need_reset_workaround; -}; - /* Serial */ struct atmel_uart_data { int num; /* port num */ -- cgit v1.2.3 From d396e84c56047b303cac378dde4b2e5cc430b336 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Mon, 12 Jun 2017 23:55:38 +0300 Subject: mdio_bus: handle only single PHY reset GPIO Commit 4c5e7a2c0501 ("dt-bindings: mdio: Clarify binding document") declared that a MDIO reset GPIO property should have only a single GPIO reference/specifier, however the supporting code was left intact, still burdening the kernel with now apparently useless loops -- get rid of them. Signed-off-by: Sergei Shtylyov Signed-off-by: David S. 
Miller --- include/linux/phy.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 414242200a90..51bea6593409 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -226,10 +226,8 @@ struct mii_bus { /* GPIO reset pulse width in microseconds */ int reset_delay_us; - /* Number of reset GPIOs */ - int num_reset_gpios; - /* Array of RESET GPIO descriptors */ - struct gpio_desc **reset_gpiod; + /* RESET GPIO descriptor pointer */ + struct gpio_desc *reset_gpiod; }; #define to_mii_bus(d) container_of(d, struct mii_bus, dev) -- cgit v1.2.3 From 5514174fe9c61c83bd8781c1e048ea6b4bf16a14 Mon Sep 17 00:00:00 2001 From: "yuval.shaia@oracle.com" Date: Tue, 13 Jun 2017 10:09:46 +0300 Subject: net: phy: Make phy_ethtool_ksettings_get return void Make the return value void, since the function never returns a meaningful value. Signed-off-by: Yuval Shaia Acked-by: Sergei Shtylyov Signed-off-by: David S. Miller --- include/linux/phy.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 51bea6593409..23d2e46dd322 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -872,8 +872,8 @@ void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); void phy_trigger_machine(struct phy_device *phydev, bool sync); int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); -int phy_ethtool_ksettings_get(struct phy_device *phydev, - struct ethtool_link_ksettings *cmd); +void phy_ethtool_ksettings_get(struct phy_device *phydev, + struct ethtool_link_ksettings *cmd); int phy_ethtool_ksettings_set(struct phy_device *phydev, const struct ethtool_link_ksettings *cmd); int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); -- cgit v1.2.3 From 8caab75fd2c2a92667cbb1cd315720bede3feaa9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven 
Date: Tue, 13 Jun 2017 13:23:52 +0200 Subject: spi: Generalize SPI "master" to "controller" Now struct spi_master is used for both SPI master and slave controllers, it makes sense to rename it to struct spi_controller, and replace "master" by "controller" where appropriate. For now this conversion is done for SPI core infrastructure only. Wrappers are provided for backwards compatibility, until all SPI drivers have been converted. Noteworthy details: - SPI_MASTER_GPIO_SS is retained, as it only makes sense for SPI master controllers, - spi_busnum_to_master() is retained, as it looks up masters only, - A new field spi_device.controller is added, but spi_device.master is retained for compatibility (both are always initialized by spi_alloc_device()), - spi_flash_read() is used by SPI masters only. Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 198 +++++++++++++++++++++++++++--------------------- 1 file changed, 113 insertions(+), 85 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 0a78745e5766..7b2170bfd6e7 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -24,7 +24,7 @@ struct dma_chan; struct property_entry; -struct spi_master; +struct spi_controller; struct spi_transfer; struct spi_flash_read_message; @@ -84,7 +84,7 @@ struct spi_statistics { void spi_statistics_add_transfer_stats(struct spi_statistics *stats, struct spi_transfer *xfer, - struct spi_master *master); + struct spi_controller *ctlr); #define SPI_STATISTICS_ADD_TO_FIELD(stats, field, count) \ do { \ @@ -98,13 +98,14 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats, SPI_STATISTICS_ADD_TO_FIELD(stats, field, 1) /** - * struct spi_device - Master side proxy for an SPI slave device + * struct spi_device - Controller side proxy for an SPI slave device * @dev: Driver model representation of the device. - * @master: SPI controller used with the device. 
+ * @controller: SPI controller used with the device. + * @master: Copy of controller, for backwards compatibility. * @max_speed_hz: Maximum clock rate to be used with this chip * (on this board); may be changed by the device's driver. * The spi_transfer.speed_hz can override this for each transfer. - * @chip_select: Chipselect, distinguishing chips handled by @master. + * @chip_select: Chipselect, distinguishing chips handled by @controller. * @mode: The spi mode defines how data is clocked out and in. * This may be changed by the device's driver. * The "active low" default for chipselect mode can be overridden @@ -140,7 +141,8 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats, */ struct spi_device { struct device dev; - struct spi_master *master; + struct spi_controller *controller; + struct spi_controller *master; /* compatibility layer */ u32 max_speed_hz; u8 chip_select; u8 bits_per_word; @@ -198,7 +200,7 @@ static inline void spi_dev_put(struct spi_device *spi) put_device(&spi->dev); } -/* ctldata is for the bus_master driver's runtime state */ +/* ctldata is for the bus_controller driver's runtime state */ static inline void *spi_get_ctldata(struct spi_device *spi) { return spi->controller_state; @@ -292,9 +294,9 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) spi_unregister_driver) /** - * struct spi_master - interface to SPI master controller + * struct spi_controller - interface to SPI master or slave controller * @dev: device interface to this driver - * @list: link with the global spi_master list + * @list: link with the global spi_controller list * @bus_num: board-specific (and often SOC-specific) identifier for a * given SPI controller. * @num_chipselect: chipselects are used to distinguish individual @@ -327,8 +329,8 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * the device whose settings are being modified. * @transfer: adds a message to the controller's transfer queue. 
* @cleanup: frees controller-specific state - * @can_dma: determine whether this master supports DMA - * @queued: whether this master is providing an internal message queue + * @can_dma: determine whether this controller supports DMA + * @queued: whether this controller is providing an internal message queue * @kworker: thread struct for message pump * @kworker_task: pointer to task for message pump kworker thread * @pump_messages: work struct for scheduling work to the message pump @@ -384,7 +386,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS * number. Any individual value may be -ENOENT for CS lines that * are not GPIOs (driven by the SPI controller itself). - * @statistics: statistics for the spi_master + * @statistics: statistics for the spi_controller * @dma_tx: DMA transmit channel * @dma_rx: DMA receive channel * @dummy_rx: dummy receive buffer for full-duplex devices @@ -393,7 +395,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * what Linux expects, this optional hook can be used to translate * between the two. * - * Each SPI master controller can communicate with one or more @spi_device + * Each SPI controller can communicate with one or more @spi_device * children. These make a small bus, sharing MOSI, MISO and SCK signals * but not chip select signals. Each device may be configured to use a * different clock rate, since those shared signals are ignored unless @@ -404,7 +406,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * an SPI slave device. For each such message it queues, it calls the * message's completion function when the transaction completes. 
*/ -struct spi_master { +struct spi_controller { struct device dev; struct list_head list; @@ -442,12 +444,13 @@ struct spi_master { /* other constraints relevant to this driver */ u16 flags; -#define SPI_MASTER_HALF_DUPLEX BIT(0) /* can't do full duplex */ -#define SPI_MASTER_NO_RX BIT(1) /* can't do buffer read */ -#define SPI_MASTER_NO_TX BIT(2) /* can't do buffer write */ -#define SPI_MASTER_MUST_RX BIT(3) /* requires rx */ -#define SPI_MASTER_MUST_TX BIT(4) /* requires tx */ -#define SPI_MASTER_GPIO_SS BIT(5) /* GPIO CS must select slave */ +#define SPI_CONTROLLER_HALF_DUPLEX BIT(0) /* can't do full duplex */ +#define SPI_CONTROLLER_NO_RX BIT(1) /* can't do buffer read */ +#define SPI_CONTROLLER_NO_TX BIT(2) /* can't do buffer write */ +#define SPI_CONTROLLER_MUST_RX BIT(3) /* requires rx */ +#define SPI_CONTROLLER_MUST_TX BIT(4) /* requires tx */ + +#define SPI_MASTER_GPIO_SS BIT(5) /* GPIO CS must select slave */ /* flag indicating this is an SPI slave controller */ bool slave; @@ -485,8 +488,8 @@ struct spi_master { * any other request management * + To a given spi_device, message queueing is pure fifo * - * + The master's main job is to process its message queue, - * selecting a chip then transferring data + * + The controller's main job is to process its message queue, + * selecting a chip (for masters), then transferring data * + If there are multiple spi_device children, the i/o queue * arbitration algorithm is unspecified (round robin, fifo, * priority, reservations, preemption, etc) @@ -499,7 +502,7 @@ struct spi_master { int (*transfer)(struct spi_device *spi, struct spi_message *mesg); - /* called on release() to free memory provided by spi_master */ + /* called on release() to free memory provided by spi_controller */ void (*cleanup)(struct spi_device *spi); /* @@ -509,13 +512,13 @@ struct spi_master { * not modify or store xfer and dma_tx and dma_rx must be set * while the device is prepared. 
*/ - bool (*can_dma)(struct spi_master *master, + bool (*can_dma)(struct spi_controller *ctlr, struct spi_device *spi, struct spi_transfer *xfer); /* * These hooks are for drivers that want to use the generic - * master transfer queueing mechanism. If these are used, the + * controller transfer queueing mechanism. If these are used, the * transfer() function above must NOT be specified by the driver. * Over time we expect SPI drivers to be phased over to this API. */ @@ -536,15 +539,15 @@ struct spi_master { struct completion xfer_completion; size_t max_dma_len; - int (*prepare_transfer_hardware)(struct spi_master *master); - int (*transfer_one_message)(struct spi_master *master, + int (*prepare_transfer_hardware)(struct spi_controller *ctlr); + int (*transfer_one_message)(struct spi_controller *ctlr, struct spi_message *mesg); - int (*unprepare_transfer_hardware)(struct spi_master *master); - int (*prepare_message)(struct spi_master *master, + int (*unprepare_transfer_hardware)(struct spi_controller *ctlr); + int (*prepare_message)(struct spi_controller *ctlr, struct spi_message *message); - int (*unprepare_message)(struct spi_master *master, + int (*unprepare_message)(struct spi_controller *ctlr, struct spi_message *message); - int (*slave_abort)(struct spi_master *spi); + int (*slave_abort)(struct spi_controller *ctlr); int (*spi_flash_read)(struct spi_device *spi, struct spi_flash_read_message *msg); bool (*spi_flash_can_dma)(struct spi_device *spi, @@ -556,9 +559,9 @@ struct spi_master { * of transfer_one_message() provied by the core. 
*/ void (*set_cs)(struct spi_device *spi, bool enable); - int (*transfer_one)(struct spi_master *master, struct spi_device *spi, + int (*transfer_one)(struct spi_controller *ctlr, struct spi_device *spi, struct spi_transfer *transfer); - void (*handle_err)(struct spi_master *master, + void (*handle_err)(struct spi_controller *ctlr, struct spi_message *message); /* gpio chip select */ @@ -575,58 +578,59 @@ struct spi_master { void *dummy_rx; void *dummy_tx; - int (*fw_translate_cs)(struct spi_master *master, unsigned cs); + int (*fw_translate_cs)(struct spi_controller *ctlr, unsigned cs); }; -static inline void *spi_master_get_devdata(struct spi_master *master) +static inline void *spi_controller_get_devdata(struct spi_controller *ctlr) { - return dev_get_drvdata(&master->dev); + return dev_get_drvdata(&ctlr->dev); } -static inline void spi_master_set_devdata(struct spi_master *master, void *data) +static inline void spi_controller_set_devdata(struct spi_controller *ctlr, + void *data) { - dev_set_drvdata(&master->dev, data); + dev_set_drvdata(&ctlr->dev, data); } -static inline struct spi_master *spi_master_get(struct spi_master *master) +static inline struct spi_controller *spi_controller_get(struct spi_controller *ctlr) { - if (!master || !get_device(&master->dev)) + if (!ctlr || !get_device(&ctlr->dev)) return NULL; - return master; + return ctlr; } -static inline void spi_master_put(struct spi_master *master) +static inline void spi_controller_put(struct spi_controller *ctlr) { - if (master) - put_device(&master->dev); + if (ctlr) + put_device(&ctlr->dev); } -static inline bool spi_controller_is_slave(struct spi_master *ctlr) +static inline bool spi_controller_is_slave(struct spi_controller *ctlr) { return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->slave; } /* PM calls that need to be issued by the driver */ -extern int spi_master_suspend(struct spi_master *master); -extern int spi_master_resume(struct spi_master *master); +extern int spi_controller_suspend(struct 
spi_controller *ctlr); +extern int spi_controller_resume(struct spi_controller *ctlr); /* Calls the driver make to interact with the message queue */ -extern struct spi_message *spi_get_next_queued_message(struct spi_master *master); -extern void spi_finalize_current_message(struct spi_master *master); -extern void spi_finalize_current_transfer(struct spi_master *master); +extern struct spi_message *spi_get_next_queued_message(struct spi_controller *ctlr); +extern void spi_finalize_current_message(struct spi_controller *ctlr); +extern void spi_finalize_current_transfer(struct spi_controller *ctlr); -/* the spi driver core manages memory for the spi_master classdev */ -extern struct spi_master *__spi_alloc_controller(struct device *host, - unsigned int size, bool slave); +/* the spi driver core manages memory for the spi_controller classdev */ +extern struct spi_controller *__spi_alloc_controller(struct device *host, + unsigned int size, bool slave); -static inline struct spi_master *spi_alloc_master(struct device *host, - unsigned int size) +static inline struct spi_controller *spi_alloc_master(struct device *host, + unsigned int size) { return __spi_alloc_controller(host, size, false); } -static inline struct spi_master *spi_alloc_slave(struct device *host, - unsigned int size) +static inline struct spi_controller *spi_alloc_slave(struct device *host, + unsigned int size) { if (!IS_ENABLED(CONFIG_SPI_SLAVE)) return NULL; @@ -634,18 +638,18 @@ static inline struct spi_master *spi_alloc_slave(struct device *host, return __spi_alloc_controller(host, size, true); } -extern int spi_register_master(struct spi_master *master); -extern int devm_spi_register_master(struct device *dev, - struct spi_master *master); -extern void spi_unregister_master(struct spi_master *master); +extern int spi_register_controller(struct spi_controller *ctlr); +extern int devm_spi_register_controller(struct device *dev, + struct spi_controller *ctlr); +extern void 
spi_unregister_controller(struct spi_controller *ctlr); -extern struct spi_master *spi_busnum_to_master(u16 busnum); +extern struct spi_controller *spi_busnum_to_master(u16 busnum); /* * SPI resource management while processing a SPI message */ -typedef void (*spi_res_release_t)(struct spi_master *master, +typedef void (*spi_res_release_t)(struct spi_controller *ctlr, struct spi_message *msg, void *res); @@ -670,7 +674,7 @@ extern void *spi_res_alloc(struct spi_device *spi, extern void spi_res_add(struct spi_message *message, void *res); extern void spi_res_free(void *res); -extern void spi_res_release(struct spi_master *master, +extern void spi_res_release(struct spi_controller *ctlr, struct spi_message *message); /*---------------------------------------------------------------------------*/ @@ -854,7 +858,7 @@ struct spi_message { /* for optional use by whatever driver currently owns the * spi_message ... between calls to spi_async and then later - * complete(), that's the spi_master controller driver. + * complete(), that's the spi_controller controller driver. 
*/ struct list_head queue; void *state; @@ -943,21 +947,22 @@ extern int spi_slave_abort(struct spi_device *spi); static inline size_t spi_max_message_size(struct spi_device *spi) { - struct spi_master *master = spi->master; - if (!master->max_message_size) + struct spi_controller *ctlr = spi->controller; + + if (!ctlr->max_message_size) return SIZE_MAX; - return master->max_message_size(spi); + return ctlr->max_message_size(spi); } static inline size_t spi_max_transfer_size(struct spi_device *spi) { - struct spi_master *master = spi->master; + struct spi_controller *ctlr = spi->controller; size_t tr_max = SIZE_MAX; size_t msg_max = spi_max_message_size(spi); - if (master->max_transfer_size) - tr_max = master->max_transfer_size(spi); + if (ctlr->max_transfer_size) + tr_max = ctlr->max_transfer_size(spi); /* transfer size limit must not be greater than messsage size limit */ return min(tr_max, msg_max); @@ -968,7 +973,7 @@ spi_max_transfer_size(struct spi_device *spi) /* SPI transfer replacement methods which make use of spi_res */ struct spi_replaced_transfers; -typedef void (*spi_replaced_release_t)(struct spi_master *master, +typedef void (*spi_replaced_release_t)(struct spi_controller *ctlr, struct spi_message *msg, struct spi_replaced_transfers *res); /** @@ -1012,7 +1017,7 @@ extern struct spi_replaced_transfers *spi_replace_transfers( /* SPI transfer transformation methods */ -extern int spi_split_transfers_maxsize(struct spi_master *master, +extern int spi_split_transfers_maxsize(struct spi_controller *ctlr, struct spi_message *msg, size_t maxsize, gfp_t gfp); @@ -1026,8 +1031,8 @@ extern int spi_split_transfers_maxsize(struct spi_master *master, extern int spi_sync(struct spi_device *spi, struct spi_message *message); extern int spi_sync_locked(struct spi_device *spi, struct spi_message *message); -extern int spi_bus_lock(struct spi_master *master); -extern int spi_bus_unlock(struct spi_master *master); +extern int spi_bus_lock(struct spi_controller *ctlr); 
+extern int spi_bus_unlock(struct spi_controller *ctlr); /** * spi_sync_transfer - synchronous SPI data transfer @@ -1212,9 +1217,9 @@ struct spi_flash_read_message { /* SPI core interface for flash read support */ static inline bool spi_flash_read_supported(struct spi_device *spi) { - return spi->master->spi_flash_read && - (!spi->master->flash_read_supported || - spi->master->flash_read_supported(spi)); + return spi->controller->spi_flash_read && + (!spi->controller->flash_read_supported || + spi->controller->flash_read_supported(spi)); } int spi_flash_read(struct spi_device *spi, @@ -1247,7 +1252,7 @@ int spi_flash_read(struct spi_device *spi, * @irq: Initializes spi_device.irq; depends on how the board is wired. * @max_speed_hz: Initializes spi_device.max_speed_hz; based on limits * from the chip datasheet and board-specific signal quality issues. - * @bus_num: Identifies which spi_master parents the spi_device; unused + * @bus_num: Identifies which spi_controller parents the spi_device; unused * by spi_new_device(), and otherwise depends on board wiring. * @chip_select: Initializes spi_device.chip_select; depends on how * the board is wired. @@ -1288,7 +1293,7 @@ struct spi_board_info { /* bus_num is board specific and matches the bus_num of some - * spi_master that will probably be registered later. + * spi_controller that will probably be registered later. * * chip_select reflects how this chip is wired to that master; * it's less than num_chipselect. @@ -1322,7 +1327,7 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n) /* If you're hotplugging an adapter with devices (parport, usb, etc) * use spi_new_device() to describe each device. You can also call * spi_unregister_device() to start making that device vanish, but - * normally that would be handled by spi_unregister_master(). + * normally that would be handled by spi_unregister_controller(). 
* * You can also use spi_alloc_device() and spi_add_device() to use a two * stage registration sequence for each spi_device. This gives the caller @@ -1331,13 +1336,13 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n) * be defined using the board info. */ extern struct spi_device * -spi_alloc_device(struct spi_master *master); +spi_alloc_device(struct spi_controller *ctlr); extern int spi_add_device(struct spi_device *spi); extern struct spi_device * -spi_new_device(struct spi_master *, struct spi_board_info *); +spi_new_device(struct spi_controller *, struct spi_board_info *); extern void spi_unregister_device(struct spi_device *spi); @@ -1345,9 +1350,32 @@ extern const struct spi_device_id * spi_get_device_id(const struct spi_device *sdev); static inline bool -spi_transfer_is_last(struct spi_master *master, struct spi_transfer *xfer) +spi_transfer_is_last(struct spi_controller *ctlr, struct spi_transfer *xfer) { - return list_is_last(&xfer->transfer_list, &master->cur_msg->transfers); + return list_is_last(&xfer->transfer_list, &ctlr->cur_msg->transfers); } + +/* Compatibility layer */ +#define spi_master spi_controller + +#define SPI_MASTER_HALF_DUPLEX SPI_CONTROLLER_HALF_DUPLEX +#define SPI_MASTER_NO_RX SPI_CONTROLLER_NO_RX +#define SPI_MASTER_NO_TX SPI_CONTROLLER_NO_TX +#define SPI_MASTER_MUST_RX SPI_CONTROLLER_MUST_RX +#define SPI_MASTER_MUST_TX SPI_CONTROLLER_MUST_TX + +#define spi_master_get_devdata(_ctlr) spi_controller_get_devdata(_ctlr) +#define spi_master_set_devdata(_ctlr, _data) \ + spi_controller_set_devdata(_ctlr, _data) +#define spi_master_get(_ctlr) spi_controller_get(_ctlr) +#define spi_master_put(_ctlr) spi_controller_put(_ctlr) +#define spi_master_suspend(_ctlr) spi_controller_suspend(_ctlr) +#define spi_master_resume(_ctlr) spi_controller_resume(_ctlr) + +#define spi_register_master(_ctlr) spi_register_controller(_ctlr) +#define devm_spi_register_master(_dev, _ctlr) \ + devm_spi_register_controller(_dev, _ctlr) +#define 
spi_unregister_master(_ctlr) spi_unregister_controller(_ctlr) + #endif /* __LINUX_SPI_H */ -- cgit v1.2.3 From 4798a714d6a78171d7df48c921dddd0dc004f0a0 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Tue, 13 Jun 2017 10:56:08 -0400 Subject: of_mdio: move of_mdio_parse_addr to header file The of_mdio_parse_addr() helper function is useful to other code, but the module dependency chain causes issues. To work around this, we can move of_mdio_parse_addr() to be an inline function in the header file. This gets rid of the dependencies and still allows for the reuse of code. Reported-by: Liviu Dudau Signed-off-by: Jon Mason Fixes: 342fa1964439 ("mdio: mux: make child bus walking more permissive and errors more verbose") Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/of_mdio.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index ba35ba520487..f5db93bcd069 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -27,11 +27,33 @@ struct phy_device *of_phy_attach(struct net_device *dev, phy_interface_t iface); extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); -extern int of_mdio_parse_addr(struct device *dev, const struct device_node *np); extern int of_phy_register_fixed_link(struct device_node *np); extern void of_phy_deregister_fixed_link(struct device_node *np); extern bool of_phy_is_fixed_link(struct device_node *np); + +static inline int of_mdio_parse_addr(struct device *dev, + const struct device_node *np) +{ + u32 addr; + int ret; + + ret = of_property_read_u32(np, "reg", &addr); + if (ret < 0) { + dev_err(dev, "%s has invalid PHY address\n", np->full_name); + return ret; + } + + /* A PHY must have a reg property in the range [0-31] */ + if (addr >= PHY_MAX_ADDR) { + dev_err(dev, "%s PHY address %i is too large\n", + np->full_name, addr); + return -EINVAL; + } + + return addr; +} + 
#else /* CONFIG_OF_MDIO */ static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) { -- cgit v1.2.3 From 058fe49da3b6ab71b57effd49dcc5d007071eea5 Mon Sep 17 00:00:00 2001 From: Leilk Liu Date: Mon, 12 Jun 2017 09:24:39 +0800 Subject: spi: mediatek: adjust register to enhance time accuracy this patch adjust register to enhance time accuracy. Signed-off-by: Leilk Liu Signed-off-by: Mark Brown --- include/linux/platform_data/spi-mt65xx.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/spi-mt65xx.h b/include/linux/platform_data/spi-mt65xx.h index 54b04483976c..ba4e4bb70262 100644 --- a/include/linux/platform_data/spi-mt65xx.h +++ b/include/linux/platform_data/spi-mt65xx.h @@ -16,5 +16,7 @@ struct mtk_chip_config { u32 tx_mlsb; u32 rx_mlsb; + u32 cs_pol; + u32 sample_sel; }; #endif -- cgit v1.2.3 From 00f4b652b6f1dbfd4e1d5419d7f1cc23b1374da8 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 31 May 2017 16:56:43 -0500 Subject: trace: rename trace_enum_map to trace_eval_map Each enum is loaded into the trace_enum_map, as we are now using this for more than enums rename it. 
 Link: http://lkml.kernel.org/r/20170531215653.3240-3-jeremy.linton@arm.com Signed-off-by: Jeremy Linton Signed-off-by: Steven Rostedt (VMware) --- include/linux/module.h | 2 +- include/linux/tracepoint.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 21f56393602f..46b48043d741 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -442,7 +442,7 @@ struct module { #ifdef CONFIG_EVENT_TRACING struct trace_event_call **trace_events; unsigned int num_trace_events; - struct trace_enum_map **trace_enums; + struct trace_eval_map **trace_enums; unsigned int num_trace_enums; #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index cc48cb2ce209..f7b0f5525e46 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -25,10 +25,10 @@ struct module; struct tracepoint; struct notifier_block; -struct trace_enum_map { +struct trace_eval_map { const char *system; - const char *enum_string; - unsigned long enum_value; + const char *eval_string; + unsigned long eval_value; }; #define TRACEPOINT_DEFAULT_PRIO 10 -- cgit v1.2.3 From 99be647c5841d570a23b5dfa65bfecada8b6e6b5 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 31 May 2017 16:56:44 -0500 Subject: trace: rename struct module entry for trace enums Each module has a list of enums it's contributing to the enum map; rename that entry to reflect its use by more than enums. 
Link: http://lkml.kernel.org/r/20170531215653.3240-4-jeremy.linton@arm.com Signed-off-by: Jeremy Linton Signed-off-by: Steven Rostedt (VMware) --- include/linux/module.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 46b48043d741..8eb9a1e693e5 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -442,8 +442,8 @@ struct module { #ifdef CONFIG_EVENT_TRACING struct trace_event_call **trace_events; unsigned int num_trace_events; - struct trace_eval_map **trace_enums; - unsigned int num_trace_enums; + struct trace_eval_map **trace_evals; + unsigned int num_trace_evals; #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD unsigned int num_ftrace_callsites; -- cgit v1.2.3 From 4f0dfd76e9cc9296d74d6d5f579a5c7ca3bed869 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 31 May 2017 16:56:50 -0500 Subject: tracing: define TRACE_DEFINE_SIZEOF() macro to map sizeof's to their values Perf has a problem that if sizeof() macros are used within TRACE_EVENT() macro's they end up in userspace as "sizeof(kernel structure)" which cannot properly be parsed. Add a macro which can forward this data through the eval_map for userspace utilization. Link: http://lkml.kernel.org/r/20170531215653.3240-10-jeremy.linton@arm.com Signed-off-by: Jeremy Linton Signed-off-by: Steven Rostedt (VMware) --- include/linux/tracepoint.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index f7b0f5525e46..a26ffbe09e71 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -88,6 +88,7 @@ extern void syscall_unregfunc(void); #define PARAMS(args...) 
args #define TRACE_DEFINE_ENUM(x) +#define TRACE_DEFINE_SIZEOF(x) #endif /* _LINUX_TRACEPOINT_H */ -- cgit v1.2.3 From 192a82f9003fe8fabd6088aa646e829225a94c55 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 7 Jun 2017 09:42:28 +0100 Subject: hrtimer_nanosleep(): Pass rmtp in restart_block Store the pointer to the timespec which gets updated with the remaining time in the restart block and remove the function argument. [ tglx: Added changelog ] Signed-off-by: Al Viro Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170607084241.28657-3-viro@ZenIV.linux.org.uk --- include/linux/hrtimer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 8c5b10eb7265..b80c34f6fd4b 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -453,7 +453,6 @@ static inline u64 hrtimer_forward_now(struct hrtimer *timer, /* Precise sleep: */ extern long hrtimer_nanosleep(struct timespec64 *rqtp, - struct timespec __user *rmtp, const enum hrtimer_mode mode, const clockid_t clockid); extern long hrtimer_nanosleep_restart(struct restart_block *restart_block); -- cgit v1.2.3 From edbeda46322fbcb15af2d2d0f2daffb0cd349a5a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 7 Jun 2017 09:42:31 +0100 Subject: time/posix-timers: Move the compat copyouts to the nanosleep implementations Turn restart_block.nanosleep.{rmtp,compat_rmtp} into a tagged union (kind = 1 -> native, kind = 2 -> compat, kind = 0 -> nothing) and make the places doing actual copyout handle compat as well as native (that will become a helper in the next commit). Result: compat wrappers, messing with reassignments, etc. are gone. 
[ tglx: Folded in a variant of Peter Zijlstras enum patch ] Signed-off-by: Al Viro Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170607084241.28657-6-viro@ZenIV.linux.org.uk --- include/linux/posix-timers.h | 2 -- include/linux/restart_block.h | 15 +++++++++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 667095dbcd37..29f1b7f09ced 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -110,8 +110,6 @@ void posix_cpu_timers_exit_group(struct task_struct *task); void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, u64 *newval, u64 *oldval); -long clock_nanosleep_restart(struct restart_block *restart_block); - void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new); void posixtimer_rearm(struct siginfo *info); diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h index 0d905d8ec553..19df8422606c 100644 --- a/include/linux/restart_block.h +++ b/include/linux/restart_block.h @@ -11,6 +11,14 @@ struct timespec; struct compat_timespec; struct pollfd; +enum timespec_type { + TT_NONE = 0, + TT_NATIVE = 1, +#ifdef CONFIG_COMPAT + TT_COMPAT = 2, +#endif +}; + /* * System call restart block. 
*/ @@ -29,10 +37,13 @@ struct restart_block { /* For nanosleep */ struct { clockid_t clockid; - struct timespec __user *rmtp; + enum timespec_type type; + union { + struct timespec __user *rmtp; #ifdef CONFIG_COMPAT - struct compat_timespec __user *compat_rmtp; + struct compat_timespec __user *compat_rmtp; #endif + }; u64 expires; } nanosleep; /* For poll */ -- cgit v1.2.3 From ce41aaf47af3d28c4c958e07675a3e0a51f09bd3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 7 Jun 2017 09:42:32 +0100 Subject: hrtimers/posix-timers: Merge nanosleep timespec copyout logics into a new helper Signed-off-by: Al Viro Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170607084241.28657-7-viro@ZenIV.linux.org.uk --- include/linux/hrtimer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index b80c34f6fd4b..38b968f3df4e 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -452,6 +452,8 @@ static inline u64 hrtimer_forward_now(struct hrtimer *timer, } /* Precise sleep: */ + +extern int nanosleep_copyout(struct restart_block *, struct timespec *); extern long hrtimer_nanosleep(struct timespec64 *rqtp, const enum hrtimer_mode mode, const clockid_t clockid); -- cgit v1.2.3 From fb923c4a3c2ee735755d4a93522150fc35d0ecbd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 7 Jun 2017 09:42:33 +0100 Subject: posix-timers: Kill ->nsleep_restart() No more users. 
Signed-off-by: Al Viro Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170607084241.28657-8-viro@ZenIV.linux.org.uk --- include/linux/hrtimer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 38b968f3df4e..d83b7ed1cb0e 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -457,7 +457,6 @@ extern int nanosleep_copyout(struct restart_block *, struct timespec *); extern long hrtimer_nanosleep(struct timespec64 *rqtp, const enum hrtimer_mode mode, const clockid_t clockid); -extern long hrtimer_nanosleep_restart(struct restart_block *restart_block); extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *tsk); -- cgit v1.2.3 From 3a4d44b6162555070194e486ff6b3799a8d323a2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 7 Jun 2017 09:42:34 +0100 Subject: ntp: Move adjtimex related compat syscalls to native counterparts Get rid of set_fs() mess and sanitize compat_{get,put}_timex(), while we are at it. 
Signed-off-by: Al Viro Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170607084241.28657-9-viro@ZenIV.linux.org.uk --- include/linux/compat.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 1c5f3152cbb5..ecb8dd261d36 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -128,6 +128,10 @@ struct compat_timex { compat_int_t:32; compat_int_t:32; compat_int_t:32; }; +struct timex; +int compat_get_timex(struct timex *, const struct compat_timex __user *); +int compat_put_timex(struct compat_timex __user *, const struct timex *); + #define _COMPAT_NSIG_WORDS (_COMPAT_NSIG / _COMPAT_NSIG_BPW) typedef struct { -- cgit v1.2.3 From 54ad9c46c262ce4a603dc7887e37956896a0211d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 7 Jun 2017 09:42:37 +0100 Subject: itimers: Move compat itimer syscalls to native ones get rid of set_fs(), sanitize compat copyin/copyout. 
 Signed-off-by: Al Viro Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170607084241.28657-12-viro@ZenIV.linux.org.uk --- include/linux/compat.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index ecb8dd261d36..425563c7647b 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -94,6 +94,10 @@ struct compat_itimerval { struct compat_timeval it_value; }; +struct itimerval; +int get_compat_itimerval(struct itimerval *, const struct compat_itimerval __user *); +int put_compat_itimerval(struct compat_itimerval __user *, const struct itimerval *); + struct compat_tms { compat_clock_t tms_utime; compat_clock_t tms_stime; -- cgit v1.2.3 From 938e7cf2d569833a5acf689a8926faf507826253 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Jun 2017 23:34:33 +0200 Subject: posix-timers: Make nanosleep timespec argument const No nanosleep implementation modifies the rqtp argument. Mark it const. 
 Signed-off-by: Thomas Gleixner Cc: Al Viro Cc: John Stultz Cc: Peter Zijlstra --- include/linux/hrtimer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index d83b7ed1cb0e..255edd5e7a74 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -454,7 +454,7 @@ static inline u64 hrtimer_forward_now(struct hrtimer *timer, /* Precise sleep: */ extern int nanosleep_copyout(struct restart_block *, struct timespec *); -extern long hrtimer_nanosleep(struct timespec64 *rqtp, +extern long hrtimer_nanosleep(const struct timespec64 *rqtp, const enum hrtimer_mode mode, const clockid_t clockid); -- cgit v1.2.3 From 1727339590fdb5a1ded881b540cd32121278d414 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 26 May 2017 16:56:11 +0200 Subject: clocksource/drivers: Rename CLOCKSOURCE_OF_DECLARE to TIMER_OF_DECLARE The CLOCKSOURCE_OF_DECLARE macro is used widely for the timers to declare the clocksource at an early stage. However, this macro is also used to initialize the clockevent if any, or the clockevent only. It was originally suggested to declare another macro to initialize a clockevent, in order to separate the two entities even though they belong to the same IP. This was not accepted because of the impact on the DT where splitting a clocksource/clockevent definition does not make sense as it is a Linux concept not a hardware description. On the other side, the clocksource has no interrupt declared while the clockevent has, so it is easy from the driver to know if the description is for a clockevent or a clocksource, IOW it could be implemented at the driver level. So instead of dealing with a named clocksource macro, let's use a more generic one: TIMER_OF_DECLARE. The patch has no functional changes. 
Signed-off-by: Daniel Lezcano Acked-by: Heiko Stuebner Acked-by: Neil Armstrong Acked-by: Arnd Bergmann Acked-by: Matthias Brugger Reviewed-by: Linus Walleij --- include/linux/clocksource.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index f2b10d9ebd04..a86b65f0a246 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -249,7 +249,7 @@ extern int clocksource_mmio_init(void __iomem *, const char *, extern int clocksource_i8253_init(void); -#define CLOCKSOURCE_OF_DECLARE(name, compat, fn) \ +#define TIMER_OF_DECLARE(name, compat, fn) \ OF_DECLARE_1_RET(clksrc, name, compat, fn) #ifdef CONFIG_CLKSRC_PROBE -- cgit v1.2.3 From ba5d08c0ea785d5710c5a1e7dc3182b7124d63c0 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 26 May 2017 17:40:46 +0200 Subject: clocksource/drivers: Rename clocksource_probe to timer_probe The function name is now renamed to 'timer_probe' for consistency with the CLOCKSOURCE_OF_DECLARE => TIMER_OF_DECLARE change. 
Signed-off-by: Daniel Lezcano Acked-by: Viresh Kumar Acked-by: Heiko Stuebner Reviewed-by: Linus Walleij --- include/linux/clocksource.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index a86b65f0a246..010bb9f60db2 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -253,9 +253,9 @@ extern int clocksource_i8253_init(void); OF_DECLARE_1_RET(clksrc, name, compat, fn) #ifdef CONFIG_CLKSRC_PROBE -extern void clocksource_probe(void); +extern void timer_probe(void); #else -static inline void clocksource_probe(void) {} +static inline void timer_probe(void) {} #endif #define CLOCKSOURCE_ACPI_DECLARE(name, table_id, fn) \ -- cgit v1.2.3 From 77d62f532282010d5859a99819d6a0c233bfcfff Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 26 May 2017 17:42:25 +0200 Subject: clocksource/drivers: Rename CLOCKSOURCE_ACPI_DECLARE to TIMER_ACPI_DECLARE The macro name is now renamed to 'TIMER_ACPI_DECLARE' for consistency with the CLOCKSOURCE_OF_DECLARE => TIMER_OF_DECLARE change. 
Signed-off-by: Daniel Lezcano Reviewed-by: Linus Walleij --- include/linux/clocksource.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 010bb9f60db2..e43f37f9a1b6 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -258,7 +258,7 @@ extern void timer_probe(void); static inline void timer_probe(void) {} #endif -#define CLOCKSOURCE_ACPI_DECLARE(name, table_id, fn) \ +#define TIMER_ACPI_DECLARE(name, table_id, fn) \ ACPI_DECLARE_PROBE_ENTRY(clksrc, name, table_id, 0, NULL, 0, fn) #endif /* _LINUX_CLOCKSOURCE_H */ -- cgit v1.2.3 From 2fcc112af37fa88f8da077d6dd3bb8e38e75adb1 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 26 May 2017 18:33:27 +0200 Subject: clocksource/drivers: Rename clksrc table to timer The table name is now renamed to 'timer' for consistency with the CLOCKSOURCE_OF_DECLARE => TIMER_OF_DECLARE change. Signed-off-by: Daniel Lezcano Reviewed-by: Linus Walleij --- include/linux/clocksource.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index e43f37f9a1b6..7cd38b21cbd3 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -250,7 +250,7 @@ extern int clocksource_mmio_init(void __iomem *, const char *, extern int clocksource_i8253_init(void); #define TIMER_OF_DECLARE(name, compat, fn) \ - OF_DECLARE_1_RET(clksrc, name, compat, fn) + OF_DECLARE_1_RET(timer, name, compat, fn) #ifdef CONFIG_CLKSRC_PROBE extern void timer_probe(void); @@ -259,6 +259,6 @@ static inline void timer_probe(void) {} #endif #define TIMER_ACPI_DECLARE(name, table_id, fn) \ - ACPI_DECLARE_PROBE_ENTRY(clksrc, name, table_id, 0, NULL, 0, fn) + ACPI_DECLARE_PROBE_ENTRY(timer, name, table_id, 0, NULL, 0, fn) #endif /* _LINUX_CLOCKSOURCE_H */ -- cgit v1.2.3 From bb0eb050a577a866cb47c2dc37596f1207f4c2d9 Mon Sep 17 
00:00:00 2001 From: Daniel Lezcano Date: Fri, 26 May 2017 19:34:11 +0200 Subject: clocksource/drivers: Rename CLKSRC_OF to TIMER_OF The config option name is now renamed to 'TIMER_OF' for consistency with the CLOCKSOURCE_OF_DECLARE => TIMER_OF_DECLARE change. Signed-off-by: Daniel Lezcano Reviewed-by: Linus Walleij --- include/linux/clocksource.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 7cd38b21cbd3..c48ceefe0e6e 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -252,7 +252,7 @@ extern int clocksource_i8253_init(void); #define TIMER_OF_DECLARE(name, compat, fn) \ OF_DECLARE_1_RET(timer, name, compat, fn) -#ifdef CONFIG_CLKSRC_PROBE +#ifdef CONFIG_TIMER_PROBE extern void timer_probe(void); #else static inline void timer_probe(void) {} -- cgit v1.2.3 From 8b7a3b568814a8e36d2910dd74465b0215aa0a31 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 30 May 2017 08:35:40 +0200 Subject: clocksource/drivers: Add an alias macro CLOCKSOURCE_OF_DECLARE The macro CLOCKSOURCE_OF_DECLARE has been renamed to TIMER_OF_DECLARE. In order to prevent conflicts for the next merge window, a temporary alias has been added which will be removed later. 
Cc: Arnd Bergman Signed-off-by: Daniel Lezcano --- include/linux/clocksource.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index c48ceefe0e6e..d92bd83eed9f 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -252,6 +252,9 @@ extern int clocksource_i8253_init(void); #define TIMER_OF_DECLARE(name, compat, fn) \ OF_DECLARE_1_RET(timer, name, compat, fn) +#define CLOCKSOURCE_OF_DECLARE(name, compat, fn) \ + TIMER_OF_DECLARE(name, compat, fn) + #ifdef CONFIG_TIMER_PROBE extern void timer_probe(void); #else -- cgit v1.2.3 From 92c8f7c0e109d2fcff607a13dd7c1437d6c9f87a Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 13 Jun 2017 22:24:39 +0200 Subject: tty/serial: atmel: make the driver DT only Now that AVR32 is gone, platform_data are not used to initialize the driver anymore, remove that path from the driver. Also remove the now unused struct atmel_uart_data. Signed-off-by: Alexandre Belloni Acked-by: Richard Genoud Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_data/atmel.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h index 3c8825b67298..d36bc8d17e97 100644 --- a/include/linux/platform_data/atmel.h +++ b/include/linux/platform_data/atmel.h @@ -9,7 +9,6 @@ #include #include -#include /* Compact Flash */ struct at91_cf_data { @@ -42,15 +41,6 @@ struct atmel_nand_data { bool need_reset_workaround; }; - /* Serial */ -struct atmel_uart_data { - int num; /* port num */ - short use_dma_tx; /* use transmit DMA? */ - short use_dma_rx; /* use receive DMA? */ - void __iomem *regs; /* virt. 
base address, if any */ - struct serial_rs485 rs485; /* rs485 settings */ -}; - /* FIXME: this needs a better location, but gets stuff building again */ extern int at91_suspend_entering_slow_clock(void); -- cgit v1.2.3 From 393cc3f51135ea2520521f776ef3afdf3395c797 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 13 Jun 2017 13:35:50 +0200 Subject: fs/fcntl: f_setown, allow returning error Allow f_setown to return an error value. We will fail in the next patch with EINVAL for bad input to f_setown, so tile the path for the later patch. Signed-off-by: Jiri Slaby Reviewed-by: Jeff Layton Cc: Jeff Layton Cc: "J. Bruce Fields" Cc: Alexander Viro Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Jeff Layton --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index aa4affb38c39..25ee1ff6d45b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1249,7 +1249,7 @@ extern void fasync_free(struct fasync_struct *); extern void kill_fasync(struct fasync_struct **, int, int); extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force); -extern void f_setown(struct file *filp, unsigned long arg, int force); +extern int f_setown(struct file *filp, unsigned long arg, int force); extern void f_delown(struct file *filp); extern pid_t f_getown(struct file *filp); extern int send_sigurg(struct fown_struct *fown); -- cgit v1.2.3 From 3bc1630774bc9f202308ae04608a32c366b41caf Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 25 Apr 2017 19:38:48 +0200 Subject: of: Provide dummy of_device_compatible_match() for compile-testing Most of_device_*() functions have dummy versions for CONFIG_OF=n, but of_device_compatible_match() hasn't. Fix that to improve the ability to do compile-testing. 
Fixes: b9c13fe32faaa71c ("dt: Add of_device_compatible_match()") Signed-off-by: Geert Uytterhoeven Signed-off-by: Rob Herring --- include/linux/of.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 50fcdb54087f..c72ba9437a43 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -627,6 +627,12 @@ static inline int of_device_is_compatible(const struct device_node *device, return 0; } +static inline int of_device_compatible_match(struct device_node *device, + const char *const *compat) +{ + return 0; +} + static inline bool of_device_is_available(const struct device_node *device) { return false; -- cgit v1.2.3 From 31fd85816dbe3a714bcc3f67c17c3dd87011f79e Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 13 Jun 2017 15:52:13 -0700 Subject: bpf: permits narrower load from bpf program context fields Currently, verifier will reject a program if it contains a narrower load from the bpf context structure. For example, __u8 h = __sk_buff->hash, or __u16 p = __sk_buff->protocol __u32 sample_period = bpf_perf_event_data->sample_period which are narrower loads of 4-byte or 8-byte field. This patch solves the issue by: . Introduce a new parameter ctx_field_size to carry the field size of narrower load from prog type specific *__is_valid_access validator back to verifier. . The non-zero ctx_field_size for a memory access indicates (1). underlying prog type specific convert_ctx_accesses supporting non-whole-field access (2). the current insn is a narrower or whole field access. . In verifier, for such loads where load memory size is less than ctx_field_size, verifier transforms it to a full field load followed by proper masking. . Currently, __sk_buff and bpf_perf_event_data->sample_period are supporting narrowing loads. . Narrower stores are still not allowed as typical ctx stores are just normal stores.
Because of this change, some tests in verifier will fail and these tests are removed. As a bonus, rename some out of bound __sk_buff->cb access to proper field name and remove two redundant "skb cb oob" tests. Acked-by: Daniel Borkmann Signed-off-by: Yonghong Song Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 +- include/linux/bpf_verifier.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c32bace66d3d..1bcbf0a71f75 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -157,7 +157,7 @@ struct bpf_verifier_ops { * with 'type' (read or write) is allowed */ bool (*is_valid_access)(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type); + enum bpf_reg_type *reg_type, int *ctx_field_size); int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, const struct bpf_prog *prog); u32 (*convert_ctx_access)(enum bpf_access_type type, diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index d5093b52b485..189741c0da85 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -73,6 +73,7 @@ struct bpf_insn_aux_data { enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ }; + int ctx_field_size; /* the ctx field size for load/store insns, maybe 0 */ }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ -- cgit v1.2.3 From 73a7242a06ff995d771fbe243e72b516feaa6e3d Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 13 Jun 2017 17:18:01 -0400 Subject: cgroup: Keep accurate count of tasks in each css_set The reference count in the css_set data structure was used as a proxy of the number of tasks attached to that css_set. However, that count is actually not an accurate measure especially with thread mode support. 
So a new variable nr_tasks is added to the css_set to keep track of the actual task count. This new variable is protected by the css_set_lock. Functions that require the actual task count are updated to use the new variable. tj: s/task_count/nr_tasks/ for consistency with cgroup_root->nr_cgrps. Refreshed on top of cgroup/for-v4.13 which dropped on css_set_populated() -> nr_tasks conversion. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ec47101cb1bf..3bc4196bf217 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -166,6 +166,9 @@ struct css_set { /* the default cgroup associated with this css_set */ struct cgroup *dfl_cgrp; + /* internal task count, protected by css_set_lock */ + int nr_tasks; + /* * Lists running through all tasks using this cgroup group. * mg_tasks lists tasks which belong to this cset but are in the -- cgit v1.2.3 From 33e4f80ee69b5168badf37edbfed796eb48434b9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 12 Jun 2017 22:56:34 +0200 Subject: ACPI / PM: Ignore spurious SCI wakeups from suspend-to-idle The ACPI SCI (System Control Interrupt) is set up as a wakeup IRQ during suspend-to-idle transitions and, consequently, any events signaled through it wake up the system from that state. However, on some systems some of the events signaled via the ACPI SCI while suspended to idle should not cause the system to wake up. In fact, quite often they should just be discarded. Arguably, systems should not resume entirely on such events, but in order to decide which events really should cause the system to resume and which are spurious, it is necessary to resume up to the point when ACPI SCIs are actually handled and processed, which is after executing dpm_resume_noirq() in the system resume path. 
For this reason, add a loop around freeze_enter() in which the platforms can process events signaled via multiplexed IRQ lines like the ACPI SCI and add suspend-to-idle hooks that can be used for this purpose to struct platform_freeze_ops. In the ACPI case, the ->wake hook is used for checking if the SCI has triggered while suspended and deferring the interrupt-induced system wakeup until the events signaled through it are actually processed sufficiently to decide whether or not the system should resume. In turn, the ->sync hook allows all of the relevant event queues to be flushed so as to prevent events from being missed due to race conditions. In addition to that, some ACPI code processing wakeup events needs to be modified to use the "hard" version of wakeup triggers, so that it will cause a system resume to happen on device-induced wakeup events even if the "soft" mechanism to prevent the system from suspending is not enabled. However, to preserve the existing behavior with respect to suspend-to-RAM, this is only done in the suspend-to-idle case and only if an SCI has occurred while suspended. Signed-off-by: Rafael J.
Wysocki --- include/linux/suspend.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index d9718378a8be..0b1cf32edfd7 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -189,6 +189,8 @@ struct platform_suspend_ops { struct platform_freeze_ops { int (*begin)(void); int (*prepare)(void); + void (*wake)(void); + void (*sync)(void); void (*restore)(void); void (*end)(void); }; @@ -428,7 +430,8 @@ extern unsigned int pm_wakeup_irq; extern bool pm_wakeup_pending(void); extern void pm_system_wakeup(void); -extern void pm_wakeup_clear(void); +extern void pm_system_cancel_wakeup(void); +extern void pm_wakeup_clear(bool reset); extern void pm_system_irq_wakeup(unsigned int irq_number); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); @@ -478,7 +481,7 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) static inline bool pm_wakeup_pending(void) { return false; } static inline void pm_system_wakeup(void) {} -static inline void pm_wakeup_clear(void) {} +static inline void pm_wakeup_clear(bool reset) {} static inline void pm_system_irq_wakeup(unsigned int irq_number) {} static inline void lock_system_sleep(void) {} -- cgit v1.2.3 From 132a324ab62fe4fb8d6dcc2ab4eddb0e93b69afe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:36 +0100 Subject: KVM: arm64: vgic-v3: Add ICV_IAR1_EL1 handler Add a handler for reading the guest's view of the ICC_IAR1_EL1 register. This involves finding the highest priority Group-1 interrupt, checking against both PMR and the active group priority, activating the interrupt and setting the group priority as active. 
Tested-by: Alexander Graf Acked-by: David Daney Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- include/linux/irqchip/arm-gic-v3.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 1fa293a37f4a..d70668fae003 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -405,6 +405,7 @@ #define ICH_LR_PHYS_ID_SHIFT 32 #define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT) #define ICH_LR_PRIORITY_SHIFT 48 +#define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT) /* These are for GICv2 emulation only */ #define GICH_LR_VIRTUALID (0x3ffUL << 0) -- cgit v1.2.3 From b6f49035b4bf6e2709f2a5fed3107f5438c1fd02 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:37 +0100 Subject: KVM: arm64: vgic-v3: Add ICV_EOIR1_EL1 handler Add a handler for writing the guest's view of the ICC_EOIR1_EL1 register. This involves dropping the priority of the interrupt, and deactivating it if required (EOImode == 0). 
Tested-by: Alexander Graf Acked-by: David Daney Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- include/linux/irqchip/arm-gic-v3.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index d70668fae003..1f458ac6f494 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -417,6 +417,8 @@ #define ICH_HCR_EN (1 << 0) #define ICH_HCR_UIE (1 << 1) +#define ICH_HCR_EOIcount_SHIFT 27 +#define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) #define ICH_VMCR_ACK_CTL_SHIFT 2 #define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT) -- cgit v1.2.3 From 9c7bfc288c71068ab323b802dba2eb87fd08b127 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:40 +0100 Subject: KVM: arm64: vgic-v3: Enable trapping of Group-1 system registers In order to be able to trap Group-1 GICv3 system registers, we need to set ICH_HCR_EL2.TALL1 before entering the guest. This is conditionally done after having restored the guest's state, and cleared on exit. 
Tested-by: Alexander Graf Acked-by: David Daney Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/linux/irqchip/arm-gic-v3.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 1f458ac6f494..6b05d2ac8c54 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -417,6 +417,7 @@ #define ICH_HCR_EN (1 << 0) #define ICH_HCR_UIE (1 << 1) +#define ICH_HCR_TALL1 (1 << 12) #define ICH_HCR_EOIcount_SHIFT 27 #define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) -- cgit v1.2.3 From abf55766f7b062234083ff612446ff8d47e2417e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:45 +0100 Subject: KVM: arm64: vgic-v3: Enable trapping of Group-0 system registers In order to be able to trap Group-0 GICv3 system registers, we need to set ICH_HCR_EL2.TALL0 before entering the guest. This is conditionally done after having restored the guest's state, and cleared on exit.
Tested-by: Alexander Graf Acked-by: David Daney Acked-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/linux/irqchip/arm-gic-v3.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 6b05d2ac8c54..c7f31a962cfc 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -417,6 +417,7 @@ #define ICH_HCR_EN (1 << 0) #define ICH_HCR_UIE (1 << 1) +#define ICH_HCR_TALL0 (1 << 11) #define ICH_HCR_TALL1 (1 << 12) #define ICH_HCR_EOIcount_SHIFT 27 #define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) -- cgit v1.2.3 From ff89511ef29b794d6a9c6b62f5ea76fc013cdae7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:53 +0100 Subject: KVM: arm64: Enable GICv3 common sysreg trapping via command-line Now that we're able to safely handle common sysreg access, let's give the user the opportunity to enable it by passing a specific command-line option (vgic_v3.common_trap). 
Tested-by: Alexander Graf Acked-by: David Daney Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall --- include/linux/irqchip/arm-gic-v3.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index c7f31a962cfc..6a1f87ff94e2 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -417,6 +417,7 @@ #define ICH_HCR_EN (1 << 0) #define ICH_HCR_UIE (1 << 1) +#define ICH_HCR_TC (1 << 10) #define ICH_HCR_TALL0 (1 << 11) #define ICH_HCR_TALL1 (1 << 12) #define ICH_HCR_EOIcount_SHIFT 27 -- cgit v1.2.3 From 91e0bf81258c07aad27a4833368569ce873cd83e Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 14 Jun 2017 17:37:13 +0100 Subject: ACPI/IORT: Remove iort_node_match() Commit 316ca8804ea8 ("ACPI/IORT: Remove linker section for IORT entries probing") removed the linker section for IORT entries probing. Since those IORT entries were the only iort_node_match() interface users, the iort_node_match() became obsolete and can then be removed. Remove the ACPI IORT iort_node_match() interface from the kernel. 
Acked-by: Marc Zyngier Acked-by: Hanjun Guo Signed-off-by: Lorenzo Pieralisi Cc: Hanjun Guo Signed-off-by: Will Deacon --- include/linux/acpi_iort.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 3ff9acea8616..8379d406ad2e 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -31,7 +31,6 @@ void iort_deregister_domain_token(int trans_id); struct fwnode_handle *iort_find_domain_token(int trans_id); #ifdef CONFIG_ACPI_IORT void acpi_iort_init(void); -bool iort_node_match(u8 type); u32 iort_msi_map_rid(struct device *dev, u32 req_id); struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id); void acpi_configure_pmsi_domain(struct device *dev); @@ -41,7 +40,6 @@ void iort_set_dma_mask(struct device *dev); const struct iommu_ops *iort_iommu_configure(struct device *dev); #else static inline void acpi_iort_init(void) { } -static inline bool iort_node_match(u8 type) { return false; } static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id) { return req_id; } static inline struct irq_domain *iort_get_device_domain(struct device *dev, -- cgit v1.2.3 From 466749f13e33d892cf9263d7efbc0ea713c23ed7 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 10 Apr 2017 12:27:01 +0200 Subject: gpu: host1x: Flesh out kerneldoc Improve kerneldoc for the public parts of the host1x infrastructure in preparation for adding driver-specific part to the GPU documentation. 
Acked-by: Daniel Vetter Signed-off-by: Thierry Reding --- include/linux/host1x.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 3d04aa1dc83e..840a8ad627b2 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -32,11 +32,27 @@ enum host1x_class { struct host1x_client; +/** + * struct host1x_client_ops - host1x client operations + * @init: host1x client initialization code + * @exit: host1x client tear down code + */ struct host1x_client_ops { int (*init)(struct host1x_client *client); int (*exit)(struct host1x_client *client); }; +/** + * struct host1x_client - host1x client structure + * @list: list node for the host1x client + * @parent: pointer to struct device representing the host1x controller + * @dev: pointer to struct device backing this host1x client + * @ops: host1x client operations + * @class: host1x class represented by this client + * @channel: host1x channel associated with this client + * @syncpts: array of syncpoints requested for this client + * @num_syncpts: number of syncpoints requested for this client + */ struct host1x_client { struct list_head list; struct device *parent; @@ -251,6 +267,15 @@ void host1x_job_unpin(struct host1x_job *job); struct host1x_device; +/** + * struct host1x_driver - host1x logical device driver + * @driver: core driver + * @subdevs: table of OF device IDs matching subdevices for this driver + * @list: list node for the driver + * @probe: called when the host1x logical device is probed + * @remove: called when the host1x logical device is removed + * @shutdown: called when the host1x logical device is shut down + */ struct host1x_driver { struct device_driver driver; -- cgit v1.2.3 From d0fbbdff2e19aabccc1107b7e12ab9f3cbf626ef Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 15 Jun 2017 02:18:27 +0300 Subject: drm/tegra: Correct copying of waitchecks and disable them in the 'submit' 
IOCTL The waitchecks along with multiple syncpoints per submit are not ready for use yet, let's forbid them for now. Signed-off-by: Dmitry Osipenko Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/linux/host1x.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 840a8ad627b2..ba0b245da732 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -193,6 +193,13 @@ struct host1x_reloc { unsigned long shift; }; +struct host1x_waitchk { + struct host1x_bo *bo; + u32 offset; + u32 syncpt_id; + u32 thresh; +}; + struct host1x_job { /* When refcount goes to zero, job can be freed */ struct kref ref; -- cgit v1.2.3 From 0f563a4bf66e5182f0882efee398f7e6bc0bb1be Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 15 Jun 2017 02:18:37 +0300 Subject: gpu: host1x: Forbid unrelated SETCLASS opcode in the firewall Several channels could be made to write the same unit concurrently via the SETCLASS opcode, trusting userspace is a bad idea. It should be possible to drop the per-client channel reservation and add a per-unit locking by inserting MLOCK's to the command stream to re-allow the SETCLASS opcode, but it will be much more work. Let's forbid the unit-unrelated class changes for now. 
Signed-off-by: Dmitry Osipenko Reviewed-by: Erik Faye-Lund Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/linux/host1x.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index ba0b245da732..b5358f855d9e 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -251,6 +251,9 @@ struct host1x_job { /* Check if register is marked as an address reg */ int (*is_addr_reg)(struct device *dev, u32 reg, u32 class); + /* Check if class belongs to the unit */ + int (*is_valid_class)(u32 class); + /* Request a SETCLASS to this class */ u32 class; -- cgit v1.2.3 From a2b78b0d53f0808ebc2a0368b589a5cb6b672294 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 15 Jun 2017 02:18:38 +0300 Subject: gpu: host1x: Correct swapped arguments in the is_addr_reg() definition Arguments of the .is_addr_reg() are swapped in the definition of the function, that is quite confusing. Signed-off-by: Dmitry Osipenko Reviewed-by: Erik Faye-Lund Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/linux/host1x.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index b5358f855d9e..476da0e06bb2 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -249,7 +249,7 @@ struct host1x_job { u8 *gather_copy_mapped; /* Check if register is marked as an address reg */ - int (*is_addr_reg)(struct device *dev, u32 reg, u32 class); + int (*is_addr_reg)(struct device *dev, u32 class, u32 reg); /* Check if class belongs to the unit */ int (*is_valid_class)(u32 class); -- cgit v1.2.3 From 8474b02531c4881a762c52ef869c52429e38633f Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Thu, 15 Jun 2017 02:18:42 +0300 Subject: gpu: host1x: Refactor channel allocation code This is largely a rewrite of the Host1x channel allocation code, bringing several changes: - The previous code 
could deadlock due to an interaction between the 'reflock' mutex and CDMA timeout handling. This gets rid of the mutex. - Support for more than 32 channels, required for Tegra186 - General refactoring, including better encapsulation of channel ownership handling into channel.c Signed-off-by: Mikko Perttunen Reviewed-by: Dmitry Osipenko Tested-by: Dmitry Osipenko Signed-off-by: Thierry Reding --- include/linux/host1x.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 476da0e06bb2..630b1a98ab58 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -172,7 +172,6 @@ struct host1x_channel; struct host1x_job; struct host1x_channel *host1x_channel_request(struct device *dev); -void host1x_channel_free(struct host1x_channel *channel); struct host1x_channel *host1x_channel_get(struct host1x_channel *channel); void host1x_channel_put(struct host1x_channel *channel); int host1x_job_submit(struct host1x_job *job); -- cgit v1.2.3 From 97f6ef6464dbd235a4d9bdfc05d949aab24fc927 Mon Sep 17 00:00:00 2001 From: Xu Yu Date: Wed, 24 May 2017 16:39:55 +0800 Subject: nvme-pci: remap BAR0 to cover admin CQ doorbell for large stride The existing driver initially maps 8192 bytes of BAR0 which is intended to cover doorbells of admin SQ and CQ. However, if a large stride, e.g. 10, is used, the doorbell of admin CQ will be out of 8192 bytes. Consequently, a page fault will be raised when the admin CQ doorbell is accessed in nvme_configure_admin_queue(). This patch fixes this issue by remapping BAR0 before accessing admin CQ doorbell if the initial mapping is not enough. 
Signed-off-by: Xu Yu Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 51ca4771be2c..706a0fbfe28e 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -102,6 +102,7 @@ enum { NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */ NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */ NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */ + NVME_REG_DBS = 0x1000, /* SQ 0 Tail Doorbell */ }; #define NVME_CAP_MQES(cap) ((cap) & 0xffff) -- cgit v1.2.3 From 0945e56994ac855d01c4aecf69bded65c751b894 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 7 Jun 2017 11:45:28 +0200 Subject: scatterlist: add sg_zero_buffer() helper The sg_zero_buffer() helper is used to zero fill an area in a SG list. Signed-off-by: Johannes Thumshirn Reviewed-by: Sagi Grimberg [hch: renamed to sg_zero_buffer] Signed-off-by: Christoph Hellwig --- include/linux/scatterlist.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index cb3c8fe6acd7..4b3286ac60c8 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -278,6 +278,8 @@ size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents, const void *buf, size_t buflen, off_t skip); size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, size_t buflen, off_t skip); +size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents, + size_t buflen, off_t skip); /* * Maximum number of entries that will be allocated in one piece, if -- cgit v1.2.3 From 0add5e8e588c65c5ac6a3255f624260bf889128d Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 7 Jun 2017 11:45:29 +0200 Subject: nvmet: use NVME_IDENTIFY_DATA_SIZE Use NVME_IDENTIFY_DATA_SIZE define instead of hard coding the magic 4096 value. 
Signed-off-by: Johannes Thumshirn Reviewed-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke [hch: converted three more users] Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 706a0fbfe28e..782d557c5535 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -665,6 +665,8 @@ struct nvme_identify { __u32 rsvd11[5]; }; +#define NVME_IDENTIFY_DATA_SIZE 4096 + struct nvme_features { __u8 opcode; __u8 flags; -- cgit v1.2.3 From af8b86e9a7ffb9528e745b7ea25b18545699482c Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 7 Jun 2017 11:45:30 +0200 Subject: nvme: introduce NVMe Namespace Identification Descriptor structures Signed-off-by: Johannes Thumshirn Reviewed-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 782d557c5535..f2344aa923e8 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -290,6 +290,7 @@ enum { NVME_ID_CNS_NS = 0x00, NVME_ID_CNS_CTRL = 0x01, NVME_ID_CNS_NS_ACTIVE_LIST = 0x02, + NVME_ID_CNS_NS_DESC_LIST = 0x03, NVME_ID_CNS_NS_PRESENT_LIST = 0x10, NVME_ID_CNS_NS_PRESENT = 0x11, NVME_ID_CNS_CTRL_NS_LIST = 0x12, @@ -316,6 +317,22 @@ enum { NVME_NS_DPS_PI_TYPE3 = 3, }; +struct nvme_ns_id_desc { + __u8 nidt; + __u8 nidl; + __le16 reserved; +}; + +#define NVME_NIDT_EUI64_LEN 8 +#define NVME_NIDT_NGUID_LEN 16 +#define NVME_NIDT_UUID_LEN 16 + +enum { + NVME_NIDT_EUI64 = 0x01, + NVME_NIDT_NGUID = 0x02, + NVME_NIDT_UUID = 0x03, +}; + struct nvme_smart_log { __u8 critical_warning; __u8 temperature[2]; -- cgit v1.2.3 From c61d788b8b1fe57aaf03ac0b5c636c7388ebfd20 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 7 Jun 2017 11:45:36 +0200 
Subject: nvmet: allow overriding the NVMe VS via configfs Allow overriding the announced NVMe Version via configfs. This is particularly helpful when debugging new features for the host or target side without bumping the hard coded version (as the target might not be fully compliant to the announced version yet). Signed-off-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Reviewed-by: Guan Junxiong Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index f2344aa923e8..acb484935603 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1085,4 +1085,8 @@ struct nvme_completion { #define NVME_VS(major, minor, tertiary) \ (((major) << 16) | ((minor) << 8) | (tertiary)) +#define NVME_MAJOR(ver) ((ver) >> 16) +#define NVME_MINOR(ver) (((ver) >> 8) & 0xff) +#define NVME_TERTIARY(ver) ((ver) & 0xff) + #endif /* _LINUX_NVME_H */ -- cgit v1.2.3 From 435e809058bafaa8f0bf8f55f37508b01734c9a5 Mon Sep 17 00:00:00 2001 From: Guan Junxiong Date: Tue, 13 Jun 2017 09:26:15 +0800 Subject: nvme: add fields into identify controller data structure Add the new NVMe 1.3 fields EDSTT, DSTO, FWUG, HCTMA, MNTMT, MXTMT, and SANICAP into the identify controller data structure.
Signed-off-by: Guan Junxiong Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index acb484935603..6d476f242ee6 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -209,9 +209,15 @@ struct nvme_id_ctrl { __u8 tnvmcap[16]; __u8 unvmcap[16]; __le32 rpmbs; - __u8 rsvd316[4]; + __le16 edstt; + __u8 dsto; + __u8 fwug; __le16 kas; - __u8 rsvd322[190]; + __le16 hctma; + __le16 mntmt; + __le16 mxtmt; + __le32 sanicap; + __u8 rsvd332[180]; __u8 sqes; __u8 cqes; __le16 maxcmd; -- cgit v1.2.3 From 3c4d7559159bfe1e3b94df3a657b2cda3a34e218 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 14 Jun 2017 11:37:39 -0700 Subject: tls: kernel TLS support Software implementation of transport layer security, implemented using ULP infrastructure. tcp proto_ops are replaced with tls equivalents of sendmsg and sendpage. Only symmetric crypto is done in the kernel, keys are passed by setsockopt after the handshake is complete. All control messages are supported via CMSG data - the actual symmetric encryption is the same, just the message type needs to be passed separately. For user API, please see Documentation patch. Pieces that can be shared between hw and sw implementation are in tls_main.c Signed-off-by: Boris Pismenny Signed-off-by: Ilya Lesokhin Signed-off-by: Aviad Yehezkel Signed-off-by: Dave Watson Signed-off-by: David S. 
Miller --- include/linux/socket.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 082027457825..8b13db5163cc 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -334,6 +334,7 @@ struct ucred { #define SOL_ALG 279 #define SOL_NFC 280 #define SOL_KCM 281 +#define SOL_TLS 282 /* IPX options */ #define IPX_TYPE 1 -- cgit v1.2.3 From 83ad357dee467f63574de35752bc40033deab30e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Jun 2017 22:17:20 +0200 Subject: skbuff: make skb_put_zero() return void It's nicer to return void, since then there's no need to cast to any structures. Currently none of the users have a cast, but a number of future conversions do. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1151b50892d1..01ea64d0783a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1904,9 +1904,9 @@ static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) return tmp; } -static inline unsigned char *skb_put_zero(struct sk_buff *skb, unsigned int len) +static inline void *skb_put_zero(struct sk_buff *skb, unsigned int len) { - unsigned char *tmp = skb_put(skb, len); + void *tmp = skb_put(skb, len); memset(tmp, 0, len); -- cgit v1.2.3 From a9bc67de0c5713a8675bfe33bfe9cb36c7934589 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Wed, 14 Jun 2017 21:04:14 +0200 Subject: regulator: tps65910: wire up sleep control configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This enables configuring the PMIC's sleep mode via device-tree. A pointer indirection to sleep mode data is removed, as it simplifies the implementation slightly. 
In current kernel tree, platform data structure is not used outside MFD cell drivers. Signed-off-by: Michał Mirosław Signed-off-by: Mark Brown --- include/linux/mfd/tps65910.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index ffb21e79204d..deffdcd0236f 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -879,7 +879,7 @@ struct tps65910_board { bool en_ck32k_xtal; bool en_dev_slp; bool pm_off; - struct tps65910_sleep_keepon_data *slp_keepon; + struct tps65910_sleep_keepon_data slp_keepon; bool en_gpio_sleep[TPS6591X_MAX_NUM_GPIO]; unsigned long regulator_ext_sleep_control[TPS65910_NUM_REGS]; struct regulator_init_data *tps65910_pmic_init_data[TPS65910_NUM_REGS]; -- cgit v1.2.3 From bd10838af2d918994a27c702e9910fb71bb9c304 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 28 May 2017 15:24:17 +0300 Subject: net/mlx5: Fix some spelling mistakes Fixed a few places where endianness was misspelled and one spot where output was: CHECK: 'endianess' may be misspelled - perhaps 'endianness'? CHECK: 'ouput' may be misspelled - perhaps 'output'?
Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 32b044e953d2..1fd144662491 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -661,9 +661,9 @@ enum { struct mlx5_ifc_atomic_caps_bits { u8 reserved_at_0[0x40]; - u8 atomic_req_8B_endianess_mode[0x2]; + u8 atomic_req_8B_endianness_mode[0x2]; u8 reserved_at_42[0x4]; - u8 supported_atomic_req_8B_endianess_mode_1[0x1]; + u8 supported_atomic_req_8B_endianness_mode_1[0x1]; u8 reserved_at_47[0x19]; -- cgit v1.2.3 From 432609a4cdfb1c3e3a58e6e37b3501e42bfc50ab Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 14 Jun 2017 11:52:33 +0300 Subject: net/mlx5e: Move and optimize query out of buffer function Move "query queue counter out of buffer" helper function out of qp.c to en_main.c, since mlx5e netdev driver is the only one to use it. Also allocate the output buffer on the stack instead of the heap, to reduce number of heap allocs on update_stats work. 
Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Cc: kernel-team@fb.com --- include/linux/mlx5/qp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index bef80d0a0e30..1f637f4d1265 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -569,8 +569,6 @@ int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id); int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id); int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, int reset, void *out, int out_size); -int mlx5_core_query_out_of_buffer(struct mlx5_core_dev *dev, u16 counter_id, - u32 *out_of_buffer); static inline const char *mlx5_qp_type_str(int type) { -- cgit v1.2.3 From 4525abeaae54560254a1bb8970b3d4c225d32ef4 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 9 Feb 2017 13:20:46 +0200 Subject: net/mlx5: Expose command polling interface Add a new interface for commands execution that allows the caller to wait for the command's completion in a busy-wait loop (polling mode). This is useful if we want to execute a command in a polling mode while the driver is working in events mode for the rest of the commands. This interface will be used in the downstream patches. 
Signed-off-by: Majd Dibbiny Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6ea2f5734e37..bf15e87da8fa 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -817,6 +817,7 @@ struct mlx5_cmd_work_ent { u64 ts1; u64 ts2; u16 op; + bool polling; }; struct mlx5_pas { @@ -915,6 +916,8 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size, mlx5_cmd_cbk_t callback, void *context); +int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, + void *out, int out_size); void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); -- cgit v1.2.3 From 8812c24d28f4972c4f2b9998bf30b1f2a1b62adf Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 9 Feb 2017 14:20:12 +0200 Subject: net/mlx5: Add fast unload support in shutdown flow Adding a support to flush all HW resources with one FW command and skip all the heavy unload flows of the driver on kernel shutdown. There's no need to free all the SW context since a new fresh kernel will be loaded afterwards. Regarding the FW resources, they should be closed, otherwise we will have leakage in the FW. To accelerate this flow, we execute one command in the beginning that tells the FW that the driver isn't going to close any of the FW resources and asks the FW to clean up everything. Once the commands complete, it's safe to close the PCI resources and finish the routine. 
Signed-off-by: Majd Dibbiny Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1fd144662491..e86ef880a149 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -801,7 +801,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 max_indirection[0x8]; u8 fixed_buffer_size[0x1]; u8 log_max_mrw_sz[0x7]; - u8 reserved_at_110[0x2]; + u8 force_teardown[0x1]; + u8 reserved_at_111[0x1]; u8 log_max_bsf_list_size[0x6]; u8 umr_extended_translation_offset[0x1]; u8 null_mkey[0x1]; @@ -3094,18 +3095,25 @@ struct mlx5_ifc_tsar_element_bits { u8 reserved_at_10[0x10]; }; +enum { + MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_SUCCESS = 0x0, + MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL = 0x1, +}; + struct mlx5_ifc_teardown_hca_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x3f]; + + u8 force_state[0x1]; }; enum { MLX5_TEARDOWN_HCA_IN_PROFILE_GRACEFUL_CLOSE = 0x0, - MLX5_TEARDOWN_HCA_IN_PROFILE_PANIC_CLOSE = 0x1, + MLX5_TEARDOWN_HCA_IN_PROFILE_FORCE_CLOSE = 0x1, }; struct mlx5_ifc_teardown_hca_in_bits { -- cgit v1.2.3 From c12c48ce869d72029d70666f615cbd8f67fc14e9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 4 Jun 2017 10:59:15 +0900 Subject: libnvdimm, label: add v1.2 interleave-set-cookie algorithm The interleave-set-cookie algorithm is extended to incorporate all the same components that are used to generate an nvdimm unique-id. For backwards compatibility we still maintain the old v1.1 definition. 
Reported-by: Nicholas Moulin Reported-by: Kaushik Kanetkar Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 6c807017128d..722cdf21429f 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -71,7 +71,10 @@ struct nd_cmd_desc { }; struct nd_interleave_set { - u64 cookie; + /* v1.1 definition of the interleave-set-cookie algorithm */ + u64 cookie1; + /* v1.2 definition of the interleave-set-cookie algorithm */ + u64 cookie2; /* compatibility with initial buggy Linux implementation */ u64 altcookie; }; -- cgit v1.2.3 From f979b13c3cc51584882bffa32965f34e5afa3b9b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 4 Jun 2017 12:12:07 +0900 Subject: libnvdimm, label: honor the lba size specified in v1.2 labels Previously we only honored the lba size for blk-aperture mode namespaces. For pmem namespaces the lba size was just assumed to be 512. With the new v1.2 label definition and compatibility with other operating environments, the ->lbasize property is now respected for pmem namespaces. 
Cc: Ross Zwisler Signed-off-by: Dan Williams --- include/linux/nd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nd.h b/include/linux/nd.h index 194b8e002ea7..d8f5023b49ae 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -75,12 +75,14 @@ struct nd_namespace_io { /** * struct nd_namespace_pmem - namespace device for dimm-backed interleaved memory * @nsio: device and system physical address range to drive + * @lbasize: logical sector size for the namespace in block-device-mode * @alt_name: namespace name supplied in the dimm label * @uuid: namespace name supplied in the dimm label * @id: ida allocated id */ struct nd_namespace_pmem { struct nd_namespace_io nsio; + unsigned long lbasize; char *alt_name; u8 *uuid; int id; -- cgit v1.2.3 From faec6f8a1cd2c44e439de35ab3328c5cf7bf52d8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jun 2017 11:10:51 -0700 Subject: libnvdimm, label: populate the type_guid property for v1.2 namespaces The type_guid refers to the "Address Range Type GUID" for the region backing a namespace as defined in the ACPI NFIT (NVDIMM Firmware Interface Table). This 'type' identifier specifies an access mechanism for the given namespace. This capability replaces the confusing usage of the 'NSLABEL_FLAG_LOCAL' flag to indicate a block-aperture-mode namespace. 
Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 722cdf21429f..4b9f178c82e6 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -17,6 +17,7 @@ #include #include #include +#include enum { /* when a dimm supports both PMEM and BLK access a label is required */ @@ -77,6 +78,8 @@ struct nd_interleave_set { u64 cookie2; /* compatibility with initial buggy Linux implementation */ u64 altcookie; + + guid_t type_guid; }; struct nd_mapping_desc { -- cgit v1.2.3 From b3fde74ea195d2f9f49830a29f971a0aab4cd67a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 4 Jun 2017 10:18:39 +0900 Subject: libnvdimm, label: add address abstraction identifiers Starting with v1.2 labels, 'address abstractions' can be hinted via an address abstraction id that implies an info-block format. The standard address abstraction in the specification is the v2 format of the Block-Translation-Table (BTT). Support for that is saved for a later patch, for now we add support for the Linux supported address abstractions BTT (v1), PFN, and DAX. The new 'holder_class' attribute for namespace devices is added for tooling to specify the 'abstraction_guid' to store in the namespace label. For v1.1 labels this field is undefined and any setting of 'holder_class' away from the default 'none' value will only have effect until the driver is unloaded. Setting 'holder_class' requires that whatever device tries to claim the namespace must be of the specified class. 
Cc: Vishal Verma Signed-off-by: Dan Williams --- include/linux/nd.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nd.h b/include/linux/nd.h index d8f5023b49ae..96069c543890 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -21,6 +21,14 @@ enum nvdimm_event { NVDIMM_REVALIDATE_POISON, }; +enum nvdimm_claim_class { + NVDIMM_CCLASS_NONE, + NVDIMM_CCLASS_BTT, + NVDIMM_CCLASS_PFN, + NVDIMM_CCLASS_DAX, + NVDIMM_CCLASS_UNKNOWN, +}; + struct nd_device_driver { struct device_driver drv; unsigned long type; @@ -41,12 +49,14 @@ static inline struct nd_device_driver *to_nd_device_driver( * @force_raw: ignore other personalities for the namespace (e.g. btt) * @dev: device model node * @claim: when set a another personality has taken ownership of the namespace + * @claim_class: restrict claim type to a given class * @rw_bytes: access the raw namespace capacity with byte-aligned transfers */ struct nd_namespace_common { int force_raw; struct device dev; struct device *claim; + enum nvdimm_claim_class claim_class; int (*rw_bytes)(struct nd_namespace_common *, resource_size_t offset, void *buf, size_t size, int rw, unsigned long flags); }; -- cgit v1.2.3 From fec53774fd043038e57ac737d90e8d58975d6e92 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 21:56:49 -0700 Subject: filesystem-dax: convert to dax_copy_from_iter() Now that all possible providers of the dax_operations copy_from_iter method are implemented, switch filesystem-dax to call the driver rather than copy_to_iter_pmem. 
Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/pmem.h | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 71ecf3d46aac..9d542a5600e4 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -31,13 +31,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) BUG(); } -static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, - struct iov_iter *i) -{ - BUG(); - return 0; -} - static inline void arch_clear_pmem(void *addr, size_t size) { BUG(); @@ -79,23 +72,6 @@ static inline void memcpy_to_pmem(void *dst, const void *src, size_t n) memcpy(dst, src, n); } -/** - * copy_from_iter_pmem - copy data from an iterator to PMEM - * @addr: PMEM destination address - * @bytes: number of bytes to copy - * @i: iterator with source data - * - * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'. - * See blkdev_issue_flush() note for memcpy_to_pmem(). - */ -static inline size_t copy_from_iter_pmem(void *addr, size_t bytes, - struct iov_iter *i) -{ - if (arch_has_pmem_api()) - return arch_copy_from_iter_pmem(addr, bytes, i); - return copy_from_iter_nocache(addr, bytes, i); -} - /** * clear_pmem - zero a PMEM memory range * @addr: virtual start address -- cgit v1.2.3 From 3c1cebff23cdca01c421411e953a9e239f2b9ef9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 12:58:19 -0700 Subject: dax, pmem: introduce an optional 'flush' dax_operation Filesystem-DAX flushes caches whenever it writes to the address returned through dax_direct_access() and when writing back dirty radix entries. That flushing is only required in the pmem case, so add a dax operation to allow pmem to take this extra action, but skip it for other dax capable devices that do not provide a flush routine. An example for this differentiation might be a volatile ram disk where there is no expectation of persistence. 
In fact the pmem driver itself might front such an address range specified by the NFIT. So, this "no flush" property might be something passed down by the bus / libnvdimm. Cc: Christoph Hellwig Cc: Matthew Wilcox Cc: Ross Zwisler Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/dax.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 28e398f8c59e..407dd3ff6e54 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -19,6 +19,8 @@ struct dax_operations { /* copy_from_iter: dax-driver override for default copy_from_iter */ size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, struct iov_iter *); + /* flush: optional driver-specific cache management after writes */ + void (*flush)(struct dax_device *, pgoff_t, void *, size_t); }; #if IS_ENABLED(CONFIG_DAX) -- cgit v1.2.3 From abebfbe2f7315dd3ec9a0c69596a76e32beb5749 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 13:02:52 -0700 Subject: dm: add ->flush() dax operation support Allow device-mapper to route flush operations to the per-target implementation. In order for the device stacking to work we need a dax_dev and a pgoff relative to that device. This gives each layer of the stack the information it needs to look up the operation pointer for the next level. This conceptually allows for an array of mixed device drivers with varying flush implementations. 
Reviewed-by: Toshi Kani Reviewed-by: Mike Snitzer Signed-off-by: Dan Williams --- include/linux/dax.h | 2 ++ include/linux/device-mapper.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 407dd3ff6e54..1f6b6072af64 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -82,6 +82,8 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); +void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, + size_t size); /* * We use lowest available bit in exceptional entry for locking, one bit for diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 11c8a0a92f9c..67bfe8ddcb32 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -134,6 +134,8 @@ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); typedef size_t (*dm_dax_copy_from_iter_fn)(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); +typedef void (*dm_dax_flush_fn)(struct dm_target *ti, pgoff_t pgoff, void *addr, + size_t size); #define PAGE_SECTORS (PAGE_SIZE / 512) void dm_error(const char *message); @@ -184,6 +186,7 @@ struct target_type { dm_io_hints_fn io_hints; dm_dax_direct_access_fn direct_access; dm_dax_copy_from_iter_fn dax_copy_from_iter; + dm_dax_flush_fn dax_flush; /* For internal device-mapper use. */ struct list_head list; -- cgit v1.2.3 From 81f558701ae8d5677635118751b1b4043094c7e9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 13:12:20 -0700 Subject: x86, dax: replace clear_pmem() with open coded memset + dax_ops->flush The clear_pmem() helper simply combines a memset() plus a cache flush. 
Now that the flush routine is optionally provided by the dax device driver we can avoid unnecessary cache management on dax devices fronting volatile memory. With clear_pmem() gone we can follow on with a patch to make pmem cache management completely defined within the pmem driver. Cc: Cc: Jeff Moyer Cc: Ingo Molnar Cc: Christoph Hellwig Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Matthew Wilcox Cc: Ross Zwisler Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/pmem.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 9d542a5600e4..772bd02a5b52 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -31,11 +31,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) BUG(); } -static inline void arch_clear_pmem(void *addr, size_t size) -{ - BUG(); -} - static inline void arch_wb_cache_pmem(void *addr, size_t size) { BUG(); @@ -72,22 +67,6 @@ static inline void memcpy_to_pmem(void *dst, const void *src, size_t n) memcpy(dst, src, n); } -/** - * clear_pmem - zero a PMEM memory range - * @addr: virtual start address - * @size: number of bytes to zero - * - * Write zeros into the memory range starting at 'addr' for 'size' bytes. - * See blkdev_issue_flush() note for memcpy_to_pmem(). 
- */ -static inline void clear_pmem(void *addr, size_t size) -{ - if (arch_has_pmem_api()) - arch_clear_pmem(addr, size); - else - memset(addr, 0, size); -} - /** * invalidate_pmem - flush a pmem range from the cache hierarchy * @addr: virtual start address -- cgit v1.2.3 From 4e4f00a9b51a1c52ebdd728a1caeb3b9fe48c39d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 22:40:44 -0700 Subject: x86, dax, libnvdimm: remove wb_cache_pmem() indirection With all handling of the CONFIG_ARCH_HAS_PMEM_API case being moved to libnvdimm and the pmem driver directly we do not need to provide global wrappers and fallbacks in the CONFIG_ARCH_HAS_PMEM_API=n case. The pmem driver will simply not link to arch_wb_cache_pmem() in that case. Same as before, pmem flushing is only defined for x86_64, via clean_cache_range(), but it is straightforward to add other archs in the future. arch_wb_cache_pmem() is an exported function since the pmem module needs to find it, but it is privately declared in drivers/nvdimm/pmem.h because there are no consumers outside of the pmem driver. Cc: Cc: Jan Kara Cc: Jeff Moyer Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: Thomas Gleixner Cc: Oliver O'Halloran Cc: Matthew Wilcox Cc: Ross Zwisler Suggested-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/pmem.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 772bd02a5b52..33ae761f010a 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -31,11 +31,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) BUG(); } -static inline void arch_wb_cache_pmem(void *addr, size_t size) -{ - BUG(); -} - static inline void arch_invalidate_pmem(void *addr, size_t size) { BUG(); @@ -80,18 +75,4 @@ static inline void invalidate_pmem(void *addr, size_t size) if (arch_has_pmem_api()) arch_invalidate_pmem(addr, size); } - -/** - * wb_cache_pmem - write back processor cache for PMEM memory range - * @addr: virtual start address - * @size: number of bytes to write back - * - * Write back the processor cache range starting at 'addr' for 'size' bytes. - * See blkdev_issue_flush() note for memcpy_to_pmem(). - */ -static inline void wb_cache_pmem(void *addr, size_t size) -{ - if (arch_has_pmem_api()) - arch_wb_cache_pmem(addr, size); -} #endif /* __PMEM_H__ */ -- cgit v1.2.3 From 6b8190d61a622e095f04451437953acd2d74b371 Mon Sep 17 00:00:00 2001 From: Scott Bauer Date: Thu, 15 Jun 2017 10:44:30 -0600 Subject: nvme: implement NS Optimal IO Boundary from 1.3 Spec The NVMe 1.3 spec introduces Namespace Optimal IO Boundaries (NOIOB), which standardizes the stripe mechanism we currently have quirks for. This patch implements the necessary logic to handle this new feature. 
Signed-off-by: Scott Bauer Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 6d476f242ee6..291587a0743f 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -282,7 +282,7 @@ struct nvme_id_ns { __le16 nabsn; __le16 nabo; __le16 nabspf; - __u16 rsvd46; + __le16 noiob; __u8 nvmcap[16]; __u8 rsvd64[40]; __u8 nguid[16]; -- cgit v1.2.3 From 59ae1d127ac0ae404baf414c434ba2651b793f46 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Jun 2017 14:29:20 +0200 Subject: networking: introduce and use skb_put_data() A common pattern with skb_put() is to just want to memcpy() some data into the new space, introduce skb_put_data() for this. An spatch similar to the one for skb_put_zero() converts many of the places using it: @@ identifier p, p2; expression len, skb, data; type t, t2; @@ ( -p = skb_put(skb, len); +p = skb_put_data(skb, data, len); | -p = (t)skb_put(skb, len); +p = skb_put_data(skb, data, len); ) ( p2 = (t2)p; -memcpy(p2, data, len); | -memcpy(p, data, len); ) @@ type t, t2; identifier p, p2; expression skb, data; @@ t *p; ... ( -p = skb_put(skb, sizeof(t)); +p = skb_put_data(skb, data, sizeof(t)); | -p = (t *)skb_put(skb, sizeof(t)); +p = skb_put_data(skb, data, sizeof(t)); ) ( p2 = (t2)p; -memcpy(p2, data, sizeof(*p)); | -memcpy(p, data, sizeof(*p)); ) @@ expression skb, len, data; @@ -memcpy(skb_put(skb, len), data, len); +skb_put_data(skb, data, len); (again, manually post-processed to retain some comments) Reviewed-by: Stephen Hemminger Signed-off-by: Johannes Berg Signed-off-by: David S. 
Miller --- include/linux/mISDNif.h | 2 +- include/linux/skbuff.h | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index ac02c54520e9..a7330eb3ec64 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -554,7 +554,7 @@ _alloc_mISDN_skb(u_int prim, u_int id, u_int len, void *dp, gfp_t gfp_mask) if (!skb) return NULL; if (len) - memcpy(skb_put(skb, len), dp, len); + skb_put_data(skb, dp, len); hh = mISDN_HEAD_P(skb); hh->prim = prim; hh->id = id; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 01ea64d0783a..5af5385a0e72 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1913,6 +1913,16 @@ static inline void *skb_put_zero(struct sk_buff *skb, unsigned int len) return tmp; } +static inline void *skb_put_data(struct sk_buff *skb, const void *data, + unsigned int len) +{ + void *tmp = skb_put(skb, len); + + memcpy(tmp, data, len); + + return tmp; +} + unsigned char *skb_push(struct sk_buff *skb, unsigned int len); static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) { -- cgit v1.2.3 From 4df864c1d9afb46e2461a9f808d9f11a42d31bad Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Jun 2017 14:29:21 +0200 Subject: networking: make skb_put & friends return void pointers It seems like a historic accident that these return unsigned char *, and in many places that means casts are required, more often than not. 
Make these functions (skb_put, __skb_put and pskb_put) return void * and remove all the casts across the tree, adding a (u8 *) cast only where the unsigned char pointer was used directly, all done with the following spatch: @@ expression SKB, LEN; typedef u8; identifier fn = { skb_put, __skb_put }; @@ - *(fn(SKB, LEN)) + *(u8 *)fn(SKB, LEN) @@ expression E, SKB, LEN; identifier fn = { skb_put, __skb_put }; type T; @@ - E = ((T *)(fn(SKB, LEN))) + E = fn(SKB, LEN) which actually doesn't cover pskb_put since there are only three users overall. A handful of stragglers were converted manually, notably a macro in drivers/isdn/i4l/isdn_bsdcomp.c and, oddly enough, one of the many instances in net/bluetooth/hci_sock.c. In the former file, I also had to fix one whitespace problem spatch introduced. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/skbuff.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5af5385a0e72..454ea37dddbb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1893,11 +1893,11 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) /* * Add data to an sk_buff */ -unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len); -unsigned char *skb_put(struct sk_buff *skb, unsigned int len); -static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) +void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len); +void *skb_put(struct sk_buff *skb, unsigned int len); +static inline void *__skb_put(struct sk_buff *skb, unsigned int len) { - unsigned char *tmp = skb_tail_pointer(skb); + void *tmp = skb_tail_pointer(skb); SKB_LINEAR_ASSERT(skb); skb->tail += len; skb->len += len; -- cgit v1.2.3 From af72868b9070d1b843c829f0d0d0b22c04a20815 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Jun 2017 14:29:22 +0200 Subject: networking: 
make skb_pull & friends return void pointers It seems like a historic accident that these return unsigned char *, and in many places that means casts are required, more often than not. Make these functions return void * and remove all the casts across the tree, adding a (u8 *) cast only where the unsigned char pointer was used directly, all done with the following spatch: @@ expression SKB, LEN; typedef u8; identifier fn = { skb_pull, __skb_pull, skb_pull_inline, __pskb_pull_tail, __pskb_pull, pskb_pull }; @@ - *(fn(SKB, LEN)) + *(u8 *)fn(SKB, LEN) @@ expression E, SKB, LEN; identifier fn = { skb_pull, __skb_pull, skb_pull_inline, __pskb_pull_tail, __pskb_pull, pskb_pull }; type T; @@ - E = ((T *)(fn(SKB, LEN))) + E = fn(SKB, LEN) Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/skbuff.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 454ea37dddbb..ac9d10dadd1a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1931,22 +1931,22 @@ static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) return skb->data; } -unsigned char *skb_pull(struct sk_buff *skb, unsigned int len); -static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len) +void *skb_pull(struct sk_buff *skb, unsigned int len); +static inline void *__skb_pull(struct sk_buff *skb, unsigned int len) { skb->len -= len; BUG_ON(skb->len < skb->data_len); return skb->data += len; } -static inline unsigned char *skb_pull_inline(struct sk_buff *skb, unsigned int len) +static inline void *skb_pull_inline(struct sk_buff *skb, unsigned int len) { return unlikely(len > skb->len) ? 
NULL : __skb_pull(skb, len); } -unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta); +void *__pskb_pull_tail(struct sk_buff *skb, int delta); -static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len) +static inline void *__pskb_pull(struct sk_buff *skb, unsigned int len) { if (len > skb_headlen(skb) && !__pskb_pull_tail(skb, len - skb_headlen(skb))) @@ -1955,7 +1955,7 @@ static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len) return skb->data += len; } -static inline unsigned char *pskb_pull(struct sk_buff *skb, unsigned int len) +static inline void *pskb_pull(struct sk_buff *skb, unsigned int len) { return unlikely(len > skb->len) ? NULL : __pskb_pull(skb, len); } @@ -2938,7 +2938,7 @@ static inline void skb_postpush_rcsum(struct sk_buff *skb, __skb_postpush_rcsum(skb, start, len, 0); } -unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); +void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); /** * skb_push_rcsum - push skb and update receive checksum -- cgit v1.2.3 From d58ff35122847a83ba55394e2ae3a1527b6febf5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Jun 2017 14:29:23 +0200 Subject: networking: make skb_push & __skb_push return void pointers It seems like a historic accident that these return unsigned char *, and in many places that means casts are required, more often than not. 
Make these functions return void * and remove all the casts across the tree, adding a (u8 *) cast only where the unsigned char pointer was used directly, all done with the following spatch: @@ expression SKB, LEN; typedef u8; identifier fn = { skb_push, __skb_push, skb_push_rcsum }; @@ - *(fn(SKB, LEN)) + *(u8 *)fn(SKB, LEN) @@ expression E, SKB, LEN; identifier fn = { skb_push, __skb_push, skb_push_rcsum }; type T; @@ - E = ((T *)(fn(SKB, LEN))) + E = fn(SKB, LEN) @@ expression SKB, LEN; identifier fn = { skb_push, __skb_push, skb_push_rcsum }; @@ - fn(SKB, LEN)[0] + *(u8 *)fn(SKB, LEN) Note that the last part there converts from push(...)[0] to the more idiomatic *(u8 *)push(...). Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 2 +- include/linux/skbuff.h | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 283dc2f5364d..5e6a2d4dc366 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -318,7 +318,7 @@ static inline int __vlan_insert_tag(struct sk_buff *skb, if (skb_cow_head(skb, VLAN_HLEN) < 0) return -ENOMEM; - veth = (struct vlan_ethhdr *)skb_push(skb, VLAN_HLEN); + veth = skb_push(skb, VLAN_HLEN); /* Move the mac addresses to the beginning of the new header. 
*/ memmove(skb->data, skb->data + VLAN_HLEN, 2 * ETH_ALEN); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ac9d10dadd1a..46bd514e719c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1923,8 +1923,8 @@ static inline void *skb_put_data(struct sk_buff *skb, const void *data, return tmp; } -unsigned char *skb_push(struct sk_buff *skb, unsigned int len); -static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) +void *skb_push(struct sk_buff *skb, unsigned int len); +static inline void *__skb_push(struct sk_buff *skb, unsigned int len) { skb->data -= len; skb->len += len; @@ -2951,8 +2951,7 @@ void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); * that the checksum difference is zero (e.g., a valid IP header) * or you are setting ip_summed to CHECKSUM_NONE. */ -static inline unsigned char *skb_push_rcsum(struct sk_buff *skb, - unsigned int len) +static inline void *skb_push_rcsum(struct sk_buff *skb, unsigned int len) { skb_push(skb, len); skb_postpush_rcsum(skb, skb->data, len); -- cgit v1.2.3 From 634fef61076d644b989b86abc2f560d81a089a31 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Jun 2017 14:29:24 +0200 Subject: networking: add and use skb_put_u8() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Joe and Bjørn suggested that it'd be nicer to not have the cast in the fairly common case of doing *(u8 *)skb_put(skb, 1) = c; Add skb_put_u8() for this case, and use it across the code, using the following spatch: @@ expression SKB, C, S; typedef u8; identifier fn = {skb_put}; fresh identifier fn2 = fn ## "_u8"; @@ - *(u8 *)fn(SKB, S) = C; + fn2(SKB, C); Note that due to the "S", the spatch isn't perfect, it should have checked that S is 1, but there's also places that use a sizeof expression like sizeof(var) or sizeof(u8) etc. 
Turns out that nobody ever did something like *(u8 *)skb_put(skb, 2) = c; which would be wrong anyway since the second byte wouldn't be initialized. Suggested-by: Joe Perches Suggested-by: Bjørn Mork Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 46bd514e719c..852feacf4bbf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1923,6 +1923,11 @@ static inline void *skb_put_data(struct sk_buff *skb, const void *data, return tmp; } +static inline void skb_put_u8(struct sk_buff *skb, u8 val) +{ + *(u8 *)skb_put(skb, 1) = val; +} + void *skb_push(struct sk_buff *skb, unsigned int len); static inline void *__skb_push(struct sk_buff *skb, unsigned int len) { -- cgit v1.2.3 From 58038695e62b4473e4d70e1503933579c640cd52 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 15 Jun 2017 17:29:09 -0700 Subject: net: Add IFLA_XDP_PROG_ID Expose prog_id through IFLA_XDP_PROG_ID. This patch makes modification to generic_xdp. The later patches will modify other xdp-supported drivers. prog_id is added to struct net_dev_xdp. iproute2 patch will be followed. Here is how the 'ip link' will look like: > ip link show eth0 3: eth0: mtu 1500 xdp(prog_id:1) qdisc fq_codel state UP mode DEFAULT group default qlen 1000 Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ad98a83f1332..7c7118b3bd69 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -824,7 +824,10 @@ struct netdev_xdp { struct netlink_ext_ack *extack; }; /* XDP_QUERY_PROG */ - bool prog_attached; + struct { + bool prog_attached; + u32 prog_id; + }; }; }; @@ -3302,7 +3305,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, typedef int (*xdp_op_t)(struct net_device *dev, struct netdev_xdp *xdp); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, u32 flags); -bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op); +bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); -- cgit v1.2.3 From 68c35ea25bdd4ad10445c4c02f7d48b3dccab8cc Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 16 May 2017 17:46:48 +0200 Subject: mfd: cros_ec: Add helper for event notifier. Add cros_ec_get_event() entry point to retrieve event within functions called by the notifier. Signed-off-by: Gwendal Grignou Signed-off-by: Enric Balletbo i Serra Acked-by: Lee Jones Signed-off-by: Benson Leung --- include/linux/mfd/cros_ec.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 28baee63eaf6..b61b2e013698 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -300,6 +300,16 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev); */ int cros_ec_get_next_event(struct cros_ec_device *ec_dev); +/** + * cros_ec_get_host_event - Return a mask of event set by the EC. 
+ * + * When MKBP is supported, when the EC raises an interrupt, + * We collect the events raised and call the functions in the ec notifier. + * + * This function is a helper to know which events are raised. + */ +u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev); + /* sysfs stuff */ extern struct attribute_group cros_ec_attr_group; extern struct attribute_group cros_ec_lightbar_attr_group; -- cgit v1.2.3 From 0aa877c558477e5c4b0faaa618cfd41f8c0b3319 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Tue, 16 May 2017 17:46:48 +0200 Subject: mfd: cros_ec: Add EC console read structures definitions ec_params_console_read_v1 is used to capture EC logs from kernel, and ec_params_get_cmd_versions_v1 is used to probe whether EC supports that command. Signed-off-by: Nicolas Boichat Reviewed-by: Guenter Roeck Acked-by: Lee Jones Tested-by: Enric Balletbo i Serra Signed-off-by: Benson Leung --- include/linux/mfd/cros_ec_commands.h | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index c93e7e0300ef..1b19e424e1cf 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -625,6 +625,10 @@ struct ec_params_get_cmd_versions { uint8_t cmd; /* Command to check */ } __packed; +struct ec_params_get_cmd_versions_v1 { + uint16_t cmd; /* Command to check */ +} __packed; + struct ec_response_get_cmd_versions { /* * Mask of supported versions; use EC_VER_MASK() to compare with a @@ -2285,13 +2289,28 @@ struct ec_params_charge_control { #define EC_CMD_CONSOLE_SNAPSHOT 0x97 /* - * Read next chunk of data from saved snapshot. + * Read data from the saved snapshot. If the subcmd parameter is + * CONSOLE_READ_NEXT, this will return data starting from the beginning of + * the latest snapshot. If it is CONSOLE_READ_RECENT, it will start from the + * end of the previous snapshot. 
+ * + * The params are only looked at in version >= 1 of this command. Prior + * versions will just default to CONSOLE_READ_NEXT behavior. * * Response is null-terminated string. Empty string, if there is no more * remaining output. */ #define EC_CMD_CONSOLE_READ 0x98 +enum ec_console_read_subcmd { + CONSOLE_READ_NEXT = 0, + CONSOLE_READ_RECENT +}; + +struct ec_params_console_read_v1 { + uint8_t subcmd; /* enum ec_console_read_subcmd */ +} __packed; + /*****************************************************************************/ /* -- cgit v1.2.3 From e86264595225d2764a903965356ef59aeb7d1c47 Mon Sep 17 00:00:00 2001 From: Eric Caruso Date: Tue, 16 May 2017 17:46:48 +0200 Subject: mfd: cros_ec: add debugfs, console log file If the EC supports the new CONSOLE_READ command type, then we place a console_log file in debugfs for that EC device which allows us to grab EC logs. The kernel will poll every 10 seconds for the log and keep its own buffer, but userspace should grab this and write it out to some logs which actually get rotated. 
Signed-off-by: Eric Caruso Signed-off-by: Nicolas Boichat Acked-by: Lee Jones Tested-by: Enric Balletbo i Serra [bleung: restored original version of this commit, with pointer size issue to be fixed in next commit] Signed-off-by: Benson Leung --- include/linux/mfd/cros_ec.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index b61b2e013698..3b16c9009749 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -172,6 +172,8 @@ struct cros_ec_platform { u16 cmd_offset; }; +struct cros_ec_debugfs; + /* * struct cros_ec_dev - ChromeOS EC device entry point * @@ -179,6 +181,7 @@ struct cros_ec_platform { * @cdev: Character device structure in /dev * @ec_dev: cros_ec_device structure to talk to the physical device * @dev: pointer to the platform device + * @debug_info: cros_ec_debugfs structure for debugging information * @cmd_offset: offset to apply for each command. */ struct cros_ec_dev { @@ -186,6 +189,7 @@ struct cros_ec_dev { struct cdev cdev; struct cros_ec_device *ec_dev; struct device *dev; + struct cros_ec_debugfs *debug_info; u16 cmd_offset; u32 features[2]; }; -- cgit v1.2.3 From 99b3c58f7ba7fae801e501b45c5fcf6e08d9247f Mon Sep 17 00:00:00 2001 From: Piotr Gregor Date: Fri, 26 May 2017 22:02:25 +0100 Subject: PCI: Test INTx masking during enumeration, not at run-time The test for INTx masking via PCI_COMMAND_INTX_DISABLE performed in pci_intx_mask_supported() should be done before the device can be used. This is to avoid writing PCI_COMMAND while the driver owns the device, in case that has any effect on MSI/MSI-X interrupts. Move the content of pci_intx_mask_supported() to pci_intx_mask_broken() and call it from pci_setup_device(). The test result can be queried at any time later using the same pci_intx_mask_supported() interface as before (though with changed implementation), so callers (uio, vfio) should be unaffected. 
Signed-off-by: Piotr Gregor [bhelgaas: changelog, remove quirk check, remove locking, move dev->broken_intx_masking assignment to caller] Signed-off-by: Bjorn Helgaas Reviewed-by: Alex Williamson Acked-by: Michael S. Tsirkin --- include/linux/pci.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 33c2b0b77429..4f0613d5d2d9 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -366,7 +366,7 @@ struct pci_dev { unsigned int is_thunderbolt:1; /* Thunderbolt controller */ unsigned int __aer_firmware_first_valid:1; unsigned int __aer_firmware_first:1; - unsigned int broken_intx_masking:1; + unsigned int broken_intx_masking:1; /* INTx masking can't be used */ unsigned int io_window_1k:1; /* Intel P2P bridge 1K I/O windows */ unsigned int irq_managed:1; unsigned int has_secondary_link:1; @@ -1003,6 +1003,15 @@ int __must_check pci_reenable_device(struct pci_dev *); int __must_check pcim_enable_device(struct pci_dev *pdev); void pcim_pin_device(struct pci_dev *pdev); +static inline bool pci_intx_mask_supported(struct pci_dev *pdev) +{ + /* + * INTx masking is supported if PCI_COMMAND_INTX_DISABLE is + * writable and no quirk has marked the feature broken. 
+ */ + return !pdev->broken_intx_masking; +} + static inline int pci_is_enabled(struct pci_dev *pdev) { return (atomic_read(&pdev->enable_cnt) > 0); @@ -1026,7 +1035,6 @@ int __must_check pci_set_mwi(struct pci_dev *dev); int pci_try_set_mwi(struct pci_dev *dev); void pci_clear_mwi(struct pci_dev *dev); void pci_intx(struct pci_dev *dev, int enable); -bool pci_intx_mask_supported(struct pci_dev *dev); bool pci_check_and_mask_intx(struct pci_dev *dev); bool pci_check_and_unmask_intx(struct pci_dev *dev); int pci_wait_for_pending(struct pci_dev *dev, int pos, u16 mask); -- cgit v1.2.3 From 7b9e93616399638521aafd1f01dfcf474c736393 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Jun 2017 18:15:21 +0200 Subject: blk-mq-sched: unify request finished methods No need to have two different callouts of bfq vs kyber. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/elevator.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 0e306c5a86d6..4acea351d43f 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -105,7 +105,7 @@ struct elevator_mq_ops { void (*request_merged)(struct request_queue *, struct request *, enum elv_merge); void (*requests_merged)(struct request_queue *, struct request *, struct request *); struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *); - void (*put_request)(struct request *); + void (*finish_request)(struct request *); void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool); struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); bool (*has_work)(struct blk_mq_hw_ctx *); @@ -115,7 +115,6 @@ struct elevator_mq_ops { struct request *(*former_request)(struct request_queue *, struct request *); struct request *(*next_request)(struct request_queue *, struct request *); int (*get_rq_priv)(struct request_queue *, struct request 
*, struct bio *); - void (*put_rq_priv)(struct request_queue *, struct request *); void (*init_icq)(struct io_cq *); void (*exit_icq)(struct io_cq *); }; -- cgit v1.2.3 From 5bbf4e5a8e3a780874b2ed77bd1bd57850f3f6da Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Jun 2017 18:15:26 +0200 Subject: blk-mq-sched: unify request prepare methods This patch makes sure we always allocate requests in the core blk-mq code and use a common prepare_request method to initialize them for both mq I/O schedulers. For Kyber an additional limit_depth method is added that is called before allocating the request. Also because none of the initializations can really fail the new method does not return an error - instead the bfq finish method is hardened to deal with the no-IOC case. Last but not least this removes the abuse of RQF_QUEUE by the blk-mq scheduling code as RQF_ELFPRIV is all that is needed now. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/elevator.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 4acea351d43f..5bc8f8682a3e 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -104,7 +104,8 @@ struct elevator_mq_ops { int (*request_merge)(struct request_queue *q, struct request **, struct bio *); void (*request_merged)(struct request_queue *, struct request *, enum elv_merge); void (*requests_merged)(struct request_queue *, struct request *, struct request *); - struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *); + void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *); + void (*prepare_request)(struct request *, struct bio *bio); void (*finish_request)(struct request *); void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool); struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); @@ -114,7 +115,6 @@ struct elevator_mq_ops { void
(*requeue_request)(struct request *); struct request *(*former_request)(struct request_queue *, struct request *); struct request *(*next_request)(struct request_queue *, struct request *); - int (*get_rq_priv)(struct request_queue *, struct request *, struct bio *); void (*init_icq)(struct io_cq *); void (*exit_icq)(struct io_cq *); }; -- cgit v1.2.3 From af67c31fba3b879b241536a48df703a2eee18ebf Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 18 Jun 2017 14:38:57 +1000 Subject: blk: remove bio_set arg from blk_queue_split() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit blk_queue_split() is always called with the last arg being q->bio_split, where 'q' is the first arg. Also blk_queue_split() sometimes uses the passed-in 'bs' and sometimes uses q->bio_split. This is inconsistent and unnecessary. Remove the last arg and always use q->bio_split inside blk_queue_split() Reviewed-by: Christoph Hellwig Reviewed-by: Ming Lei Credit-to: Javier González (Noticed that lightnvm was missed) Reviewed-by: Javier González Tested-by: Javier González Signed-off-by: NeilBrown Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 76b6df862a12..670df402bc51 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -944,8 +944,7 @@ extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern int blk_rq_append_bio(struct request *rq, struct bio *bio); extern void blk_delay_queue(struct request_queue *, unsigned long); -extern void blk_queue_split(struct request_queue *, struct bio **, - struct bio_set *); +extern void blk_queue_split(struct request_queue *, struct bio **); extern void blk_recount_segments(struct request_queue *, struct bio *); extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); extern int 
scsi_cmd_blk_ioctl(struct block_device *, fmode_t, -- cgit v1.2.3 From 011067b05668b05aae88e5a24cff0ca0a67ca0b0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 18 Jun 2017 14:38:57 +1000 Subject: blk: replace bioset_create_nobvec() with a flags arg to bioset_create() "flags" arguments are often seen as good API design as they allow easy extensibility. bioset_create_nobvec() is implemented internally as a variation in flags passed to __bioset_create(). To support future extension, make the internal structure part of the API. i.e. add a 'flags' argument to bioset_create() and discard bioset_create_nobvec(). Note that the bio_split allocations in drivers/md/raid* do not need the bvec mempool - they should have used bioset_create_nobvec(). Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Ming Lei Signed-off-by: NeilBrown Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 9455aada1399..985dc645637e 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -373,8 +373,10 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors, return bio_split(bio, sectors, gfp, bs); } -extern struct bio_set *bioset_create(unsigned int, unsigned int); -extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int); +extern struct bio_set *bioset_create(unsigned int, unsigned int, int flags); +enum { + BIOSET_NEED_BVECS = BIT(0), +}; extern void bioset_free(struct bio_set *); extern mempool_t *biovec_create_pool(int pool_entries); -- cgit v1.2.3 From 47e0fb461fca1a68a566c82fcc006cc787312d8c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 18 Jun 2017 14:38:57 +1000 Subject: blk: make the bioset rescue_workqueue optional. This patch converts bioset_create() to not create a workqueue by default, so allocations will never trigger punt_bios_to_rescuer().
It also introduces a new flag BIOSET_NEED_RESCUER which tells bioset_create() to preserve the old behavior. All callers of bioset_create() that are inside block device drivers, are given the BIOSET_NEED_RESCUER flag. biosets used by filesystems or other top-level users do not need rescuing as the bio can never be queued behind other bios. This includes fs_bio_set, blkdev_dio_pool, btrfs_bioset, xfs_ioend_bioset, and one allocated by target_core_iblock.c. biosets used by md/raid do not need rescuing as their usage was recently audited and revised to never risk deadlock. It is hoped that most, if not all, of the remaining biosets can end up being the non-rescued version. Reviewed-by: Christoph Hellwig Credit-to: Ming Lei (minor fixes) Reviewed-by: Ming Lei Signed-off-by: NeilBrown Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 985dc645637e..32c786baa10a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -376,6 +376,7 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors, extern struct bio_set *bioset_create(unsigned int, unsigned int, int flags); enum { BIOSET_NEED_BVECS = BIT(0), + BIOSET_NEED_RESCUER = BIT(1), }; extern void bioset_free(struct bio_set *); extern mempool_t *biovec_create_pool(int pool_entries); -- cgit v1.2.3 From 9b10f6a9c2aaab49c56b8cff0facdc1b64ed7e1c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 18 Jun 2017 14:38:59 +1000 Subject: block: remove bio_clone() and all references. bio_clone() is no longer used. Only bio_clone_bioset() or bio_clone_fast(). This is for the best, as bio_clone() used fs_bio_set, and filesystems are unlikely to want to use bio_clone(). So remove bio_clone() and all references. This includes a fix to some incorrect documentation. 
Reviewed-by: Christoph Hellwig Reviewed-by: Ming Lei Signed-off-by: NeilBrown Signed-off-by: Jens Axboe --- include/linux/bio.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 32c786baa10a..40d054185277 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -395,11 +395,6 @@ static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); } -static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) -{ - return bio_clone_bioset(bio, gfp_mask, fs_bio_set); -} - static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) { return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); -- cgit v1.2.3 From 97e0120990f4a7037f72c0e115e5c7f514025738 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 6 Jun 2017 23:22:01 +0800 Subject: blk-mq: move blk_mq_quiesce_queue() into include/linux/blk-mq.h We usually put blk_mq_*() into include/linux/blk-mq.h, so move this API into there. 
Signed-off-by: Ming Lei Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + include/linux/blkdev.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b144b7b0e104..99348adb3e16 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -244,6 +244,7 @@ void blk_mq_stop_hw_queues(struct request_queue *q); void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); +void blk_mq_quiesce_queue(struct request_queue *q); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 670df402bc51..8423f6baf818 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -965,7 +965,6 @@ extern void __blk_run_queue(struct request_queue *q); extern void __blk_run_queue_uncond(struct request_queue *q); extern void blk_run_queue(struct request_queue *); extern void blk_run_queue_async(struct request_queue *q); -extern void blk_mq_quiesce_queue(struct request_queue *q); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t); -- cgit v1.2.3 From 4f084b41a0c04a69067be98a210e6b50969f9945 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 6 Jun 2017 23:22:02 +0800 Subject: blk-mq: introduce blk_mq_quiesce_queue_nowait() This patch introduces blk_mq_quiesce_queue_nowait() so that we can workaround mpt3sas for quiescing its queue. Once mpt3sas is fixed, we can remove this helper. 
Reviewed-by: Bart Van Assche Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 99348adb3e16..78a8b64074ea 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -262,6 +262,14 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set); int blk_mq_map_queues(struct blk_mq_tag_set *set); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); +/* + * FIXME: this helper is just for working around mpt3sas. + */ +static inline void blk_mq_quiesce_queue_nowait(struct request_queue *q) +{ + blk_mq_stop_hw_queues(q); +} + /* * Driver command data is immediately after the request. So subtract request * size to get back to the original request, add request size to get the PDU. -- cgit v1.2.3 From e4e739131ac93d373cd2d2fd92820a6a39115ba5 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 6 Jun 2017 23:22:03 +0800 Subject: blk-mq: introduce blk_mq_unquiesce_queue blk_mq_start_stopped_hw_queues() is used implicitly as counterpart of blk_mq_quiesce_queue() for unquiescing queue, so we introduce blk_mq_unquiesce_queue() and make it as counterpart of blk_mq_quiesce_queue() explicitly. This function is for improving the current quiescing mechanism in the following patches.
Reviewed-by: Bart Van Assche Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 78a8b64074ea..787d8a2a2ac6 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -245,6 +245,7 @@ void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_quiesce_queue(struct request_queue *q); +void blk_mq_unquiesce_queue(struct request_queue *q); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); -- cgit v1.2.3 From f4560ffe8cec1361b1021d81aca6a4173f8e7c87 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 18 Jun 2017 14:24:27 -0600 Subject: blk-mq: use QUEUE_FLAG_QUIESCED to quiesce queue It is required that no dispatch can happen any more once blk_mq_quiesce_queue() returns, and we don't have such requirement on APIs of stopping queue. But blk_mq_quiesce_queue() still may not block/drain dispatch in the case of BLK_MQ_S_START_ON_RUN, so use the newly introduced flag of QUEUE_FLAG_QUIESCED and evaluate it inside RCU read-side critical sections for fixing this issue. Also blk_mq_quiesce_queue() is implemented via stopping queue, which limits its uses, and easy to cause race, because any queue restart in other paths may break blk_mq_quiesce_queue(). With the introduced flag of QUEUE_FLAG_QUIESCED, we don't need to depend on stopping queue for quiescing any more.
Signed-off-by: Ming Lei Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++++ include/linux/blkdev.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 787d8a2a2ac6..de6536c14ae7 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -268,6 +268,10 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); */ static inline void blk_mq_quiesce_queue_nowait(struct request_queue *q) { + spin_lock_irq(q->queue_lock); + queue_flag_set(QUEUE_FLAG_QUIESCED, q); + spin_unlock_irq(q->queue_lock); + blk_mq_stop_hw_queues(q); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8423f6baf818..22cfba64ce81 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -619,6 +619,7 @@ struct request_queue { #define QUEUE_FLAG_POLL_STATS 28 /* collecting stats for hybrid polling */ #define QUEUE_FLAG_REGISTERED 29 /* queue has been registered to a disk */ #define QUEUE_FLAG_SCSI_PASSTHROUGH 30 /* queue supports SCSI commands */ +#define QUEUE_FLAG_QUIESCED 31 /* queue has been quiesced */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -715,6 +716,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define blk_noretry_request(rq) \ ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ REQ_FAILFAST_DRIVER)) +#define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags) static inline bool blk_account_rq(struct request *rq) { -- cgit v1.2.3 From 1d9e9bc6b56e1bb7e33e7e2e1b99d7088356c006 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 6 Jun 2017 23:22:08 +0800 Subject: blk-mq: don't stop queue for quiescing Queue can be started by other blk-mq APIs and can be used in different cases, this limits uses of blk_mq_quiesce_queue() if it is based on stopping queue, and make its usage very difficult, 
especially users have to use the stop queue APIs carefully for avoiding to break blk_mq_quiesce_queue(). We have applied the QUIESCED flag for draining and blocking dispatch, so it isn't necessary to stop queue any more. After stopping queue is removed, blk_mq_quiesce_queue() can be used safely and easily, then users won't worry about queue restarting during quiescing at all. Reviewed-by: Bart Van Assche Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index de6536c14ae7..f1bd13ae8f57 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -271,8 +271,6 @@ static inline void blk_mq_quiesce_queue_nowait(struct request_queue *q) spin_lock_irq(q->queue_lock); queue_flag_set(QUEUE_FLAG_QUIESCED, q); spin_unlock_irq(q->queue_lock); - - blk_mq_stop_hw_queues(q); } /* -- cgit v1.2.3 From e33a3f84f88f13eab6a45c5230c9b9ee9ac78e60 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 30 Mar 2017 12:15:42 +0200 Subject: NFC: nfcmrvl: allow gpio 0 for reset signalling Allow gpio 0 to be used for reset signalling, and instead use negative errnos to disable the reset functionality. 
Signed-off-by: Johan Hovold Signed-off-by: Samuel Ortiz --- include/linux/platform_data/nfcmrvl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/nfcmrvl.h b/include/linux/platform_data/nfcmrvl.h index a6f9d633f5be..9e75ac8d19be 100644 --- a/include/linux/platform_data/nfcmrvl.h +++ b/include/linux/platform_data/nfcmrvl.h @@ -23,7 +23,7 @@ struct nfcmrvl_platform_data { */ /* GPIO that is wired to RESET_N signal */ - unsigned int reset_n_io; + int reset_n_io; /* Tell if transport is muxed in HCI one */ unsigned int hci_muxed; -- cgit v1.2.3 From 57129044f5044dcd73c22d91491906104bd331fd Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Mon, 5 Jun 2017 12:08:05 -0700 Subject: mfd: intel_soc_pmic_bxtwc: Use chained IRQs for second level IRQ chips Whiskey Cove PMIC has support to mask/unmask interrupts at two levels. At first level we can mask/unmask interrupt domains like TMU, GPIO, ADC, CHGR, BCU THERMAL and PWRBTN and at second level, it provides facility to mask/unmask individual interrupts belonging to each of these domains. For example, in case of TMU, at first level we have TMU interrupt domain, and at second level we have two interrupts, wake alarm, system alarm that belong to the TMU interrupt domain. Currently, in this driver all first level IRQs are registered as part of IRQ chip(bxtwc_regmap_irq_chip). By default, after you register the IRQ chip from your driver, all IRQs in that chip will be masked and can only be enabled if that IRQ is requested using request_irq() call. This is the default Linux IRQ behavior model. And whenever a dependent device that belongs to PMIC requests only the second level IRQ and not explicitly unmask the first level IRQ, then in essence the second level IRQ will still be disabled.
For example, if TMU device driver request wake_alarm IRQ and not explicitly unmask TMU level 1 IRQ then according to the default Linux IRQ model, wake_alarm IRQ will still be disabled. So the proper solution to fix this issue is to use the chained IRQ chip concept. We should chain all the second level chip IRQs to the corresponding first level IRQ. To do this, we need to create separate IRQ chips for every group of second level IRQs. In case of TMU, when adding second level IRQ chip, instead of using PMIC IRQ we should use the corresponding first level IRQ. So the following code will change from ret = regmap_add_irq_chip(pmic->regmap, pmic->irq, ...) to, virq = regmap_irq_get_virq(&pmic->irq_chip_data, BXTWC_TMU_LVL1_IRQ); ret = regmap_add_irq_chip(pmic->regmap, virq, ...) In case of Whiskey Cove Type-C driver, Since USBC IRQ is moved under charger level2 IRQ chip. We should use charger IRQ chip(irq_chip_data_chgr) to get the USBC virtual IRQ number. Signed-off-by: Kuppuswamy Sathyanarayanan Reviewed-by: Andy Shevchenko Revieved-by: Heikki Krogerus Signed-off-by: Lee Jones --- include/linux/mfd/intel_soc_pmic.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/intel_soc_pmic.h b/include/linux/mfd/intel_soc_pmic.h index 956caa0628f5..5aacdb017a9f 100644 --- a/include/linux/mfd/intel_soc_pmic.h +++ b/include/linux/mfd/intel_soc_pmic.h @@ -25,8 +25,11 @@ struct intel_soc_pmic { int irq; struct regmap *regmap; struct regmap_irq_chip_data *irq_chip_data; - struct regmap_irq_chip_data *irq_chip_data_level2; struct regmap_irq_chip_data *irq_chip_data_tmu; + struct regmap_irq_chip_data *irq_chip_data_bcu; + struct regmap_irq_chip_data *irq_chip_data_adc; + struct regmap_irq_chip_data *irq_chip_data_chgr; + struct regmap_irq_chip_data *irq_chip_data_crit; struct device *dev; }; -- cgit v1.2.3 From d2c3c8dcb5987b8352e82089c79a41b6e17e28d2 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 20 Apr 2017 
10:46:07 -0700 Subject: dm: convert DM printk macros to pr_ macros Using pr_ is the more common logging style. Standardize style and use new macro DM_FMT. Use no_printk in DMDEBUG macros when CONFIG_DM_DEBUG is not #defined. Signed-off-by: Joe Perches Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 71 +++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 456da5017b32..19bc7fdfd6da 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -543,48 +543,41 @@ extern struct ratelimit_state dm_ratelimit_state; #define dm_ratelimit() 0 #endif -#define DMCRIT(f, arg...) \ - printk(KERN_CRIT DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) - -#define DMERR(f, arg...) \ - printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) -#define DMERR_LIMIT(f, arg...) \ - do { \ - if (dm_ratelimit()) \ - printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " \ - f "\n", ## arg); \ - } while (0) - -#define DMWARN(f, arg...) \ - printk(KERN_WARNING DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) -#define DMWARN_LIMIT(f, arg...) \ - do { \ - if (dm_ratelimit()) \ - printk(KERN_WARNING DM_NAME ": " DM_MSG_PREFIX ": " \ - f "\n", ## arg); \ - } while (0) - -#define DMINFO(f, arg...) \ - printk(KERN_INFO DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) -#define DMINFO_LIMIT(f, arg...) \ - do { \ - if (dm_ratelimit()) \ - printk(KERN_INFO DM_NAME ": " DM_MSG_PREFIX ": " f \ - "\n", ## arg); \ - } while (0) +#define DM_FMT(fmt) DM_NAME ": " DM_MSG_PREFIX ": " fmt "\n" + +#define DMCRIT(fmt, ...) pr_crit(DM_FMT(fmt), ##__VA_ARGS__) + +#define DMERR(fmt, ...) pr_err(DM_FMT(fmt), ##__VA_ARGS__) +#define DMERR_LIMIT(fmt, ...) \ +do { \ + if (dm_ratelimit()) \ + DMERR(fmt, ##__VA_ARGS__); \ +} while (0) + +#define DMWARN(fmt, ...) pr_warn(DM_FMT(fmt), ##__VA_ARGS__) +#define DMWARN_LIMIT(fmt, ...) 
\ +do { \ + if (dm_ratelimit()) \ + DMWARN(fmt, ##__VA_ARGS__); \ +} while (0) + +#define DMINFO(fmt, ...) pr_info(DM_FMT(fmt), ##__VA_ARGS__) +#define DMINFO_LIMIT(fmt, ...) \ +do { \ + if (dm_ratelimit()) \ + DMINFO(fmt, ##__VA_ARGS__); \ +} while (0) #ifdef CONFIG_DM_DEBUG -# define DMDEBUG(f, arg...) \ - printk(KERN_DEBUG DM_NAME ": " DM_MSG_PREFIX " DEBUG: " f "\n", ## arg) -# define DMDEBUG_LIMIT(f, arg...) \ - do { \ - if (dm_ratelimit()) \ - printk(KERN_DEBUG DM_NAME ": " DM_MSG_PREFIX ": " f \ - "\n", ## arg); \ - } while (0) +#define DMDEBUG(fmt, ...) printk(KERN_DEBUG DM_FMT(fmt), ##__VA_ARGS__) +#define DMDEBUG_LIMIT(fmt, ...) \ +do { \ + if (dm_ratelimit()) \ + DMDEBUG(fmt, ##__VA_ARGS__); \ +} while (0) #else -# define DMDEBUG(f, arg...) do {} while (0) -# define DMDEBUG_LIMIT(f, arg...) do {} while (0) +#define DMDEBUG(fmt, ...) no_printk(fmt, ##__VA_ARGS__) +#define DMDEBUG_LIMIT(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif #define DMEMIT(x...) sz += ((sz >= maxlen) ? \ -- cgit v1.2.3 From dd88d313bef0277e27597aa394607ed26c658724 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 8 May 2017 16:40:43 -0700 Subject: dm table: add zoned block devices validation 1) Introduce DM_TARGET_ZONED_HM feature flag: The target drivers currently available will not operate correctly if a table target maps onto a host-managed zoned block device. To avoid problems, introduce the new feature flag DM_TARGET_ZONED_HM to allow a target to explicitly state that it supports host-managed zoned block devices. This feature is checked for all targets in a table if any of the table's block devices are host-managed. Note that as host-aware zoned block devices are backward compatible with regular block devices, they can be used by any of the current target types. This new feature is thus restricted to host-managed zoned block devices. 
2) Check device area zone alignment: If a target maps to a zoned block device, check that the device area is aligned on zone boundaries to avoid problems with REQ_OP_ZONE_RESET operations (resetting a partially mapped sequential zone would not be possible). This also facilitates the processing of zone report with REQ_OP_ZONE_REPORT bios. 3) Check block devices zone model compatibility When setting the DM device's queue limits, several possibilities exists for zoned block devices: 1) The DM target driver may want to expose a different zone model (e.g. host-managed device emulation or regular block device on top of host-managed zoned block devices) 2) Expose the underlying zone model of the devices as-is To allow both cases, the underlying block device zone model must be set in the target limits in dm_set_device_limits() and the compatibility of all devices checked similarly to the logical block size alignment. For this last check, introduce validate_hardware_zoned_model() to check that all targets of a table have the same zone model and that the zone size of the target devices are equal. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche [Mike Snitzer refactored Damien's original work to simplify the code] Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 19bc7fdfd6da..186ef74009cb 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -237,6 +237,12 @@ typedef unsigned (*dm_num_write_bios_fn) (struct dm_target *ti, struct bio *bio) #define DM_TARGET_PASSES_INTEGRITY 0x00000020 #define dm_target_passes_integrity(type) ((type)->features & DM_TARGET_PASSES_INTEGRITY) +/* + * Indicates that a target supports host-managed zoned block devices. 
+ */ +#define DM_TARGET_ZONED_HM 0x00000040 +#define dm_target_supports_zoned_hm(type) ((type)->features & DM_TARGET_ZONED_HM) + struct dm_target { struct dm_table *table; struct target_type *type; -- cgit v1.2.3 From 10999307c14eac281fbec3ada73bee7a05bd41dc Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 8 May 2017 16:40:48 -0700 Subject: dm: introduce dm_remap_zone_report() A target driver support zoned block devices and exposing it as such may receive REQ_OP_ZONE_REPORT request for the user to determine the mapped device zone configuration. To process properly such request, the target driver may need to remap the zone descriptors provided in the report reply. The helper function dm_remap_zone_report() does this generically using only the target start offset and length and the start offset within the target device. dm_remap_zone_report() will remap the start sector of all zones reported. If the report includes sequential zones, the write pointer position of these zones will also be remapped. 
Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 186ef74009cb..0c1b50ad23b0 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -450,6 +450,8 @@ struct gendisk *dm_disk(struct mapped_device *md); int dm_suspended(struct dm_target *ti); int dm_noflush_suspending(struct dm_target *ti); void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors); +void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, + sector_t start); union map_info *dm_get_rq_mapinfo(struct request *rq); struct queue_limits *dm_get_queue_limits(struct mapped_device *md); -- cgit v1.2.3 From b73c67c2cbb0004e6da9720a167fe42e31f7a6e8 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 8 May 2017 16:40:51 -0700 Subject: dm kcopyd: add sequential write feature When copying blocks to host-managed zoned block devices, writes must be sequential. However, dm_kcopyd_copy() does not guarantee this as writes are issued in the completion order of reads, and reads may complete out of order despite being issued sequentially. Fix this by introducing the DM_KCOPYD_WRITE_SEQ feature flag. This can be specified when calling dm_kcopyd_copy() and should be set automatically if one of the destinations is a host-managed zoned block device. For a split job, the master job maintains the write position at which writes must be issued. This is checked with the pop() function which is modified to not return any write I/O sub job that is not at the correct write position. When DM_KCOPYD_WRITE_SEQ is specified for a job, errors cannot be ignored and the flag DM_KCOPYD_IGNORE_ERROR is ignored, even if specified by the user.
Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Signed-off-by: Mike Snitzer --- include/linux/dm-kcopyd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h index f486d636b82e..cfac8588ed56 100644 --- a/include/linux/dm-kcopyd.h +++ b/include/linux/dm-kcopyd.h @@ -20,6 +20,7 @@ #define DM_KCOPYD_MAX_REGIONS 8 #define DM_KCOPYD_IGNORE_ERROR 1 +#define DM_KCOPYD_WRITE_SEQ 2 struct dm_kcopyd_throttle { unsigned throttle; -- cgit v1.2.3 From e15b9c50c4555e30be3c4f26aab7aeb10aee7aa6 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Wed, 31 May 2017 16:55:43 +0800 Subject: netfilter: ebt: Use new helper ebt_invalid_target to check target Use the new helper function ebt_invalid_target instead of the old macro INVALID_TARGET and other duplicated codes to enhance the readability. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index e0cbf17af780..2c2a5514b0df 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -122,8 +122,6 @@ extern unsigned int ebt_do_table(struct sk_buff *skb, #define BASE_CHAIN (par->hook_mask & (1 << NF_BR_NUMHOOKS)) /* Clear the bit in the hook mask that tells if the rule is on a base chain */ #define CLEAR_BASE_CHAIN_BIT (par->hook_mask &= ~(1 << NF_BR_NUMHOOKS)) -/* True if the target is not a standard target */ -#define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0) static inline bool ebt_invalid_target(int target) { -- cgit v1.2.3 From 04ba724b659c6808b0ca31528121bdb2f2807e00 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 19 Jun 2017 18:35:46 +0100 Subject: netfilter: nfnetlink: extended ACK reporting Pass down struct 
netlink_ext_ack as parameter to all of our nfnetlink subsystem callbacks, so we can work on follow up patches to provide finer grain error reporting using the new infrastructure that 2d4bc93368f5 ("netlink: extended ACK reporting") provides. No functional change, just pass down this new object to callbacks. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 996711d8a7b4..41d04e9d088a 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -1,7 +1,6 @@ #ifndef _NFNETLINK_H #define _NFNETLINK_H - #include #include #include @@ -10,13 +9,16 @@ struct nfnl_callback { int (*call)(struct net *net, struct sock *nl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]); + const struct nlattr * const cda[], + struct netlink_ext_ack *extack); int (*call_rcu)(struct net *net, struct sock *nl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]); + const struct nlattr * const cda[], + struct netlink_ext_ack *extack); int (*call_batch)(struct net *net, struct sock *nl, struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const cda[]); + const struct nlattr * const cda[], + struct netlink_ext_ack *extack); const struct nla_policy *policy; /* netlink attribute policy */ const u_int16_t attr_count; /* number of nlattr's */ }; -- cgit v1.2.3 From 204a2be30a7a8a8d12642f23f3fbdc8b9923b500 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Wed, 7 Jun 2017 00:11:44 +0200 Subject: m68k: Remove ptrace_signal_deliver This fixes debugger syscall restart interactions. A debugger that modifies the tracee's program counter is expected to set the orig_d0 pseudo register to -1, to disable a possible syscall restart. 
This removes the last user of the ptrace_signal_deliver hook in the ptrace signal handling, so remove that as well. Signed-off-by: Andreas Schwab Signed-off-by: Geert Uytterhoeven --- include/linux/ptrace.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 422bc2e4cb6a..9a2e04be0657 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -388,10 +388,6 @@ static inline void user_single_step_siginfo(struct task_struct *tsk, #define current_pt_regs() task_pt_regs(current) #endif -#ifndef ptrace_signal_deliver -#define ptrace_signal_deliver() ((void)0) -#endif - /* * unlike current_pt_regs(), this one is equal to task_pt_regs(current) * on *all* architectures; the only reason to have a per-arch definition -- cgit v1.2.3 From e297a783e41560b44e3c14f38e420cba518113b8 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 7 Jun 2017 19:58:56 -0400 Subject: random: add wait_for_random_bytes() API This enables users of get_random_{bytes,u32,u64,int,long} to wait until the pool is ready before using this function, in case they actually want to have reliable randomness. Signed-off-by: Jason A. 
Donenfeld Signed-off-by: Theodore Ts'o --- include/linux/random.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index ed5c3838780d..e29929347c95 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -34,6 +34,7 @@ extern void add_input_randomness(unsigned int type, unsigned int code, extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy; extern void get_random_bytes(void *buf, int nbytes); +extern int wait_for_random_bytes(void); extern int add_random_ready_callback(struct random_ready_callback *rdy); extern void del_random_ready_callback(struct random_ready_callback *rdy); extern void get_random_bytes_arch(void *buf, int nbytes); -- cgit v1.2.3 From da9ba564bd683374b8d319756f312821b8265b06 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 7 Jun 2017 20:05:02 -0400 Subject: random: add get_random_{bytes,u32,u64,int,long,once}_wait family These functions are simple convenience wrappers that call wait_for_random_bytes before calling the respective get_random_* function. Signed-off-by: Jason A. 
Donenfeld Signed-off-by: Theodore Ts'o --- include/linux/net.h | 2 ++ include/linux/once.h | 2 ++ include/linux/random.h | 25 +++++++++++++++++++++++++ 3 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index abcfa46a2bd9..dda2cc939a53 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -274,6 +274,8 @@ do { \ #define net_get_random_once(buf, nbytes) \ get_random_once((buf), (nbytes)) +#define net_get_random_once_wait(buf, nbytes) \ + get_random_once_wait((buf), (nbytes)) int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); diff --git a/include/linux/once.h b/include/linux/once.h index 285f12cb40e6..9c98aaa87cbc 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -53,5 +53,7 @@ void __do_once_done(bool *done, struct static_key *once_key, #define get_random_once(buf, nbytes) \ DO_ONCE(get_random_bytes, (buf), (nbytes)) +#define get_random_once_wait(buf, nbytes) \ + DO_ONCE(get_random_bytes_wait, (buf), (nbytes)) \ #endif /* _LINUX_ONCE_H */ diff --git a/include/linux/random.h b/include/linux/random.h index e29929347c95..4aecc339558d 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -58,6 +58,31 @@ static inline unsigned long get_random_long(void) #endif } +/* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes). + * Returns the result of the call to wait_for_random_bytes. 
*/ +static inline int get_random_bytes_wait(void *buf, int nbytes) +{ + int ret = wait_for_random_bytes(); + if (unlikely(ret)) + return ret; + get_random_bytes(buf, nbytes); + return 0; +} + +#define declare_get_random_var_wait(var) \ + static inline int get_random_ ## var ## _wait(var *out) { \ + int ret = wait_for_random_bytes(); \ + if (unlikely(ret)) \ + return ret; \ + *out = get_random_ ## var(); \ + return 0; \ + } +declare_get_random_var_wait(u32) +declare_get_random_var_wait(u64) +declare_get_random_var_wait(int) +declare_get_random_var_wait(long) +#undef declare_get_random_var + unsigned long randomize_page(unsigned long start, unsigned long range); u32 prandom_u32(void); -- cgit v1.2.3 From 63709fd4296221aa4ebd06230bce3eed70ddd927 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 16 Jun 2017 21:13:38 +0300 Subject: uuid: Take const on input of uuid_is_null() and guid_is_null() The null check functions do not and must not modify contents of the UUID or GUID supplied. Mark argument explicitly to reflect that. 
Signed-off-by: Andy Shevchenko Signed-off-by: Christoph Hellwig --- include/linux/uuid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 75f7182d5360..d1defe4ab167 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -48,7 +48,7 @@ static inline void guid_copy(guid_t *dst, const guid_t *src) memcpy(dst, src, sizeof(guid_t)); } -static inline bool guid_is_null(guid_t *guid) +static inline bool guid_is_null(const guid_t *guid) { return guid_equal(guid, &guid_null); } @@ -63,7 +63,7 @@ static inline void uuid_copy(uuid_t *dst, const uuid_t *src) memcpy(dst, src, sizeof(uuid_t)); } -static inline bool uuid_is_null(uuid_t *uuid) +static inline bool uuid_is_null(const uuid_t *uuid) { return uuid_equal(uuid, &uuid_null); } -- cgit v1.2.3 From 682696605c7093d2800c498c04166831e5aedf87 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 13 Apr 2017 16:48:11 +0200 Subject: mmc: sdio: Add API to manage SDIO IRQs from a workqueue For hosts not supporting MMC_CAP2_SDIO_IRQ_NOTHREAD but MMC_CAP_SDIO_IRQ, the SDIO IRQs are processed from a dedicated kernel thread. For these cases, the host calls mmc_signal_sdio_irq() from its ISR to signal a new SDIO IRQ. Signaling an SDIO IRQ causes the host's ->enable_sdio_irq() callback to be invoked to temporarily disable the IRQs, before the kernel thread is woken up to process it. When processing of the IRQs is completed, they are re-enabled by the kernel thread, again via invoking the host's ->enable_sdio_irq(). The observation from this is that the execution path is unnecessarily complex, as the host driver already knows that it needs to temporarily disable the IRQs before signaling a new one. Moreover, replacing the kernel thread with a work/workqueue would not only greatly simplify the code, but also make it more robust.
To address the above problems, let's continue to build upon the support for MMC_CAP2_SDIO_IRQ_NOTHREAD, as it already implements SDIO IRQs to be processed without using the clumsy kernel thread and without the ping-pong calls of the host's ->enable_sdio_irq() callback for each processed IRQ. Therefore, let's add new API sdio_signal_irq(), which enables hosts to signal/process SDIO IRQs by using a work/workqueue, rather than using the kernel thread. Add also a new host callback ->ack_sdio_irq(), which the work invokes when the SDIO IRQs have been processed. This informs the host about when it shall re-enable the SDIO IRQs. Potentially, we could re-use the existing ->enable_sdio_irq() callback instead of adding a new one, however it has turned out that it's more convenient for hosts to get this information via a separate callback. Hosts that wants to use this new method to signal/process SDIO IRQs, must enable MMC_CAP2_SDIO_IRQ_NOTHREAD and implement the ->ack_sdio_irq() callback. Signed-off-by: Ulf Hansson Tested-by: Douglas Anderson Reviewed-by: Douglas Anderson --- include/linux/mmc/host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 21385ac0c9b1..f186b26c05a4 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -130,6 +130,7 @@ struct mmc_host_ops { int (*get_cd)(struct mmc_host *host); void (*enable_sdio_irq)(struct mmc_host *host, int enable); + void (*ack_sdio_irq)(struct mmc_host *host); /* optional callback for HC quirks */ void (*init_card)(struct mmc_host *host, struct mmc_card *card); @@ -358,6 +359,7 @@ struct mmc_host { unsigned int sdio_irqs; struct task_struct *sdio_irq_thread; + struct delayed_work sdio_irq_work; bool sdio_irq_pending; atomic_t sdio_irq_thread_abort; @@ -428,6 +430,7 @@ static inline void mmc_signal_sdio_irq(struct mmc_host *host) } void sdio_run_irqs(struct mmc_host *host); +void sdio_signal_irq(struct mmc_host *host); 
#ifdef CONFIG_REGULATOR int mmc_regulator_get_ocrmask(struct regulator *supply); -- cgit v1.2.3 From c3dccb74be28a345a2ebcc224e41b774529b8b8f Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 18 May 2017 11:29:31 +0200 Subject: mmc: core: Delete bounce buffer Kconfig option This option is activated by all multiplatform configs and what not so we almost always have it turned on, and the memory it saves is negligible, even more so moving forward. The actual bounce buffer only gets allocated only when used, the only thing the ifdefs are saving is a little bit of code. It is highly improper to have this as a Kconfig option that get turned on by Kconfig, make this a pure runtime-thing and let the host decide whether we use bounce buffers. We add a new property "disable_bounce" to the host struct. Notice that mmc_queue_calc_bouncesz() already disables the bounce buffers if host->max_segs != 1, so any arch that has a maximum number of segments higher than 1 will have bounce buffers disabled. The option CONFIG_MMC_BLOCK_BOUNCE is default y so the majority of platforms in the kernel already have it on, and it then gets turned off at runtime since most of these have a host->max_segs > 1. The few exceptions that have host->max_segs == 1 and still turn off the bounce buffering are those that disable it in their defconfig. 
Those are the following: arch/arm/configs/colibri_pxa300_defconfig arch/arm/configs/zeus_defconfig - Uses MMC_PXA, drivers/mmc/host/pxamci.c - Sets host->max_segs = NR_SG, which is 1 - This needs its bounce buffer deactivated so we set host->disable_bounce to true in the host driver arch/arm/configs/davinci_all_defconfig - Uses MMC_DAVINCI, drivers/mmc/host/davinci_mmc.c - This driver sets host->max_segs to MAX_NR_SG, which is 16 - That means this driver anyways disabled bounce buffers - No special action needed for this platform arch/arm/configs/lpc32xx_defconfig arch/arm/configs/nhk8815_defconfig arch/arm/configs/u300_defconfig - Uses MMC_ARMMMCI, drivers/mmc/host/mmci.[c|h] - This driver by default sets host->max_segs to NR_SG, which is 128, unless a DMA engine is used, and in that case the number of segments are also > 1 - That means this driver already disables bounce buffers - No special action needed for these platforms arch/arm/configs/sama5_defconfig - Uses MMC_SDHCI, MMC_SDHCI_PLTFM, MMC_SDHCI_OF_AT91, MMC_ATMELMCI - Uses drivers/mmc/host/sdhci.c - Normally sets host->max_segs to SDHCI_MAX_SEGS which is 128 and thus disables bounce buffers - Sets host->max_segs to 1 if SDHCI_USE_SDMA is set - SDHCI_USE_SDMA is only set by SDHCI on PCI adapers - That means that for this platform bounce buffers are already disabled at runtime - No special action needed for this platform arch/blackfin/configs/CM-BF533_defconfig arch/blackfin/configs/CM-BF537E_defconfig - Uses MMC_SPI (a simple MMC card connected on SPI pins) - Uses drivers/mmc/host/mmc_spi.c - Sets host->max_segs to MMC_SPI_BLOCKSATONCE which is 128 - That means this platform already disables bounce buffers at runtime - No special action needed for these platforms arch/mips/configs/cavium_octeon_defconfig - Uses MMC_CAVIUM_OCTEON, drivers/mmc/host/cavium.c - Sets host->max_segs to 16 or 1 - Setting host->disable_bounce to be sure for the 1 case arch/mips/configs/qi_lb60_defconfig - Uses MMC_JZ4740, 
drivers/mmc/host/jz4740_mmc.c - This sets host->max_segs to 128 so bounce buffers are already runtime disabled - No action needed for this platform It would be interesting to come up with a list of the platforms that actually end up using bounce buffers. I have not been able to infer such a list, but it occurs when host->max_segs == 1 and the bounce buffering is not explicitly disabled. Signed-off-by: Linus Walleij Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index f186b26c05a4..9209f95a5106 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -271,6 +271,7 @@ struct mmc_host { #define MMC_CAP_UHS_SDR50 (1 << 18) /* Host supports UHS SDR50 mode */ #define MMC_CAP_UHS_SDR104 (1 << 19) /* Host supports UHS SDR104 mode */ #define MMC_CAP_UHS_DDR50 (1 << 20) /* Host supports UHS DDR50 mode */ +#define MMC_CAP_NO_BOUNCE_BUFF (1 << 21) /* Disable bounce buffers on host */ #define MMC_CAP_DRIVER_TYPE_A (1 << 23) /* Host supports Driver Type A */ #define MMC_CAP_DRIVER_TYPE_C (1 << 24) /* Host supports Driver Type C */ #define MMC_CAP_DRIVER_TYPE_D (1 << 25) /* Host supports Driver Type D */ -- cgit v1.2.3 From 304419d8a7e9204c5d19b704467b814df8c8f5b1 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 18 May 2017 11:29:32 +0200 Subject: mmc: core: Allocate per-request data using the block layer core The mmc_queue_req is a per-request state container the MMC core uses to carry bounce buffers, pointers to asynchronous requests and so on. Currently allocated as a static array of objects, then as a request comes in, a mmc_queue_req is assigned to it, and used during the lifetime of the request. 
This is backwards compared to how other block layer drivers work: they usually let the block core provide a per-request struct that gets allocated right behind the struct request, and which can be obtained using the blk_mq_rq_to_pdu() helper. (The _mq_ infix in this function name is misleading: it is used by both the old and the MQ block layer.) The per-request struct gets allocated to the size stored in the queue variable .cmd_size initialized using the .init_rq_fn() and cleaned up using .exit_rq_fn(). The block layer code makes the MMC core rely on this mechanism to allocate the per-request mmc_queue_req state container. Doing this makes a lot of complicated queue handling go away. We only need to keep the .qnct that keeps count of how many requests are currently being processed by the MMC layer. The MQ block layer will replace also this once we transition to it. Doing this refactoring is necessary to move the ioctl() operations into custom block layer requests tagged with REQ_OP_DRV_[IN|OUT] instead of the custom code using the BigMMCHostLock that we have today: those require that per-request data be obtainable easily from a request after creating a custom request with e.g.: struct request *rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM); struct mmc_queue_req *mq_rq = req_to_mq_rq(rq); And this is not possible with the current construction, as the request is not immediately assigned the per-request state container, but instead it gets assigned when the request finally enters the MMC queue, which is way too late for custom requests.
Signed-off-by: Linus Walleij [Ulf: Folded in the fix to drop a call to blk_cleanup_queue()] Signed-off-by: Ulf Hansson Tested-by: Heiner Kallweit --- include/linux/mmc/card.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index aad015e0152b..46c73e97e61f 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -305,9 +305,7 @@ struct mmc_card { struct mmc_part part[MMC_NUM_PHY_PARTITION]; /* physical partitions */ unsigned int nr_parts; - struct mmc_queue_req *mqrq; /* Shared queue structure */ unsigned int bouncesz; /* Bounce buffer size */ - int qdepth; /* Shared queue depth */ }; static inline bool mmc_large_sector(struct mmc_card *card) -- cgit v1.2.3 From d63c2bf49c0de83e88153da3af9970f68c633257 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 28 May 2017 11:30:47 +0200 Subject: mmc: use proper name for the R-Car SoC It is 'R-Car', not 'RCar'. No code or binding changes, only descriptive text. Signed-off-by: Wolfram Sang Acked-by: Geert Uytterhoeven Signed-off-by: Ulf Hansson --- include/linux/mfd/tmio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index a1520d88ebf3..c83c16b931a8 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -66,7 +66,7 @@ */ #define TMIO_MMC_SDIO_IRQ (1 << 2) -/* Some features are only available or tested on RCar Gen2 or later */ +/* Some features are only available or tested on R-Car Gen2 or later */ #define TMIO_MMC_MIN_RCAR2 (1 << 3) /* -- cgit v1.2.3 From d2a47176a877b1eccd3086a4c8d790d644d594cb Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 8 Jun 2017 15:23:08 +0200 Subject: mmc: core: Remove MMC_CAP2_HC_ERASE_SZ The MMC_CAP2_HC_ERASE_SZ is used only by a few mmc host drivers. Its intent is to enable eMMC's high-capacity erase size, as to improve the behaviour of the erase operations. 
We should strive to avoid software configuration options that aren't necessary, but instead deploy common behaviours. For these reasons, let's remove the capability bit for MMC_CAP2_HC_ERASE_SZ and make it the default behaviour. Note that this change doesn't affect eMMCs supporting trim/discard, because these commands operates on sectors and takes precedence over erase commands. Signed-off-by: Ulf Hansson Acked-by: Adrian Hunter Reviewed-by: Linus Walleij Reviewed-by: Shawn Lin Tested-by: Shawn Lin --- include/linux/mmc/host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 9209f95a5106..c81380a2181f 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -287,7 +287,6 @@ struct mmc_host { #define MMC_CAP2_HS200_1_2V_SDR (1 << 6) /* can support */ #define MMC_CAP2_HS200 (MMC_CAP2_HS200_1_8V_SDR | \ MMC_CAP2_HS200_1_2V_SDR) -#define MMC_CAP2_HC_ERASE_SZ (1 << 9) /* High-capacity erase size */ #define MMC_CAP2_CD_ACTIVE_HIGH (1 << 10) /* Card-detect signal active high */ #define MMC_CAP2_RO_ACTIVE_HIGH (1 << 11) /* Write-protect signal active high */ #define MMC_CAP2_PACKED_RD (1 << 12) /* Allow packed read */ -- cgit v1.2.3 From 03dbaa04a2e5bac0ae907a9ed31472bc4bb56fd3 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 13 Jun 2017 15:07:51 +0300 Subject: mmc: slot-gpio: Add support to enable irq wake on cd_irq Add host capability MMC_CAP_CD_WAKE to enable irq wake on the card detect irq. 
Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index c81380a2181f..ebd1cebbef0c 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -185,6 +185,7 @@ struct mmc_async_req { */ struct mmc_slot { int cd_irq; + bool cd_wake_enabled; void *handler_priv; }; @@ -275,6 +276,7 @@ struct mmc_host { #define MMC_CAP_DRIVER_TYPE_A (1 << 23) /* Host supports Driver Type A */ #define MMC_CAP_DRIVER_TYPE_C (1 << 24) /* Host supports Driver Type C */ #define MMC_CAP_DRIVER_TYPE_D (1 << 25) /* Host supports Driver Type D */ +#define MMC_CAP_CD_WAKE (1 << 28) /* Enable card detect wake */ #define MMC_CAP_CMD_DURING_TFR (1 << 29) /* Commands during data transfer */ #define MMC_CAP_CMD23 (1 << 30) /* CMD23 supported. */ #define MMC_CAP_HW_RESET (1 << 31) /* Hardware reset */ -- cgit v1.2.3 From f2218db81548544bf7349911546a94bfaabbd697 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 16 Jun 2017 18:11:03 +0200 Subject: mmc: tmio: improve checkpatch cleanness Trivial updates to improve checkpatch cleanness. 
Signed-off-by: Simon Horman Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Signed-off-by: Ulf Hansson --- include/linux/mfd/tmio.h | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index c83c16b931a8..26e8f8c0a6db 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -13,15 +13,15 @@ #define tmio_ioread16(addr) readw(addr) #define tmio_ioread16_rep(r, b, l) readsw(r, b, l) #define tmio_ioread32(addr) \ - (((u32) readw((addr))) | (((u32) readw((addr) + 2)) << 16)) + (((u32)readw((addr))) | (((u32)readw((addr) + 2)) << 16)) #define tmio_iowrite8(val, addr) writeb((val), (addr)) #define tmio_iowrite16(val, addr) writew((val), (addr)) #define tmio_iowrite16_rep(r, b, l) writesw(r, b, l) #define tmio_iowrite32(val, addr) \ do { \ - writew((val), (addr)); \ - writew((val) >> 16, (addr) + 2); \ + writew((val), (addr)); \ + writew((val) >> 16, (addr) + 2); \ } while (0) #define CNF_CMD 0x04 @@ -55,57 +55,57 @@ } while (0) /* tmio MMC platform flags */ -#define TMIO_MMC_WRPROTECT_DISABLE (1 << 0) +#define TMIO_MMC_WRPROTECT_DISABLE BIT(0) /* * Some controllers can support a 2-byte block size when the bus width * is configured in 4-bit mode. */ -#define TMIO_MMC_BLKSZ_2BYTES (1 << 1) +#define TMIO_MMC_BLKSZ_2BYTES BIT(1) /* * Some controllers can support SDIO IRQ signalling. */ -#define TMIO_MMC_SDIO_IRQ (1 << 2) +#define TMIO_MMC_SDIO_IRQ BIT(2) /* Some features are only available or tested on R-Car Gen2 or later */ -#define TMIO_MMC_MIN_RCAR2 (1 << 3) +#define TMIO_MMC_MIN_RCAR2 BIT(3) /* * Some controllers require waiting for the SD bus to become * idle before writing to some registers. */ -#define TMIO_MMC_HAS_IDLE_WAIT (1 << 4) +#define TMIO_MMC_HAS_IDLE_WAIT BIT(4) /* * A GPIO is used for card hotplug detection. 
We need an extra flag for this, * because 0 is a valid GPIO number too, and requiring users to specify * cd_gpio < 0 to disable GPIO hotplug would break backwards compatibility. */ -#define TMIO_MMC_USE_GPIO_CD (1 << 5) +#define TMIO_MMC_USE_GPIO_CD BIT(5) /* * Some controllers doesn't have over 0x100 register. * it is used to checking accessibility of * CTL_SD_CARD_CLK_CTL / CTL_CLK_AND_WAIT_CTL */ -#define TMIO_MMC_HAVE_HIGH_REG (1 << 6) +#define TMIO_MMC_HAVE_HIGH_REG BIT(6) /* * Some controllers have CMD12 automatically * issue/non-issue register */ -#define TMIO_MMC_HAVE_CMD12_CTRL (1 << 7) +#define TMIO_MMC_HAVE_CMD12_CTRL BIT(7) /* Controller has some SDIO status bits which must be 1 */ -#define TMIO_MMC_SDIO_STATUS_SETBITS (1 << 8) +#define TMIO_MMC_SDIO_STATUS_SETBITS BIT(8) /* * Some controllers have a 32-bit wide data port register */ -#define TMIO_MMC_32BIT_DATA_PORT (1 << 9) +#define TMIO_MMC_32BIT_DATA_PORT BIT(9) /* * Some controllers allows to set SDx actual clock */ -#define TMIO_MMC_CLK_ACTUAL (1 << 10) +#define TMIO_MMC_CLK_ACTUAL BIT(10) int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); @@ -146,9 +146,9 @@ struct tmio_nand_data { struct tmio_fb_data { int (*lcd_set_power)(struct platform_device *fb_dev, - bool on); + bool on); int (*lcd_mode)(struct platform_device *fb_dev, - const struct fb_videomode *mode); + const struct fb_videomode *mode); int num_modes; struct fb_videomode *modes; @@ -157,5 +157,4 @@ struct tmio_fb_data { int width; }; - #endif -- cgit v1.2.3 From ac6424b981bce1c4bc55675c6ce11bfe1bbfa64f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 20 Jun 2017 12:06:13 +0200 Subject: sched/wait: Rename wait_queue_t => wait_queue_entry_t Rename: wait_queue_t => wait_queue_entry_t 'wait_queue_t' was always a slight misnomer: its name implies that it's a "queue", but in reality it's a queue *entry*. 
The 'real' queue is the wait queue head, which had to carry the name. Start sorting this out by renaming it to 'wait_queue_entry_t'. This also allows the real structure name 'struct __wait_queue' to lose its double underscore and become 'struct wait_queue_entry', which is the more canonical nomenclature for such data types. Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/blk-mq.h | 2 +- include/linux/eventfd.h | 4 +-- include/linux/kvm_irqfd.h | 2 +- include/linux/pagemap.h | 2 +- include/linux/poll.h | 2 +- include/linux/vfio.h | 2 +- include/linux/wait.h | 67 +++++++++++++++++++++++++---------------------- 7 files changed, 42 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index fcd641032f8d..95ba83806c5d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -33,7 +33,7 @@ struct blk_mq_hw_ctx { struct blk_mq_ctx **ctxs; unsigned int nr_ctx; - wait_queue_t dispatch_wait; + wait_queue_entry_t dispatch_wait; atomic_t wait_index; struct blk_mq_tags *tags; diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index ff0b981f078e..9e4befd95bc7 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -37,7 +37,7 @@ struct eventfd_ctx *eventfd_ctx_fdget(int fd); struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt); -int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait, +int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt); #else /* CONFIG_EVENTFD */ @@ -73,7 +73,7 @@ static inline ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, } static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, - wait_queue_t *wait, __u64 *cnt) + wait_queue_entry_t 
*wait, __u64 *cnt) { return -ENOSYS; } diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h index 0c1de05098c8..76c2fbc59f35 100644 --- a/include/linux/kvm_irqfd.h +++ b/include/linux/kvm_irqfd.h @@ -46,7 +46,7 @@ struct kvm_kernel_irqfd_resampler { struct kvm_kernel_irqfd { /* Used for MSI fast-path */ struct kvm *kvm; - wait_queue_t wait; + wait_queue_entry_t wait; /* Update side is protected by irqfds.lock */ struct kvm_kernel_irq_routing_entry irq_entry; seqcount_t irq_entry_sc; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 316a19f6b635..e7bbd9d4dc6c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -524,7 +524,7 @@ void page_endio(struct page *page, bool is_write, int err); /* * Add an arbitrary waiter to a page's wait queue */ -extern void add_page_wait_queue(struct page *page, wait_queue_t *waiter); +extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter); /* * Fault everything in given userspace address range in. 
diff --git a/include/linux/poll.h b/include/linux/poll.h index 75ffc5729e4c..2889f09a1c60 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -75,7 +75,7 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc) struct poll_table_entry { struct file *filp; unsigned long key; - wait_queue_t wait; + wait_queue_entry_t wait; wait_queue_head_t *wait_address; }; diff --git a/include/linux/vfio.h b/include/linux/vfio.h index edf9b2cad277..f57076b958b7 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -183,7 +183,7 @@ struct virqfd { void (*thread)(void *, void *); void *data; struct work_struct inject; - wait_queue_t wait; + wait_queue_entry_t wait; poll_table pt; struct work_struct shutdown; struct virqfd **pvirqfd; diff --git a/include/linux/wait.h b/include/linux/wait.h index db076ca7f11d..5889f0c86ff7 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -10,15 +10,18 @@ #include #include -typedef struct __wait_queue wait_queue_t; -typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key); -int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key); +typedef struct wait_queue_entry wait_queue_entry_t; +typedef int (*wait_queue_func_t)(wait_queue_entry_t *wait, unsigned mode, int flags, void *key); +int default_wake_function(wait_queue_entry_t *wait, unsigned mode, int flags, void *key); -/* __wait_queue::flags */ +/* wait_queue_entry::flags */ #define WQ_FLAG_EXCLUSIVE 0x01 #define WQ_FLAG_WOKEN 0x02 -struct __wait_queue { +/* + * A single wait-queue entry structure: + */ +struct wait_queue_entry { unsigned int flags; void *private; wait_queue_func_t func; @@ -34,7 +37,7 @@ struct wait_bit_key { struct wait_bit_queue { struct wait_bit_key key; - wait_queue_t wait; + wait_queue_entry_t wait; }; struct __wait_queue_head { @@ -55,7 +58,7 @@ struct task_struct; .task_list = { NULL, NULL } } #define DECLARE_WAITQUEUE(name, tsk) \ - wait_queue_t name = 
__WAITQUEUE_INITIALIZER(name, tsk) + wait_queue_entry_t name = __WAITQUEUE_INITIALIZER(name, tsk) #define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ @@ -88,7 +91,7 @@ extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct # define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name) #endif -static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p) +static inline void init_waitqueue_entry(wait_queue_entry_t *q, struct task_struct *p) { q->flags = 0; q->private = p; @@ -96,7 +99,7 @@ static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p) } static inline void -init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func) +init_waitqueue_func_entry(wait_queue_entry_t *q, wait_queue_func_t func) { q->flags = 0; q->private = NULL; @@ -159,11 +162,11 @@ static inline bool wq_has_sleeper(wait_queue_head_t *wq) return waitqueue_active(wq); } -extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); -extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait); -extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); +extern void add_wait_queue(wait_queue_head_t *q, wait_queue_entry_t *wait); +extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait); +extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_entry_t *wait); -static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new) +static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_entry_t *new) { list_add(&new->task_list, &head->task_list); } @@ -172,27 +175,27 @@ static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new) * Used for wake-one threads: */ static inline void -__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait) +__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait) { wait->flags |= WQ_FLAG_EXCLUSIVE; 
__add_wait_queue(q, wait); } -static inline void __add_wait_queue_tail(wait_queue_head_t *head, - wait_queue_t *new) +static inline void __add_wait_queue_entry_tail(wait_queue_head_t *head, + wait_queue_entry_t *new) { list_add_tail(&new->task_list, &head->task_list); } static inline void -__add_wait_queue_tail_exclusive(wait_queue_head_t *q, wait_queue_t *wait) +__add_wait_queue_entry_tail_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait) { wait->flags |= WQ_FLAG_EXCLUSIVE; - __add_wait_queue_tail(q, wait); + __add_wait_queue_entry_tail(q, wait); } static inline void -__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old) +__remove_wait_queue(wait_queue_head_t *head, wait_queue_entry_t *old) { list_del(&old->task_list); } @@ -249,7 +252,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); (!__builtin_constant_p(state) || \ state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE) \ -extern void init_wait_entry(wait_queue_t *__wait, int flags); +extern void init_wait_entry(wait_queue_entry_t *__wait, int flags); /* * The below macro ___wait_event() has an explicit shadow of the __ret @@ -266,7 +269,7 @@ extern void init_wait_entry(wait_queue_t *__wait, int flags); #define ___wait_event(wq, condition, state, exclusive, ret, cmd) \ ({ \ __label__ __out; \ - wait_queue_t __wait; \ + wait_queue_entry_t __wait; \ long __ret = ret; /* explicit shadow */ \ \ init_wait_entry(&__wait, exclusive ? 
WQ_FLAG_EXCLUSIVE : 0); \ @@ -620,8 +623,8 @@ do { \ __ret; \ }) -extern int do_wait_intr(wait_queue_head_t *, wait_queue_t *); -extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *); +extern int do_wait_intr(wait_queue_head_t *, wait_queue_entry_t *); +extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); #define __wait_event_interruptible_locked(wq, condition, exclusive, fn) \ ({ \ @@ -967,17 +970,17 @@ do { \ /* * Waitqueues which are removed from the waitqueue_head at wakeup time */ -void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state); -void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state); -long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state); -void finish_wait(wait_queue_head_t *q, wait_queue_t *wait); -long wait_woken(wait_queue_t *wait, unsigned mode, long timeout); -int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); -int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); -int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); +void prepare_to_wait(wait_queue_head_t *q, wait_queue_entry_t *wait, int state); +void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait, int state); +long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_entry_t *wait, int state); +void finish_wait(wait_queue_head_t *q, wait_queue_entry_t *wait); +long wait_woken(wait_queue_entry_t *wait, unsigned mode, long timeout); +int woken_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key); +int autoremove_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key); +int wake_bit_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key); #define DEFINE_WAIT_FUNC(name, function) \ - wait_queue_t name = { \ + wait_queue_entry_t name = { \ .private = current, \ .func = function, \ .task_list = 
LIST_HEAD_INIT((name).task_list), \ -- cgit v1.2.3 From 50816c48997af857d4bab3dca1aba90339705e96 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Mar 2017 10:33:16 +0100 Subject: sched/wait: Standardize internal naming of wait-queue entries So the various wait-queue entry variables in include/linux/wait.h and kernel/sched/wait.c are named in a colorfully inconsistent way: wait_queue_entry_t *wait wait_queue_entry_t *__wait (even in plain C code!) wait_queue_entry_t *q (!) wait_queue_entry_t *new (making anyone who knows C++ cringe) wait_queue_entry_t *old I think part of the reason for the inconsistency is the constant apparent confusion about what a wait queue 'head' versus 'entry' is. ( Some of the documentation talks about a 'wait descriptor', which is the wait-queue entry itself - further adding to the confusion. ) The most common name is 'wait', but that in itself is somewhat ambiguous as well, as it does not really make it clear whether it's a wait-queue entry or head. To improve all this, name the wait-queue entry structure parameters and variables consistently and push through this naming into all the wait.h and wait.c code: struct wait_queue_entry *wq_entry The 'wq_' prefix makes it easy to grep for, and we also use the opportunity to move away from the typedef to a plain 'struct' naming: in the kernel we typically reserve typedefs for cases where a C structure is really small and somewhat opaque - such as pte_t. wait-queue entries are neither small nor opaque, so use the more standard 'struct xxx_entry' list management code nomenclature instead. ( We don't touch external users, and we preserve the typedef as well for actual wait-queue users, to reduce unnecessary churn.
) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/wait.h | 84 ++++++++++++++++++++++++++-------------------------- 1 file changed, 42 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 5889f0c86ff7..77fdea851d8b 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -11,8 +11,9 @@ #include typedef struct wait_queue_entry wait_queue_entry_t; -typedef int (*wait_queue_func_t)(wait_queue_entry_t *wait, unsigned mode, int flags, void *key); -int default_wake_function(wait_queue_entry_t *wait, unsigned mode, int flags, void *key); + +typedef int (*wait_queue_func_t)(struct wait_queue_entry *wq_entry, unsigned mode, int flags, void *key); +int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int flags, void *key); /* wait_queue_entry::flags */ #define WQ_FLAG_EXCLUSIVE 0x01 @@ -37,7 +38,7 @@ struct wait_bit_key { struct wait_bit_queue { struct wait_bit_key key; - wait_queue_entry_t wait; + struct wait_queue_entry wait; }; struct __wait_queue_head { @@ -58,7 +59,7 @@ struct task_struct; .task_list = { NULL, NULL } } #define DECLARE_WAITQUEUE(name, tsk) \ - wait_queue_entry_t name = __WAITQUEUE_INITIALIZER(name, tsk) + struct wait_queue_entry name = __WAITQUEUE_INITIALIZER(name, tsk) #define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ @@ -91,19 +92,19 @@ extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct # define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name) #endif -static inline void init_waitqueue_entry(wait_queue_entry_t *q, struct task_struct *p) +static inline void init_waitqueue_entry(struct wait_queue_entry *wq_entry, struct task_struct *p) { - q->flags = 0; - q->private = p; - q->func = default_wake_function; + wq_entry->flags = 0; + wq_entry->private = p; + wq_entry->func = 
default_wake_function; } static inline void -init_waitqueue_func_entry(wait_queue_entry_t *q, wait_queue_func_t func) +init_waitqueue_func_entry(struct wait_queue_entry *wq_entry, wait_queue_func_t func) { - q->flags = 0; - q->private = NULL; - q->func = func; + wq_entry->flags = 0; + wq_entry->private = NULL; + wq_entry->func = func; } /** @@ -162,42 +163,41 @@ static inline bool wq_has_sleeper(wait_queue_head_t *wq) return waitqueue_active(wq); } -extern void add_wait_queue(wait_queue_head_t *q, wait_queue_entry_t *wait); -extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait); -extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_entry_t *wait); +extern void add_wait_queue(wait_queue_head_t *q, struct wait_queue_entry *wq_entry); +extern void add_wait_queue_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry); +extern void remove_wait_queue(wait_queue_head_t *q, struct wait_queue_entry *wq_entry); -static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_entry_t *new) +static inline void __add_wait_queue(wait_queue_head_t *head, struct wait_queue_entry *wq_entry) { - list_add(&new->task_list, &head->task_list); + list_add(&wq_entry->task_list, &head->task_list); } /* * Used for wake-one threads: */ static inline void -__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait) +__add_wait_queue_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry) { - wait->flags |= WQ_FLAG_EXCLUSIVE; - __add_wait_queue(q, wait); + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; + __add_wait_queue(q, wq_entry); } -static inline void __add_wait_queue_entry_tail(wait_queue_head_t *head, - wait_queue_entry_t *new) +static inline void __add_wait_queue_entry_tail(wait_queue_head_t *head, struct wait_queue_entry *wq_entry) { - list_add_tail(&new->task_list, &head->task_list); + list_add_tail(&wq_entry->task_list, &head->task_list); } static inline void 
-__add_wait_queue_entry_tail_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait) +__add_wait_queue_entry_tail_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry) { - wait->flags |= WQ_FLAG_EXCLUSIVE; - __add_wait_queue_entry_tail(q, wait); + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; + __add_wait_queue_entry_tail(q, wq_entry); } static inline void -__remove_wait_queue(wait_queue_head_t *head, wait_queue_entry_t *old) +__remove_wait_queue(wait_queue_head_t *head, struct wait_queue_entry *wq_entry) { - list_del(&old->task_list); + list_del(&wq_entry->task_list); } typedef int wait_bit_action_f(struct wait_bit_key *, int mode); @@ -252,7 +252,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); (!__builtin_constant_p(state) || \ state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE) \ -extern void init_wait_entry(wait_queue_entry_t *__wait, int flags); +extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags); /* * The below macro ___wait_event() has an explicit shadow of the __ret @@ -269,12 +269,12 @@ extern void init_wait_entry(wait_queue_entry_t *__wait, int flags); #define ___wait_event(wq, condition, state, exclusive, ret, cmd) \ ({ \ __label__ __out; \ - wait_queue_entry_t __wait; \ + struct wait_queue_entry __wq_entry; \ long __ret = ret; /* explicit shadow */ \ \ - init_wait_entry(&__wait, exclusive ? WQ_FLAG_EXCLUSIVE : 0); \ + init_wait_entry(&__wq_entry, exclusive ? 
WQ_FLAG_EXCLUSIVE : 0);\ for (;;) { \ - long __int = prepare_to_wait_event(&wq, &__wait, state);\ + long __int = prepare_to_wait_event(&wq, &__wq_entry, state);\ \ if (condition) \ break; \ @@ -286,7 +286,7 @@ extern void init_wait_entry(wait_queue_entry_t *__wait, int flags); \ cmd; \ } \ - finish_wait(&wq, &__wait); \ + finish_wait(&wq, &__wq_entry); \ __out: __ret; \ }) @@ -970,17 +970,17 @@ do { \ /* * Waitqueues which are removed from the waitqueue_head at wakeup time */ -void prepare_to_wait(wait_queue_head_t *q, wait_queue_entry_t *wait, int state); -void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_entry_t *wait, int state); -long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_entry_t *wait, int state); -void finish_wait(wait_queue_head_t *q, wait_queue_entry_t *wait); -long wait_woken(wait_queue_entry_t *wait, unsigned mode, long timeout); -int woken_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key); -int autoremove_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key); -int wake_bit_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key); +void prepare_to_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state); +void prepare_to_wait_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state); +long prepare_to_wait_event(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state); +void finish_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry); +long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout); +int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); +int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); +int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); #define DEFINE_WAIT_FUNC(name, function) \ - wait_queue_entry_t name = { \ + struct wait_queue_entry name = 
{ \ .private = current, \ .func = function, \ .task_list = LIST_HEAD_INIT((name).task_list), \ -- cgit v1.2.3 From 9d9d676f595b5081326be7a17dc681fcb38fb3b2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Mar 2017 11:10:18 +0100 Subject: sched/wait: Standardize internal naming of wait-queue heads The wait-queue head parameters and variables are named in a couple of ways, we have the following variants currently: wait_queue_head_t *q wait_queue_head_t *wq wait_queue_head_t *head In particular the 'wq' naming is ambiguous in the sense whether it's a wait-queue head or entry name - as entries were often named 'wait'. ( Not to mention the confusion of any readers coming over from workqueue-land. ) Standardize all this around a single, unambiguous parameter and variable name: struct wait_queue_head *wq_head which is easy to grep for and also rhymes nicely with the wait-queue entry naming: struct wait_queue_entry *wq_entry Also rename: struct __wait_queue_head => struct wait_queue_head ... and use this struct type to migrate from typedefs usage to 'struct' usage, which is more in line with existing kernel practices. Don't touch any external users and preserve the main wait_queue_head_t typedef. 
Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/wait.h | 76 ++++++++++++++++++++++++++-------------------------- 1 file changed, 38 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 77fdea851d8b..c3d1cefc7853 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -41,11 +41,11 @@ struct wait_bit_queue { struct wait_queue_entry wait; }; -struct __wait_queue_head { +struct wait_queue_head { spinlock_t lock; struct list_head task_list; }; -typedef struct __wait_queue_head wait_queue_head_t; +typedef struct wait_queue_head wait_queue_head_t; struct task_struct; @@ -66,7 +66,7 @@ struct task_struct; .task_list = { &(name).task_list, &(name).task_list } } #define DECLARE_WAIT_QUEUE_HEAD(name) \ - wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name) + struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name) #define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ { .flags = word, .bit_nr = bit, } @@ -74,20 +74,20 @@ struct task_struct; #define __WAIT_ATOMIC_T_KEY_INITIALIZER(p) \ { .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, } -extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *); +extern void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *); -#define init_waitqueue_head(q) \ +#define init_waitqueue_head(wq_head) \ do { \ static struct lock_class_key __key; \ \ - __init_waitqueue_head((q), #q, &__key); \ + __init_waitqueue_head((wq_head), #wq_head, &__key); \ } while (0) #ifdef CONFIG_LOCKDEP # define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \ ({ init_waitqueue_head(&name); name; }) # define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) \ - wait_queue_head_t name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) + struct wait_queue_head name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) #else # define 
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name) #endif @@ -109,14 +109,14 @@ init_waitqueue_func_entry(struct wait_queue_entry *wq_entry, wait_queue_func_t f /** * waitqueue_active -- locklessly test for waiters on the queue - * @q: the waitqueue to test for waiters + * @wq_head: the waitqueue to test for waiters * * returns true if the wait list is not empty * * NOTE: this function is lockless and requires care, incorrect usage _will_ * lead to sporadic and non-obvious failure. * - * Use either while holding wait_queue_head_t::lock or when used for wakeups + * Use either while holding wait_queue_head::lock or when used for wakeups * with an extra smp_mb() like: * * CPU0 - waker CPU1 - waiter @@ -137,9 +137,9 @@ init_waitqueue_func_entry(struct wait_queue_entry *wq_entry, wait_queue_func_t f * Also note that this 'optimization' trades a spin_lock() for an smp_mb(), * which (when the lock is uncontended) are of roughly equal cost. */ -static inline int waitqueue_active(wait_queue_head_t *q) +static inline int waitqueue_active(struct wait_queue_head *wq_head) { - return !list_empty(&q->task_list); + return !list_empty(&wq_head->task_list); } /** @@ -150,7 +150,7 @@ static inline int waitqueue_active(wait_queue_head_t *q) * * Please refer to the comment for waitqueue_active. */ -static inline bool wq_has_sleeper(wait_queue_head_t *wq) +static inline bool wq_has_sleeper(struct wait_queue_head *wq_head) { /* * We need to be sure we are in sync with the @@ -160,62 +160,62 @@ static inline bool wq_has_sleeper(wait_queue_head_t *wq) * waiting side. 
*/ smp_mb(); - return waitqueue_active(wq); + return waitqueue_active(wq_head); } -extern void add_wait_queue(wait_queue_head_t *q, struct wait_queue_entry *wq_entry); -extern void add_wait_queue_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry); -extern void remove_wait_queue(wait_queue_head_t *q, struct wait_queue_entry *wq_entry); +extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); +extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); +extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); -static inline void __add_wait_queue(wait_queue_head_t *head, struct wait_queue_entry *wq_entry) +static inline void __add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { - list_add(&wq_entry->task_list, &head->task_list); + list_add(&wq_entry->task_list, &wq_head->task_list); } /* * Used for wake-one threads: */ static inline void -__add_wait_queue_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry) +__add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { wq_entry->flags |= WQ_FLAG_EXCLUSIVE; - __add_wait_queue(q, wq_entry); + __add_wait_queue(wq_head, wq_entry); } -static inline void __add_wait_queue_entry_tail(wait_queue_head_t *head, struct wait_queue_entry *wq_entry) +static inline void __add_wait_queue_entry_tail(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { - list_add_tail(&wq_entry->task_list, &head->task_list); + list_add_tail(&wq_entry->task_list, &wq_head->task_list); } static inline void -__add_wait_queue_entry_tail_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry) +__add_wait_queue_entry_tail_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { wq_entry->flags |= WQ_FLAG_EXCLUSIVE; - __add_wait_queue_entry_tail(q, wq_entry); + 
__add_wait_queue_entry_tail(wq_head, wq_entry); } static inline void -__remove_wait_queue(wait_queue_head_t *head, struct wait_queue_entry *wq_entry) +__remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { list_del(&wq_entry->task_list); } typedef int wait_bit_action_f(struct wait_bit_key *, int mode); -void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); -void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); -void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key); -void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr); -void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); -void __wake_up_bit(wait_queue_head_t *, void *, int); -int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); -int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); +void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); +void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); +void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); +void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); +void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr); +void __wake_up_bit(struct wait_queue_head *, void *, int); +int __wait_on_bit(struct wait_queue_head *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); +int __wait_on_bit_lock(struct wait_queue_head *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); void wake_up_bit(void *, int); void wake_up_atomic_t(atomic_t *); int out_of_line_wait_on_bit(void *, int, wait_bit_action_f *, unsigned); int out_of_line_wait_on_bit_timeout(void *, int, wait_bit_action_f *, unsigned, unsigned long); int out_of_line_wait_on_bit_lock(void *, int, wait_bit_action_f *, 
unsigned); int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned); -wait_queue_head_t *bit_waitqueue(void *, int); +struct wait_queue_head *bit_waitqueue(void *, int); #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) @@ -970,10 +970,10 @@ do { \ /* * Waitqueues which are removed from the waitqueue_head at wakeup time */ -void prepare_to_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state); -void prepare_to_wait_exclusive(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state); -long prepare_to_wait_event(wait_queue_head_t *q, struct wait_queue_entry *wq_entry, int state); -void finish_wait(wait_queue_head_t *q, struct wait_queue_entry *wq_entry); +void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); +void prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); +long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); +void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout); int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); -- cgit v1.2.3 From 2141713616c652aeabf2dd5c1e89bc601c4fed6a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Mar 2017 11:25:39 +0100 Subject: sched/wait: Standardize 'struct wait_bit_queue' wait-queue entry field name Rename 'struct wait_bit_queue::wait' to ::wq_entry, to more clearly name it as a wait-queue entry. Propagate it to a couple of usage sites where the wait-bit-queue internals are exposed. 
Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/wait.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index c3d1cefc7853..1c8add685f22 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -38,7 +38,7 @@ struct wait_bit_key { struct wait_bit_queue { struct wait_bit_key key; - struct wait_queue_entry wait; + struct wait_queue_entry wq_entry; }; struct wait_queue_head { @@ -991,11 +991,11 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync #define DEFINE_WAIT_BIT(name, word, bit) \ struct wait_bit_queue name = { \ .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \ - .wait = { \ + .wq_entry = { \ .private = current, \ .func = wake_bit_function, \ .task_list = \ - LIST_HEAD_INIT((name).wait.task_list), \ + LIST_HEAD_INIT((name).wq_entry.task_list), \ }, \ } -- cgit v1.2.3 From 76c85ddc4695bb7b8209bfeff11f5156088f9197 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Mar 2017 11:35:27 +0100 Subject: sched/wait: Standardize wait_bit_queue naming So wait-bit-queue head variables are often named: struct wait_bit_queue *q ... which is a bit ambiguous and super confusing, because they clearly suggest wait-queue head semantics and behavior (they rhyme with the old wait_queue_t *q naming), while they are extended wait-queue _entries_, not heads! They are misnomers in two ways: - the 'wait_bit_queue' leaves open the question of whether it's an entry or a head - the 'q' parameter and local variable naming falsely implies that it's a 'queue' - while it's an entry. This resulted in sometimes confusing cases such as: finish_wait(wq, &q->wait); where the 'q' is not a wait-queue head, but a wait-bit-queue entry. 
So improve this all by standardizing wait-bit-queue nomenclature similar to wait-queue head naming: struct wait_bit_queue => struct wait_bit_queue_entry q => wbq_entry Which makes it all much clearer: struct wait_bit_queue_entry *wbq_entry ... and turns the former confusing piece of code into: finish_wait(wq_head, &wbq_entry->wq_entry); which IMHO makes it apparently clear what we are doing, without having to analyze the context of the code: we are adding a wait-queue entry to a regular wait-queue head, which entry is embedded in a wait-bit-queue entry. I'm not a big fan of acronyms, but repeating wait_bit_queue_entry in field and local variable names is too long, so hopefully it's clear enough that 'wq_' prefixes stand for wait-queues, while 'wbq_' prefixes stand for wait-bit-queues. Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/wait.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 1c8add685f22..fc7c32d82120 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -36,7 +36,7 @@ struct wait_bit_key { unsigned long timeout; }; -struct wait_bit_queue { +struct wait_bit_queue_entry { struct wait_bit_key key; struct wait_queue_entry wq_entry; }; @@ -207,8 +207,8 @@ void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr); void __wake_up_bit(struct wait_queue_head *, void *, int); -int __wait_on_bit(struct wait_queue_head *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); -int __wait_on_bit_lock(struct wait_queue_head *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); +int __wait_on_bit(struct wait_queue_head *, struct wait_bit_queue_entry *, wait_bit_action_f *, 
unsigned); +int __wait_on_bit_lock(struct wait_queue_head *, struct wait_bit_queue_entry *, wait_bit_action_f *, unsigned); void wake_up_bit(void *, int); void wake_up_atomic_t(atomic_t *); int out_of_line_wait_on_bit(void *, int, wait_bit_action_f *, unsigned); @@ -989,7 +989,7 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function) #define DEFINE_WAIT_BIT(name, word, bit) \ - struct wait_bit_queue name = { \ + struct wait_bit_queue_entry name = { \ .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \ .wq_entry = { \ .private = current, \ -- cgit v1.2.3 From 939798a072300698870b96756c38bb34c20f6c71 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Mar 2017 11:54:46 +0100 Subject: sched/wait: Improve the bit-wait API parameter names in the API function prototypes Contrary to kernel tradition, most of the bit-wait function prototypes in <linux/wait.h> don't fully define the parameter names, they only list the types: int out_of_line_wait_on_bit_timeout(void *, int, wait_bit_action_f *, unsigned, unsigned long); ... which is pretty passive-aggressive in terms of informing the reader about what these functions are doing. Fill in the parameter names, such as: int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout); Also turn spurious (and inconsistently utilized) cases of 'unsigned' into 'unsigned int'. 
Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/wait.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index fc7c32d82120..1338505d8b9f 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -200,22 +200,22 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq list_del(&wq_entry->task_list); } -typedef int wait_bit_action_f(struct wait_bit_key *, int mode); +typedef int wait_bit_action_f(struct wait_bit_key *key, int mode); void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr); -void __wake_up_bit(struct wait_queue_head *, void *, int); -int __wait_on_bit(struct wait_queue_head *, struct wait_bit_queue_entry *, wait_bit_action_f *, unsigned); -int __wait_on_bit_lock(struct wait_queue_head *, struct wait_bit_queue_entry *, wait_bit_action_f *, unsigned); -void wake_up_bit(void *, int); -void wake_up_atomic_t(atomic_t *); -int out_of_line_wait_on_bit(void *, int, wait_bit_action_f *, unsigned); -int out_of_line_wait_on_bit_timeout(void *, int, wait_bit_action_f *, unsigned, unsigned long); -int out_of_line_wait_on_bit_lock(void *, int, wait_bit_action_f *, unsigned); -int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned); -struct wait_queue_head *bit_waitqueue(void *, int); +void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit); +int __wait_on_bit(struct wait_queue_head *wq_head, struct 
wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); +int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); +void wake_up_bit(void *word, int bit); +void wake_up_atomic_t(atomic_t *p); +int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode); +int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout); +int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode); +int out_of_line_wait_on_atomic_t(atomic_t *p, int (*)(atomic_t *), unsigned int mode); +struct wait_queue_head *bit_waitqueue(void *word, int bit); #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) @@ -1008,10 +1008,10 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync } while (0) -extern int bit_wait(struct wait_bit_key *, int); -extern int bit_wait_io(struct wait_bit_key *, int); -extern int bit_wait_timeout(struct wait_bit_key *, int); -extern int bit_wait_io_timeout(struct wait_bit_key *, int); +extern int bit_wait(struct wait_bit_key *key, int bit); +extern int bit_wait_io(struct wait_bit_key *key, int bit); +extern int bit_wait_timeout(struct wait_bit_key *key, int bit); +extern int bit_wait_io_timeout(struct wait_bit_key *key, int bit); /** * wait_on_bit - wait for a bit to be cleared -- cgit v1.2.3 From 4b1c480bfa3b246e292f4d50167756252a9717ed Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Mar 2017 12:07:33 +0100 Subject: sched/wait: Re-adjust macro line continuation backslashes in <linux/wait.h> So there's over 300 CPP macro line-continuation backslashes in include/linux/wait.h (!!), which are aligned vertically to make the macro maze a bit more navigable. 
The recent renames and reorganization broke some of them, and instead of re-aligning them in every patch (which would add a lot of stylistic noise to the patches and make them less readable), I just ignored them - and fixed them up in a single go in this patch. Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/wait.h | 645 +++++++++++++++++++++++++-------------------------- 1 file changed, 322 insertions(+), 323 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 1338505d8b9f..0805098f3589 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -53,34 +53,34 @@ struct task_struct; * Macros for declaration and initialisaton of the datatypes */ -#define __WAITQUEUE_INITIALIZER(name, tsk) { \ - .private = tsk, \ - .func = default_wake_function, \ +#define __WAITQUEUE_INITIALIZER(name, tsk) { \ + .private = tsk, \ + .func = default_wake_function, \ .task_list = { NULL, NULL } } -#define DECLARE_WAITQUEUE(name, tsk) \ +#define DECLARE_WAITQUEUE(name, tsk) \ struct wait_queue_entry name = __WAITQUEUE_INITIALIZER(name, tsk) -#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \ - .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ +#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ .task_list = { &(name).task_list, &(name).task_list } } #define DECLARE_WAIT_QUEUE_HEAD(name) \ struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name) -#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ +#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ { .flags = word, .bit_nr = bit, } -#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p) \ +#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p) \ { .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, } extern void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *); -#define init_waitqueue_head(wq_head) \ - do { \ - static struct lock_class_key __key; 
\ - \ - __init_waitqueue_head((wq_head), #wq_head, &__key); \ +#define init_waitqueue_head(wq_head) \ + do { \ + static struct lock_class_key __key; \ + \ + __init_waitqueue_head((wq_head), #wq_head, &__key); \ } while (0) #ifdef CONFIG_LOCKDEP @@ -122,13 +122,13 @@ init_waitqueue_func_entry(struct wait_queue_entry *wq_entry, wait_queue_func_t f * CPU0 - waker CPU1 - waiter * * for (;;) { - * @cond = true; prepare_to_wait(&wq, &wait, state); + * @cond = true; prepare_to_wait(&wq_head, &wait, state); * smp_mb(); // smp_mb() from set_current_state() - * if (waitqueue_active(wq)) if (@cond) - * wake_up(wq); break; + * if (waitqueue_active(wq_head)) if (@cond) + * wake_up(wq_head); break; * schedule(); * } - * finish_wait(&wq, &wait); + * finish_wait(&wq_head, &wait); * * Because without the explicit smp_mb() it's possible for the * waitqueue_active() load to get hoisted over the @cond store such that we'll @@ -144,9 +144,9 @@ static inline int waitqueue_active(struct wait_queue_head *wq_head) /** * wq_has_sleeper - check if there are any waiting processes - * @wq: wait queue head + * @wq_head: wait queue head * - * Returns true if wq has waiting processes + * Returns true if wq_head has waiting processes * * Please refer to the comment for waitqueue_active. */ @@ -231,26 +231,26 @@ struct wait_queue_head *bit_waitqueue(void *word, int bit); /* * Wakeup macros to be used to report events to the targets. 
*/ -#define wake_up_poll(x, m) \ +#define wake_up_poll(x, m) \ __wake_up(x, TASK_NORMAL, 1, (void *) (m)) -#define wake_up_locked_poll(x, m) \ +#define wake_up_locked_poll(x, m) \ __wake_up_locked_key((x), TASK_NORMAL, (void *) (m)) -#define wake_up_interruptible_poll(x, m) \ +#define wake_up_interruptible_poll(x, m) \ __wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m)) -#define wake_up_interruptible_sync_poll(x, m) \ +#define wake_up_interruptible_sync_poll(x, m) \ __wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m)) -#define ___wait_cond_timeout(condition) \ -({ \ - bool __cond = (condition); \ - if (__cond && !__ret) \ - __ret = 1; \ - __cond || !__ret; \ +#define ___wait_cond_timeout(condition) \ +({ \ + bool __cond = (condition); \ + if (__cond && !__ret) \ + __ret = 1; \ + __cond || !__ret; \ }) -#define ___wait_is_interruptible(state) \ - (!__builtin_constant_p(state) || \ - state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE) \ +#define ___wait_is_interruptible(state) \ + (!__builtin_constant_p(state) || \ + state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE) \ extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags); @@ -266,108 +266,108 @@ extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags); * otherwise. */ -#define ___wait_event(wq, condition, state, exclusive, ret, cmd) \ -({ \ - __label__ __out; \ - struct wait_queue_entry __wq_entry; \ - long __ret = ret; /* explicit shadow */ \ - \ - init_wait_entry(&__wq_entry, exclusive ? 
WQ_FLAG_EXCLUSIVE : 0);\ - for (;;) { \ - long __int = prepare_to_wait_event(&wq, &__wq_entry, state);\ - \ - if (condition) \ - break; \ - \ - if (___wait_is_interruptible(state) && __int) { \ - __ret = __int; \ - goto __out; \ - } \ - \ - cmd; \ - } \ - finish_wait(&wq, &__wq_entry); \ -__out: __ret; \ +#define ___wait_event(wq_head, condition, state, exclusive, ret, cmd) \ +({ \ + __label__ __out; \ + struct wait_queue_entry __wq_entry; \ + long __ret = ret; /* explicit shadow */ \ + \ + init_wait_entry(&__wq_entry, exclusive ? WQ_FLAG_EXCLUSIVE : 0); \ + for (;;) { \ + long __int = prepare_to_wait_event(&wq_head, &__wq_entry, state);\ + \ + if (condition) \ + break; \ + \ + if (___wait_is_interruptible(state) && __int) { \ + __ret = __int; \ + goto __out; \ + } \ + \ + cmd; \ + } \ + finish_wait(&wq_head, &__wq_entry); \ +__out: __ret; \ }) -#define __wait_event(wq, condition) \ - (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ +#define __wait_event(wq_head, condition) \ + (void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ schedule()) /** * wait_event - sleep until a condition gets true - * @wq: the waitqueue to wait on + * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the * @condition evaluates to true. The @condition is checked each time - * the waitqueue @wq is woken up. + * the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. 
*/ -#define wait_event(wq, condition) \ -do { \ - might_sleep(); \ - if (condition) \ - break; \ - __wait_event(wq, condition); \ +#define wait_event(wq_head, condition) \ +do { \ + might_sleep(); \ + if (condition) \ + break; \ + __wait_event(wq_head, condition); \ } while (0) -#define __io_wait_event(wq, condition) \ - (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ +#define __io_wait_event(wq_head, condition) \ + (void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ io_schedule()) /* * io_wait_event() -- like wait_event() but with io_schedule() */ -#define io_wait_event(wq, condition) \ -do { \ - might_sleep(); \ - if (condition) \ - break; \ - __io_wait_event(wq, condition); \ +#define io_wait_event(wq_head, condition) \ +do { \ + might_sleep(); \ + if (condition) \ + break; \ + __io_wait_event(wq_head, condition); \ } while (0) -#define __wait_event_freezable(wq, condition) \ - ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \ +#define __wait_event_freezable(wq_head, condition) \ + ___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0, \ schedule(); try_to_freeze()) /** * wait_event_freezable - sleep (or freeze) until a condition gets true - * @wq: the waitqueue to wait on + * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * * The process is put to sleep (TASK_INTERRUPTIBLE -- so as not to contribute * to system load) until the @condition evaluates to true. The - * @condition is checked each time the waitqueue @wq is woken up. + * @condition is checked each time the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. 
*/ -#define wait_event_freezable(wq, condition) \ -({ \ - int __ret = 0; \ - might_sleep(); \ - if (!(condition)) \ - __ret = __wait_event_freezable(wq, condition); \ - __ret; \ +#define wait_event_freezable(wq_head, condition) \ +({ \ + int __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_event_freezable(wq_head, condition); \ + __ret; \ }) -#define __wait_event_timeout(wq, condition, timeout) \ - ___wait_event(wq, ___wait_cond_timeout(condition), \ - TASK_UNINTERRUPTIBLE, 0, timeout, \ +#define __wait_event_timeout(wq_head, condition, timeout) \ + ___wait_event(wq_head, ___wait_cond_timeout(condition), \ + TASK_UNINTERRUPTIBLE, 0, timeout, \ __ret = schedule_timeout(__ret)) /** * wait_event_timeout - sleep until a condition gets true or a timeout elapses - * @wq: the waitqueue to wait on + * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @timeout: timeout, in jiffies * * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the * @condition evaluates to true. The @condition is checked each time - * the waitqueue @wq is woken up. + * the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. @@ -378,83 +378,83 @@ do { \ * or the remaining jiffies (at least 1) if the @condition evaluated * to %true before the @timeout elapsed. 
*/ -#define wait_event_timeout(wq, condition, timeout) \ -({ \ - long __ret = timeout; \ - might_sleep(); \ - if (!___wait_cond_timeout(condition)) \ - __ret = __wait_event_timeout(wq, condition, timeout); \ - __ret; \ +#define wait_event_timeout(wq_head, condition, timeout) \ +({ \ + long __ret = timeout; \ + might_sleep(); \ + if (!___wait_cond_timeout(condition)) \ + __ret = __wait_event_timeout(wq_head, condition, timeout); \ + __ret; \ }) -#define __wait_event_freezable_timeout(wq, condition, timeout) \ - ___wait_event(wq, ___wait_cond_timeout(condition), \ - TASK_INTERRUPTIBLE, 0, timeout, \ +#define __wait_event_freezable_timeout(wq_head, condition, timeout) \ + ___wait_event(wq_head, ___wait_cond_timeout(condition), \ + TASK_INTERRUPTIBLE, 0, timeout, \ __ret = schedule_timeout(__ret); try_to_freeze()) /* * like wait_event_timeout() -- except it uses TASK_INTERRUPTIBLE to avoid * increasing load and is freezable. */ -#define wait_event_freezable_timeout(wq, condition, timeout) \ -({ \ - long __ret = timeout; \ - might_sleep(); \ - if (!___wait_cond_timeout(condition)) \ - __ret = __wait_event_freezable_timeout(wq, condition, timeout); \ - __ret; \ +#define wait_event_freezable_timeout(wq_head, condition, timeout) \ +({ \ + long __ret = timeout; \ + might_sleep(); \ + if (!___wait_cond_timeout(condition)) \ + __ret = __wait_event_freezable_timeout(wq_head, condition, timeout); \ + __ret; \ }) -#define __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2) \ - (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 1, 0, \ +#define __wait_event_exclusive_cmd(wq_head, condition, cmd1, cmd2) \ + (void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 1, 0, \ cmd1; schedule(); cmd2) /* * Just like wait_event_cmd(), except it sets exclusive flag */ -#define wait_event_exclusive_cmd(wq, condition, cmd1, cmd2) \ -do { \ - if (condition) \ - break; \ - __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2); \ +#define wait_event_exclusive_cmd(wq_head, 
condition, cmd1, cmd2) \ +do { \ + if (condition) \ + break; \ + __wait_event_exclusive_cmd(wq_head, condition, cmd1, cmd2); \ } while (0) -#define __wait_event_cmd(wq, condition, cmd1, cmd2) \ - (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ +#define __wait_event_cmd(wq_head, condition, cmd1, cmd2) \ + (void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ cmd1; schedule(); cmd2) /** * wait_event_cmd - sleep until a condition gets true - * @wq: the waitqueue to wait on + * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @cmd1: the command will be executed before sleep * @cmd2: the command will be executed after sleep * * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the * @condition evaluates to true. The @condition is checked each time - * the waitqueue @wq is woken up. + * the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. */ -#define wait_event_cmd(wq, condition, cmd1, cmd2) \ -do { \ - if (condition) \ - break; \ - __wait_event_cmd(wq, condition, cmd1, cmd2); \ +#define wait_event_cmd(wq_head, condition, cmd1, cmd2) \ +do { \ + if (condition) \ + break; \ + __wait_event_cmd(wq_head, condition, cmd1, cmd2); \ } while (0) -#define __wait_event_interruptible(wq, condition) \ - ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \ +#define __wait_event_interruptible(wq_head, condition) \ + ___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0, \ schedule()) /** * wait_event_interruptible - sleep until a condition gets true - * @wq: the waitqueue to wait on + * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * * The process is put to sleep (TASK_INTERRUPTIBLE) until the * @condition evaluates to true or a signal is received. - * The @condition is checked each time the waitqueue @wq is woken up. 
+ * The @condition is checked each time the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. @@ -462,29 +462,29 @@ do { \ * The function will return -ERESTARTSYS if it was interrupted by a * signal and 0 if @condition evaluated to true. */ -#define wait_event_interruptible(wq, condition) \ -({ \ - int __ret = 0; \ - might_sleep(); \ - if (!(condition)) \ - __ret = __wait_event_interruptible(wq, condition); \ - __ret; \ +#define wait_event_interruptible(wq_head, condition) \ +({ \ + int __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_event_interruptible(wq_head, condition); \ + __ret; \ }) -#define __wait_event_interruptible_timeout(wq, condition, timeout) \ - ___wait_event(wq, ___wait_cond_timeout(condition), \ - TASK_INTERRUPTIBLE, 0, timeout, \ +#define __wait_event_interruptible_timeout(wq_head, condition, timeout) \ + ___wait_event(wq_head, ___wait_cond_timeout(condition), \ + TASK_INTERRUPTIBLE, 0, timeout, \ __ret = schedule_timeout(__ret)) /** * wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses - * @wq: the waitqueue to wait on + * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @timeout: timeout, in jiffies * * The process is put to sleep (TASK_INTERRUPTIBLE) until the * @condition evaluates to true or a signal is received. - * The @condition is checked each time the waitqueue @wq is woken up. + * The @condition is checked each time the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. @@ -496,50 +496,49 @@ do { \ * to %true before the @timeout elapsed, or -%ERESTARTSYS if it was * interrupted by a signal. 
*/ -#define wait_event_interruptible_timeout(wq, condition, timeout) \ -({ \ - long __ret = timeout; \ - might_sleep(); \ - if (!___wait_cond_timeout(condition)) \ - __ret = __wait_event_interruptible_timeout(wq, \ - condition, timeout); \ - __ret; \ +#define wait_event_interruptible_timeout(wq_head, condition, timeout) \ +({ \ + long __ret = timeout; \ + might_sleep(); \ + if (!___wait_cond_timeout(condition)) \ + __ret = __wait_event_interruptible_timeout(wq_head, \ + condition, timeout); \ + __ret; \ }) -#define __wait_event_hrtimeout(wq, condition, timeout, state) \ -({ \ - int __ret = 0; \ - struct hrtimer_sleeper __t; \ - \ - hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, \ - HRTIMER_MODE_REL); \ - hrtimer_init_sleeper(&__t, current); \ - if ((timeout) != KTIME_MAX) \ - hrtimer_start_range_ns(&__t.timer, timeout, \ - current->timer_slack_ns, \ - HRTIMER_MODE_REL); \ - \ - __ret = ___wait_event(wq, condition, state, 0, 0, \ - if (!__t.task) { \ - __ret = -ETIME; \ - break; \ - } \ - schedule()); \ - \ - hrtimer_cancel(&__t.timer); \ - destroy_hrtimer_on_stack(&__t.timer); \ - __ret; \ +#define __wait_event_hrtimeout(wq_head, condition, timeout, state) \ +({ \ + int __ret = 0; \ + struct hrtimer_sleeper __t; \ + \ + hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); \ + hrtimer_init_sleeper(&__t, current); \ + if ((timeout) != KTIME_MAX) \ + hrtimer_start_range_ns(&__t.timer, timeout, \ + current->timer_slack_ns, \ + HRTIMER_MODE_REL); \ + \ + __ret = ___wait_event(wq_head, condition, state, 0, 0, \ + if (!__t.task) { \ + __ret = -ETIME; \ + break; \ + } \ + schedule()); \ + \ + hrtimer_cancel(&__t.timer); \ + destroy_hrtimer_on_stack(&__t.timer); \ + __ret; \ }) /** * wait_event_hrtimeout - sleep until a condition gets true or a timeout elapses - * @wq: the waitqueue to wait on + * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @timeout: timeout, as a ktime_t * * The process is put to sleep 
(TASK_UNINTERRUPTIBLE) until the * @condition evaluates to true or a signal is received. - * The @condition is checked each time the waitqueue @wq is woken up. + * The @condition is checked each time the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. @@ -547,25 +546,25 @@ do { \ * The function returns 0 if @condition became true, or -ETIME if the timeout * elapsed. */ -#define wait_event_hrtimeout(wq, condition, timeout) \ -({ \ - int __ret = 0; \ - might_sleep(); \ - if (!(condition)) \ - __ret = __wait_event_hrtimeout(wq, condition, timeout, \ - TASK_UNINTERRUPTIBLE); \ - __ret; \ +#define wait_event_hrtimeout(wq_head, condition, timeout) \ +({ \ + int __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_event_hrtimeout(wq_head, condition, timeout, \ + TASK_UNINTERRUPTIBLE); \ + __ret; \ }) /** * wait_event_interruptible_hrtimeout - sleep until a condition gets true or a timeout elapses - * @wq: the waitqueue to wait on + * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @timeout: timeout, as a ktime_t * * The process is put to sleep (TASK_INTERRUPTIBLE) until the * @condition evaluates to true or a signal is received. - * The @condition is checked each time the waitqueue @wq is woken up. + * The @condition is checked each time the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. @@ -573,73 +572,73 @@ do { \ * The function returns 0 if @condition became true, -ERESTARTSYS if it was * interrupted by a signal, or -ETIME if the timeout elapsed. 
*/ -#define wait_event_interruptible_hrtimeout(wq, condition, timeout) \ -({ \ - long __ret = 0; \ - might_sleep(); \ - if (!(condition)) \ - __ret = __wait_event_hrtimeout(wq, condition, timeout, \ - TASK_INTERRUPTIBLE); \ - __ret; \ +#define wait_event_interruptible_hrtimeout(wq, condition, timeout) \ +({ \ + long __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_event_hrtimeout(wq, condition, timeout, \ + TASK_INTERRUPTIBLE); \ + __ret; \ }) -#define __wait_event_interruptible_exclusive(wq, condition) \ - ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \ +#define __wait_event_interruptible_exclusive(wq, condition) \ + ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \ schedule()) -#define wait_event_interruptible_exclusive(wq, condition) \ -({ \ - int __ret = 0; \ - might_sleep(); \ - if (!(condition)) \ - __ret = __wait_event_interruptible_exclusive(wq, condition);\ - __ret; \ +#define wait_event_interruptible_exclusive(wq, condition) \ +({ \ + int __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_event_interruptible_exclusive(wq, condition); \ + __ret; \ }) -#define __wait_event_killable_exclusive(wq, condition) \ - ___wait_event(wq, condition, TASK_KILLABLE, 1, 0, \ +#define __wait_event_killable_exclusive(wq, condition) \ + ___wait_event(wq, condition, TASK_KILLABLE, 1, 0, \ schedule()) -#define wait_event_killable_exclusive(wq, condition) \ -({ \ - int __ret = 0; \ - might_sleep(); \ - if (!(condition)) \ - __ret = __wait_event_killable_exclusive(wq, condition); \ - __ret; \ +#define wait_event_killable_exclusive(wq, condition) \ +({ \ + int __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_event_killable_exclusive(wq, condition); \ + __ret; \ }) -#define __wait_event_freezable_exclusive(wq, condition) \ - ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \ +#define __wait_event_freezable_exclusive(wq, condition) \ + ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \ 
schedule(); try_to_freeze()) -#define wait_event_freezable_exclusive(wq, condition) \ -({ \ - int __ret = 0; \ - might_sleep(); \ - if (!(condition)) \ - __ret = __wait_event_freezable_exclusive(wq, condition);\ - __ret; \ +#define wait_event_freezable_exclusive(wq, condition) \ +({ \ + int __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_event_freezable_exclusive(wq, condition); \ + __ret; \ }) extern int do_wait_intr(wait_queue_head_t *, wait_queue_entry_t *); extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); -#define __wait_event_interruptible_locked(wq, condition, exclusive, fn) \ -({ \ - int __ret; \ - DEFINE_WAIT(__wait); \ - if (exclusive) \ - __wait.flags |= WQ_FLAG_EXCLUSIVE; \ - do { \ - __ret = fn(&(wq), &__wait); \ - if (__ret) \ - break; \ - } while (!(condition)); \ - __remove_wait_queue(&(wq), &__wait); \ - __set_current_state(TASK_RUNNING); \ - __ret; \ +#define __wait_event_interruptible_locked(wq, condition, exclusive, fn) \ +({ \ + int __ret; \ + DEFINE_WAIT(__wait); \ + if (exclusive) \ + __wait.flags |= WQ_FLAG_EXCLUSIVE; \ + do { \ + __ret = fn(&(wq), &__wait); \ + if (__ret) \ + break; \ + } while (!(condition)); \ + __remove_wait_queue(&(wq), &__wait); \ + __set_current_state(TASK_RUNNING); \ + __ret; \ }) @@ -666,8 +665,8 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); * The function will return -ERESTARTSYS if it was interrupted by a * signal and 0 if @condition evaluated to true. */ -#define wait_event_interruptible_locked(wq, condition) \ - ((condition) \ +#define wait_event_interruptible_locked(wq, condition) \ + ((condition) \ ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr)) /** @@ -693,8 +692,8 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); * The function will return -ERESTARTSYS if it was interrupted by a * signal and 0 if @condition evaluated to true. 
*/ -#define wait_event_interruptible_locked_irq(wq, condition) \ - ((condition) \ +#define wait_event_interruptible_locked_irq(wq, condition) \ + ((condition) \ ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr_irq)) /** @@ -724,8 +723,8 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); * The function will return -ERESTARTSYS if it was interrupted by a * signal and 0 if @condition evaluated to true. */ -#define wait_event_interruptible_exclusive_locked(wq, condition) \ - ((condition) \ +#define wait_event_interruptible_exclusive_locked(wq, condition) \ + ((condition) \ ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr)) /** @@ -755,12 +754,12 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); * The function will return -ERESTARTSYS if it was interrupted by a * signal and 0 if @condition evaluated to true. */ -#define wait_event_interruptible_exclusive_locked_irq(wq, condition) \ - ((condition) \ +#define wait_event_interruptible_exclusive_locked_irq(wq, condition) \ + ((condition) \ ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr_irq)) -#define __wait_event_killable(wq, condition) \ +#define __wait_event_killable(wq, condition) \ ___wait_event(wq, condition, TASK_KILLABLE, 0, 0, schedule()) /** @@ -778,21 +777,21 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); * The function will return -ERESTARTSYS if it was interrupted by a * signal and 0 if @condition evaluated to true. 
*/ -#define wait_event_killable(wq, condition) \ -({ \ - int __ret = 0; \ - might_sleep(); \ - if (!(condition)) \ - __ret = __wait_event_killable(wq, condition); \ - __ret; \ +#define wait_event_killable(wq_head, condition) \ +({ \ + int __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_event_killable(wq_head, condition); \ + __ret; \ }) -#define __wait_event_lock_irq(wq, condition, lock, cmd) \ - (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ - spin_unlock_irq(&lock); \ - cmd; \ - schedule(); \ +#define __wait_event_lock_irq(wq_head, condition, lock, cmd) \ + (void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ + spin_unlock_irq(&lock); \ + cmd; \ + schedule(); \ spin_lock_irq(&lock)) /** @@ -800,7 +799,7 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); * condition is checked under the lock. This * is expected to be called with the lock * taken. - * @wq: the waitqueue to wait on + * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @lock: a locked spinlock_t, which will be released before cmd * and schedule() and reacquired afterwards. @@ -809,7 +808,7 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); * * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the * @condition evaluates to true. The @condition is checked each time - * the waitqueue @wq is woken up. + * the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. @@ -818,11 +817,11 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); * dropped before invoking the cmd and going to sleep and is reacquired * afterwards. 
*/ -#define wait_event_lock_irq_cmd(wq, condition, lock, cmd) \ -do { \ - if (condition) \ - break; \ - __wait_event_lock_irq(wq, condition, lock, cmd); \ +#define wait_event_lock_irq_cmd(wq_head, condition, lock, cmd) \ +do { \ + if (condition) \ + break; \ + __wait_event_lock_irq(wq_head, condition, lock, cmd); \ } while (0) /** @@ -830,14 +829,14 @@ do { \ * condition is checked under the lock. This * is expected to be called with the lock * taken. - * @wq: the waitqueue to wait on + * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @lock: a locked spinlock_t, which will be released before schedule() * and reacquired afterwards. * * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the * @condition evaluates to true. The @condition is checked each time - * the waitqueue @wq is woken up. + * the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. @@ -845,26 +844,26 @@ do { \ * This is supposed to be called while holding the lock. The lock is * dropped before going to sleep and is reacquired afterwards. */ -#define wait_event_lock_irq(wq, condition, lock) \ -do { \ - if (condition) \ - break; \ - __wait_event_lock_irq(wq, condition, lock, ); \ +#define wait_event_lock_irq(wq_head, condition, lock) \ +do { \ + if (condition) \ + break; \ + __wait_event_lock_irq(wq_head, condition, lock, ); \ } while (0) -#define __wait_event_interruptible_lock_irq(wq, condition, lock, cmd) \ - ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \ - spin_unlock_irq(&lock); \ - cmd; \ - schedule(); \ +#define __wait_event_interruptible_lock_irq(wq_head, condition, lock, cmd) \ + ___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0, \ + spin_unlock_irq(&lock); \ + cmd; \ + schedule(); \ spin_lock_irq(&lock)) /** * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true. * The condition is checked under the lock. 
This is expected to * be called with the lock taken. - * @wq: the waitqueue to wait on + * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @lock: a locked spinlock_t, which will be released before cmd and * schedule() and reacquired afterwards. @@ -873,7 +872,7 @@ do { \ * * The process is put to sleep (TASK_INTERRUPTIBLE) until the * @condition evaluates to true or a signal is received. The @condition is - * checked each time the waitqueue @wq is woken up. + * checked each time the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. @@ -885,27 +884,27 @@ do { \ * The macro will return -ERESTARTSYS if it was interrupted by a signal * and 0 if @condition evaluated to true. */ -#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd) \ -({ \ - int __ret = 0; \ - if (!(condition)) \ - __ret = __wait_event_interruptible_lock_irq(wq, \ - condition, lock, cmd); \ - __ret; \ +#define wait_event_interruptible_lock_irq_cmd(wq_head, condition, lock, cmd) \ +({ \ + int __ret = 0; \ + if (!(condition)) \ + __ret = __wait_event_interruptible_lock_irq(wq_head, \ + condition, lock, cmd); \ + __ret; \ }) /** * wait_event_interruptible_lock_irq - sleep until a condition gets true. * The condition is checked under the lock. This is expected * to be called with the lock taken. - * @wq: the waitqueue to wait on + * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @lock: a locked spinlock_t, which will be released before schedule() * and reacquired afterwards. * * The process is put to sleep (TASK_INTERRUPTIBLE) until the * @condition evaluates to true or signal is received. The @condition is - * checked each time the waitqueue @wq is woken up. + * checked each time the waitqueue @wq_head is woken up. 
* * wake_up() has to be called after changing any variable that could * change the result of the wait condition. @@ -916,28 +915,28 @@ do { \ * The macro will return -ERESTARTSYS if it was interrupted by a signal * and 0 if @condition evaluated to true. */ -#define wait_event_interruptible_lock_irq(wq, condition, lock) \ -({ \ - int __ret = 0; \ - if (!(condition)) \ - __ret = __wait_event_interruptible_lock_irq(wq, \ - condition, lock,); \ - __ret; \ +#define wait_event_interruptible_lock_irq(wq_head, condition, lock) \ +({ \ + int __ret = 0; \ + if (!(condition)) \ + __ret = __wait_event_interruptible_lock_irq(wq_head, \ + condition, lock,); \ + __ret; \ }) -#define __wait_event_interruptible_lock_irq_timeout(wq, condition, \ - lock, timeout) \ - ___wait_event(wq, ___wait_cond_timeout(condition), \ - TASK_INTERRUPTIBLE, 0, timeout, \ - spin_unlock_irq(&lock); \ - __ret = schedule_timeout(__ret); \ +#define __wait_event_interruptible_lock_irq_timeout(wq_head, condition, \ + lock, timeout) \ + ___wait_event(wq_head, ___wait_cond_timeout(condition), \ + TASK_INTERRUPTIBLE, 0, timeout, \ + spin_unlock_irq(&lock); \ + __ret = schedule_timeout(__ret); \ spin_lock_irq(&lock)); /** * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets * true or a timeout elapses. The condition is checked under * the lock. This is expected to be called with the lock taken. - * @wq: the waitqueue to wait on + * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * @lock: a locked spinlock_t, which will be released before schedule() * and reacquired afterwards. @@ -945,7 +944,7 @@ do { \ * * The process is put to sleep (TASK_INTERRUPTIBLE) until the * @condition evaluates to true or signal is received. The @condition is - * checked each time the waitqueue @wq is woken up. + * checked each time the waitqueue @wq_head is woken up. 
* * wake_up() has to be called after changing any variable that could * change the result of the wait condition. @@ -957,14 +956,14 @@ do { \ * was interrupted by a signal, and the remaining jiffies otherwise * if the condition evaluated to true before the timeout elapsed. */ -#define wait_event_interruptible_lock_irq_timeout(wq, condition, lock, \ - timeout) \ -({ \ - long __ret = timeout; \ - if (!___wait_cond_timeout(condition)) \ - __ret = __wait_event_interruptible_lock_irq_timeout( \ - wq, condition, lock, timeout); \ - __ret; \ +#define wait_event_interruptible_lock_irq_timeout(wq_head, condition, lock, \ + timeout) \ +({ \ + long __ret = timeout; \ + if (!___wait_cond_timeout(condition)) \ + __ret = __wait_event_interruptible_lock_irq_timeout( \ + wq_head, condition, lock, timeout); \ + __ret; \ }) /* @@ -979,32 +978,32 @@ int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sy int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); -#define DEFINE_WAIT_FUNC(name, function) \ - struct wait_queue_entry name = { \ - .private = current, \ - .func = function, \ - .task_list = LIST_HEAD_INIT((name).task_list), \ +#define DEFINE_WAIT_FUNC(name, function) \ + struct wait_queue_entry name = { \ + .private = current, \ + .func = function, \ + .task_list = LIST_HEAD_INIT((name).task_list), \ } #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function) -#define DEFINE_WAIT_BIT(name, word, bit) \ - struct wait_bit_queue_entry name = { \ - .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \ - .wq_entry = { \ - .private = current, \ - .func = wake_bit_function, \ - .task_list = \ - LIST_HEAD_INIT((name).wq_entry.task_list), \ - }, \ +#define DEFINE_WAIT_BIT(name, word, bit) \ + struct wait_bit_queue_entry name = { \ + .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \ + .wq_entry = { \ + .private = 
current, \ + .func = wake_bit_function, \ + .task_list = \ + LIST_HEAD_INIT((name).wq_entry.task_list), \ + }, \ } -#define init_wait(wait) \ - do { \ - (wait)->private = current; \ - (wait)->func = autoremove_wake_function; \ - INIT_LIST_HEAD(&(wait)->task_list); \ - (wait)->flags = 0; \ +#define init_wait(wait) \ + do { \ + (wait)->private = current; \ + (wait)->func = autoremove_wake_function; \ + INIT_LIST_HEAD(&(wait)->task_list); \ + (wait)->flags = 0; \ } while (0) -- cgit v1.2.3 From 5dd43ce2f69d42a71dcacdb13d17d8c0ac1fe8f7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 20 Jun 2017 12:19:09 +0200 Subject: sched/wait: Split out the wait_bit*() APIs from <linux/wait.h> into <linux/wait_bit.h> The wait_bit*() types and APIs are mixed into wait.h, but they are a pretty orthogonal extension of wait-queues. Furthermore, only about 50 kernel files use these APIs, while over 1000 use the regular wait-queue functionality. So clean up the main wait.h by moving the wait-bit functionality out of it, into a separate .h and .c file: include/linux/wait_bit.h for types and APIs kernel/sched/wait_bit.c for the implementation Update all header dependencies. This reduces the size of wait.h rather significantly, by about 30%. 
Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/fs.h | 2 +- include/linux/sunrpc/sched.h | 2 +- include/linux/wait.h | 250 ----------------------------------------- include/linux/wait_bit.h | 260 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 262 insertions(+), 252 deletions(-) create mode 100644 include/linux/wait_bit.h (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 803e5a9b2654..53f7e49d8fe5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2,7 +2,7 @@ #define _LINUX_FS_H #include -#include +#include #include #include #include diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 7ba040c797ec..9d7529ffc4ce 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/include/linux/wait.h b/include/linux/wait.h index 0805098f3589..629489746f8a 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -29,18 +29,6 @@ struct wait_queue_entry { struct list_head task_list; }; -struct wait_bit_key { - void *flags; - int bit_nr; -#define WAIT_ATOMIC_T_BIT_NR -1 - unsigned long timeout; -}; - -struct wait_bit_queue_entry { - struct wait_bit_key key; - struct wait_queue_entry wq_entry; -}; - struct wait_queue_head { spinlock_t lock; struct list_head task_list; @@ -68,12 +56,6 @@ struct task_struct; #define DECLARE_WAIT_QUEUE_HEAD(name) \ struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name) -#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ - { .flags = word, .bit_nr = bit, } - -#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p) \ - { .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, } - extern void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *); #define init_waitqueue_head(wq_head) \ @@ -200,22 +182,11 @@ __remove_wait_queue(struct 
wait_queue_head *wq_head, struct wait_queue_entry *wq list_del(&wq_entry->task_list); } -typedef int wait_bit_action_f(struct wait_bit_key *key, int mode); void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr); -void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit); -int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); -int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); -void wake_up_bit(void *word, int bit); -void wake_up_atomic_t(atomic_t *p); -int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode); -int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout); -int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode); -int out_of_line_wait_on_atomic_t(atomic_t *p, int (*)(atomic_t *), unsigned int mode); -struct wait_queue_head *bit_waitqueue(void *word, int bit); #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) @@ -976,7 +947,6 @@ void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_en long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout); int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); -int 
wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); #define DEFINE_WAIT_FUNC(name, function) \ struct wait_queue_entry name = { \ @@ -987,17 +957,6 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function) -#define DEFINE_WAIT_BIT(name, word, bit) \ - struct wait_bit_queue_entry name = { \ - .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \ - .wq_entry = { \ - .private = current, \ - .func = wake_bit_function, \ - .task_list = \ - LIST_HEAD_INIT((name).wq_entry.task_list), \ - }, \ - } - #define init_wait(wait) \ do { \ (wait)->private = current; \ @@ -1006,213 +965,4 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync (wait)->flags = 0; \ } while (0) - -extern int bit_wait(struct wait_bit_key *key, int bit); -extern int bit_wait_io(struct wait_bit_key *key, int bit); -extern int bit_wait_timeout(struct wait_bit_key *key, int bit); -extern int bit_wait_io_timeout(struct wait_bit_key *key, int bit); - -/** - * wait_on_bit - wait for a bit to be cleared - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @mode: the task state to sleep in - * - * There is a standard hashed waitqueue table for generic use. This - * is the part of the hashtable's accessor API that waits on a bit. - * For instance, if one were to have waiters on a bitflag, one would - * call wait_on_bit() in threads waiting for the bit to clear. - * One uses wait_on_bit() where one is waiting for the bit to clear, - * but has no intention of setting it. - * Returned value will be zero if the bit was cleared, or non-zero - * if the process received a signal and the mode permitted wakeup - * on that signal. 
- */ -static inline int -wait_on_bit(unsigned long *word, int bit, unsigned mode) -{ - might_sleep(); - if (!test_bit(bit, word)) - return 0; - return out_of_line_wait_on_bit(word, bit, - bit_wait, - mode); -} - -/** - * wait_on_bit_io - wait for a bit to be cleared - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @mode: the task state to sleep in - * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared. This is similar to wait_on_bit(), but calls - * io_schedule() instead of schedule() for the actual waiting. - * - * Returned value will be zero if the bit was cleared, or non-zero - * if the process received a signal and the mode permitted wakeup - * on that signal. - */ -static inline int -wait_on_bit_io(unsigned long *word, int bit, unsigned mode) -{ - might_sleep(); - if (!test_bit(bit, word)) - return 0; - return out_of_line_wait_on_bit(word, bit, - bit_wait_io, - mode); -} - -/** - * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @mode: the task state to sleep in - * @timeout: timeout, in jiffies - * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared. This is similar to wait_on_bit(), except also takes a - * timeout parameter. - * - * Returned value will be zero if the bit was cleared before the - * @timeout elapsed, or non-zero if the @timeout elapsed or process - * received a signal and the mode permitted wakeup on that signal. 
- */ -static inline int -wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, - unsigned long timeout) -{ - might_sleep(); - if (!test_bit(bit, word)) - return 0; - return out_of_line_wait_on_bit_timeout(word, bit, - bit_wait_timeout, - mode, timeout); -} - -/** - * wait_on_bit_action - wait for a bit to be cleared - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @action: the function used to sleep, which may take special actions - * @mode: the task state to sleep in - * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared, and allow the waiting action to be specified. - * This is like wait_on_bit() but allows fine control of how the waiting - * is done. - * - * Returned value will be zero if the bit was cleared, or non-zero - * if the process received a signal and the mode permitted wakeup - * on that signal. - */ -static inline int -wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, - unsigned mode) -{ - might_sleep(); - if (!test_bit(bit, word)) - return 0; - return out_of_line_wait_on_bit(word, bit, action, mode); -} - -/** - * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @mode: the task state to sleep in - * - * There is a standard hashed waitqueue table for generic use. This - * is the part of the hashtable's accessor API that waits on a bit - * when one intends to set it, for instance, trying to lock bitflags. - * For instance, if one were to have waiters trying to set bitflag - * and waiting for it to clear before setting it, one would call - * wait_on_bit() in threads waiting to be able to set the bit. - * One uses wait_on_bit_lock() where one is waiting for the bit to - * clear with the intention of setting it, and when done, clearing it. 
- * - * Returns zero if the bit was (eventually) found to be clear and was - * set. Returns non-zero if a signal was delivered to the process and - * the @mode allows that signal to wake the process. - */ -static inline int -wait_on_bit_lock(unsigned long *word, int bit, unsigned mode) -{ - might_sleep(); - if (!test_and_set_bit(bit, word)) - return 0; - return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode); -} - -/** - * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @mode: the task state to sleep in - * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared and then to atomically set it. This is similar - * to wait_on_bit(), but calls io_schedule() instead of schedule() - * for the actual waiting. - * - * Returns zero if the bit was (eventually) found to be clear and was - * set. Returns non-zero if a signal was delivered to the process and - * the @mode allows that signal to wake the process. - */ -static inline int -wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode) -{ - might_sleep(); - if (!test_and_set_bit(bit, word)) - return 0; - return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode); -} - -/** - * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on - * @action: the function used to sleep, which may take special actions - * @mode: the task state to sleep in - * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared and then to set it, and allow the waiting action - * to be specified. - * This is like wait_on_bit() but allows fine control of how the waiting - * is done. - * - * Returns zero if the bit was (eventually) found to be clear and was - * set. 
Returns non-zero if a signal was delivered to the process and - * the @mode allows that signal to wake the process. - */ -static inline int -wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action, - unsigned mode) -{ - might_sleep(); - if (!test_and_set_bit(bit, word)) - return 0; - return out_of_line_wait_on_bit_lock(word, bit, action, mode); -} - -/** - * wait_on_atomic_t - Wait for an atomic_t to become 0 - * @val: The atomic value being waited on, a kernel virtual address - * @action: the function used to sleep, which may take special actions - * @mode: the task state to sleep in - * - * Wait for an atomic_t to become 0. We abuse the bit-wait waitqueue table for - * the purpose of getting a waitqueue, but we set the key to a bit number - * outside of the target 'word'. - */ -static inline -int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode) -{ - might_sleep(); - if (atomic_read(val) == 0) - return 0; - return out_of_line_wait_on_atomic_t(val, action, mode); -} - #endif /* _LINUX_WAIT_H */ diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h new file mode 100644 index 000000000000..8c85c52d94b6 --- /dev/null +++ b/include/linux/wait_bit.h @@ -0,0 +1,260 @@ +#ifndef _LINUX_WAIT_BIT_H +#define _LINUX_WAIT_BIT_H + +/* + * Linux wait-bit related types and methods: + */ +#include + +struct wait_bit_key { + void *flags; + int bit_nr; +#define WAIT_ATOMIC_T_BIT_NR -1 + unsigned long timeout; +}; + +struct wait_bit_queue_entry { + struct wait_bit_key key; + struct wait_queue_entry wq_entry; +}; + +#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ + { .flags = word, .bit_nr = bit, } + +#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p) \ + { .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, } + +typedef int wait_bit_action_f(struct wait_bit_key *key, int mode); +void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit); +int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, 
wait_bit_action_f *action, unsigned int mode); +int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); +void wake_up_bit(void *word, int bit); +void wake_up_atomic_t(atomic_t *p); +int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode); +int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout); +int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode); +int out_of_line_wait_on_atomic_t(atomic_t *p, int (*)(atomic_t *), unsigned int mode); +struct wait_queue_head *bit_waitqueue(void *word, int bit); + +int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); + +#define DEFINE_WAIT_BIT(name, word, bit) \ + struct wait_bit_queue_entry name = { \ + .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \ + .wq_entry = { \ + .private = current, \ + .func = wake_bit_function, \ + .task_list = \ + LIST_HEAD_INIT((name).wq_entry.task_list), \ + }, \ + } + +extern int bit_wait(struct wait_bit_key *key, int bit); +extern int bit_wait_io(struct wait_bit_key *key, int bit); +extern int bit_wait_timeout(struct wait_bit_key *key, int bit); +extern int bit_wait_io_timeout(struct wait_bit_key *key, int bit); + +/** + * wait_on_bit - wait for a bit to be cleared + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @mode: the task state to sleep in + * + * There is a standard hashed waitqueue table for generic use. This + * is the part of the hashtable's accessor API that waits on a bit. + * For instance, if one were to have waiters on a bitflag, one would + * call wait_on_bit() in threads waiting for the bit to clear. + * One uses wait_on_bit() where one is waiting for the bit to clear, + * but has no intention of setting it. 
+ * Returned value will be zero if the bit was cleared, or non-zero + * if the process received a signal and the mode permitted wakeup + * on that signal. + */ +static inline int +wait_on_bit(unsigned long *word, int bit, unsigned mode) +{ + might_sleep(); + if (!test_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit(word, bit, + bit_wait, + mode); +} + +/** + * wait_on_bit_io - wait for a bit to be cleared + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @mode: the task state to sleep in + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared. This is similar to wait_on_bit(), but calls + * io_schedule() instead of schedule() for the actual waiting. + * + * Returned value will be zero if the bit was cleared, or non-zero + * if the process received a signal and the mode permitted wakeup + * on that signal. + */ +static inline int +wait_on_bit_io(unsigned long *word, int bit, unsigned mode) +{ + might_sleep(); + if (!test_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit(word, bit, + bit_wait_io, + mode); +} + +/** + * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @mode: the task state to sleep in + * @timeout: timeout, in jiffies + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared. This is similar to wait_on_bit(), except also takes a + * timeout parameter. + * + * Returned value will be zero if the bit was cleared before the + * @timeout elapsed, or non-zero if the @timeout elapsed or process + * received a signal and the mode permitted wakeup on that signal. 
+ */ +static inline int +wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, + unsigned long timeout) +{ + might_sleep(); + if (!test_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit_timeout(word, bit, + bit_wait_timeout, + mode, timeout); +} + +/** + * wait_on_bit_action - wait for a bit to be cleared + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @action: the function used to sleep, which may take special actions + * @mode: the task state to sleep in + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared, and allow the waiting action to be specified. + * This is like wait_on_bit() but allows fine control of how the waiting + * is done. + * + * Returned value will be zero if the bit was cleared, or non-zero + * if the process received a signal and the mode permitted wakeup + * on that signal. + */ +static inline int +wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, + unsigned mode) +{ + might_sleep(); + if (!test_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit(word, bit, action, mode); +} + +/** + * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @mode: the task state to sleep in + * + * There is a standard hashed waitqueue table for generic use. This + * is the part of the hashtable's accessor API that waits on a bit + * when one intends to set it, for instance, trying to lock bitflags. + * For instance, if one were to have waiters trying to set bitflag + * and waiting for it to clear before setting it, one would call + * wait_on_bit() in threads waiting to be able to set the bit. + * One uses wait_on_bit_lock() where one is waiting for the bit to + * clear with the intention of setting it, and when done, clearing it. 
+ * + * Returns zero if the bit was (eventually) found to be clear and was + * set. Returns non-zero if a signal was delivered to the process and + * the @mode allows that signal to wake the process. + */ +static inline int +wait_on_bit_lock(unsigned long *word, int bit, unsigned mode) +{ + might_sleep(); + if (!test_and_set_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode); +} + +/** + * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @mode: the task state to sleep in + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared and then to atomically set it. This is similar + * to wait_on_bit(), but calls io_schedule() instead of schedule() + * for the actual waiting. + * + * Returns zero if the bit was (eventually) found to be clear and was + * set. Returns non-zero if a signal was delivered to the process and + * the @mode allows that signal to wake the process. + */ +static inline int +wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode) +{ + might_sleep(); + if (!test_and_set_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode); +} + +/** + * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @action: the function used to sleep, which may take special actions + * @mode: the task state to sleep in + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared and then to set it, and allow the waiting action + * to be specified. + * This is like wait_on_bit() but allows fine control of how the waiting + * is done. + * + * Returns zero if the bit was (eventually) found to be clear and was + * set. 
Returns non-zero if a signal was delivered to the process and + * the @mode allows that signal to wake the process. + */ +static inline int +wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action, + unsigned mode) +{ + might_sleep(); + if (!test_and_set_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit_lock(word, bit, action, mode); +} + +/** + * wait_on_atomic_t - Wait for an atomic_t to become 0 + * @val: The atomic value being waited on, a kernel virtual address + * @action: the function used to sleep, which may take special actions + * @mode: the task state to sleep in + * + * Wait for an atomic_t to become 0. We abuse the bit-wait waitqueue table for + * the purpose of getting a waitqueue, but we set the key to a bit number + * outside of the target 'word'. + */ +static inline +int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode) +{ + might_sleep(); + if (atomic_read(val) == 0) + return 0; + return out_of_line_wait_on_atomic_t(val, action, mode); +} + +#endif /* _LINUX_WAIT_BIT_H */ -- cgit v1.2.3 From 5822a454d6d22297c5fcd66264120587b2ec21cd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Mar 2017 13:09:07 +0100 Subject: sched/wait: Move bit_wait_table[] and related functionality from sched/core.c to sched/wait_bit.c The key hashed waitqueue data structures and their initialization was done in the main scheduler file for no good reason, move them to sched/wait_bit.c instead. 
Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/wait_bit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 8c85c52d94b6..9cc82114dbcb 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -35,6 +35,7 @@ int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode); int out_of_line_wait_on_atomic_t(atomic_t *p, int (*)(atomic_t *), unsigned int mode); struct wait_queue_head *bit_waitqueue(void *word, int bit); +extern void __init wait_bit_init(void); int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); -- cgit v1.2.3 From 2055da97389a605c8a00d163d40903afbe413921 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 20 Jun 2017 12:06:46 +0200 Subject: sched/wait: Disambiguate wq_entry->task_list and wq_head->task_list naming So I've noticed a number of instances where it was not obvious from the code whether ->task_list was for a wait-queue head or a wait-queue entry. Furthermore, there's a number of wait-queue users where the lists are not for 'tasks' but other entities (poll tables, etc.), in which case the 'task_list' name is actively confusing. To clear this all up, name the wait-queue head and entry list structure fields unambiguously: struct wait_queue_head::task_list => ::head struct wait_queue_entry::task_list => ::entry For example, this code: rqw->wait.task_list.next != &wait->task_list ... is was pretty unclear (to me) what it's doing, while now it's written this way: rqw->wait.head.next != &wait->entry ... which makes it pretty clear that we are iterating a list until we see the head. 
Other examples are: list_for_each_entry_safe(pos, next, &x->task_list, task_list) { list_for_each_entry(wq, &fence->wait.task_list, task_list) { ... where it's unclear (to me) what we are iterating, and during review it's hard to tell whether it's trying to walk a wait-queue entry (which would be a bug), while now it's written as: list_for_each_entry_safe(pos, next, &x->head, entry) { list_for_each_entry(wq, &fence->wait.head, entry) { Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/wait.h | 20 ++++++++++---------- include/linux/wait_bit.h | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 629489746f8a..b289c96151ee 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -26,12 +26,12 @@ struct wait_queue_entry { unsigned int flags; void *private; wait_queue_func_t func; - struct list_head task_list; + struct list_head entry; }; struct wait_queue_head { spinlock_t lock; - struct list_head task_list; + struct list_head head; }; typedef struct wait_queue_head wait_queue_head_t; @@ -44,14 +44,14 @@ struct task_struct; #define __WAITQUEUE_INITIALIZER(name, tsk) { \ .private = tsk, \ .func = default_wake_function, \ - .task_list = { NULL, NULL } } + .entry = { NULL, NULL } } #define DECLARE_WAITQUEUE(name, tsk) \ struct wait_queue_entry name = __WAITQUEUE_INITIALIZER(name, tsk) #define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ - .task_list = { &(name).task_list, &(name).task_list } } + .head = { &(name).head, &(name).head } } #define DECLARE_WAIT_QUEUE_HEAD(name) \ struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name) @@ -121,7 +121,7 @@ init_waitqueue_func_entry(struct wait_queue_entry *wq_entry, wait_queue_func_t f */ static inline int waitqueue_active(struct wait_queue_head *wq_head) { - return 
!list_empty(&wq_head->task_list); + return !list_empty(&wq_head->head); } /** @@ -151,7 +151,7 @@ extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue static inline void __add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { - list_add(&wq_entry->task_list, &wq_head->task_list); + list_add(&wq_entry->entry, &wq_head->head); } /* @@ -166,7 +166,7 @@ __add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_en static inline void __add_wait_queue_entry_tail(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { - list_add_tail(&wq_entry->task_list, &wq_head->task_list); + list_add_tail(&wq_entry->entry, &wq_head->head); } static inline void @@ -179,7 +179,7 @@ __add_wait_queue_entry_tail_exclusive(struct wait_queue_head *wq_head, struct wa static inline void __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { - list_del(&wq_entry->task_list); + list_del(&wq_entry->entry); } void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); @@ -952,7 +952,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i struct wait_queue_entry name = { \ .private = current, \ .func = function, \ - .task_list = LIST_HEAD_INIT((name).task_list), \ + .entry = LIST_HEAD_INIT((name).entry), \ } #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function) @@ -961,7 +961,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i do { \ (wait)->private = current; \ (wait)->func = autoremove_wake_function; \ - INIT_LIST_HEAD(&(wait)->task_list); \ + INIT_LIST_HEAD(&(wait)->entry); \ (wait)->flags = 0; \ } while (0) diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 9cc82114dbcb..12b26660d7e9 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -45,8 +45,8 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, 
int sync .wq_entry = { \ .private = current, \ .func = wake_bit_function, \ - .task_list = \ - LIST_HEAD_INIT((name).wq_entry.task_list), \ + .entry = \ + LIST_HEAD_INIT((name).wq_entry.entry), \ }, \ } -- cgit v1.2.3 From a2bce3794a2122425abce5b0359d075b790930bc Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 7 Jun 2017 15:33:57 -0300 Subject: [media] media: Add userspace header file for i.MX This adds a header file for use by userspace programs wanting to interact with the i.MX media driver. It defines custom events and v4l2 controls for the i.MX v4l2 subdevices. Signed-off-by: Steve Longerbeam Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/imx-media.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 include/linux/imx-media.h (limited to 'include/linux') diff --git a/include/linux/imx-media.h b/include/linux/imx-media.h new file mode 100644 index 000000000000..77221ecad6fc --- /dev/null +++ b/include/linux/imx-media.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2014-2017 Mentor Graphics Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the + * License, or (at your option) any later version + */ + +#ifndef __LINUX_IMX_MEDIA_H__ +#define __LINUX_IMX_MEDIA_H__ + +/* + * events from the subdevs + */ +#define V4L2_EVENT_IMX_CLASS V4L2_EVENT_PRIVATE_START +#define V4L2_EVENT_IMX_FRAME_INTERVAL_ERROR (V4L2_EVENT_IMX_CLASS + 1) + +enum imx_ctrl_id { + V4L2_CID_IMX_FIM_ENABLE = (V4L2_CID_USER_IMX_BASE + 0), + V4L2_CID_IMX_FIM_NUM, + V4L2_CID_IMX_FIM_TOLERANCE_MIN, + V4L2_CID_IMX_FIM_TOLERANCE_MAX, + V4L2_CID_IMX_FIM_NUM_SKIP, + V4L2_CID_IMX_FIM_ICAP_EDGE, + V4L2_CID_IMX_FIM_ICAP_CHANNEL, +}; + +#endif -- cgit v1.2.3 From f11cc0760b8397e0d230122606421b6a96e9f869 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 14 Jun 2017 19:37:30 -0700 Subject: sched/core: Drop the unused try_get_task_struct() helper function This function was introduced by: 150593bf8693 ("sched/api: Introduce task_rcu_dereference() and try_get_task_struct()") ... to allow easier usage of task_rcu_dereference(), however no users were ever added. Drop the helper. 
Signed-off-by: Davidlohr Bueso Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Link: http://lkml.kernel.org/r/20170615023730.22827-1-dave@stgolabs.net Signed-off-by: Ingo Molnar --- include/linux/sched/task.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index a978d7189cfd..f0f065c5afcf 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -95,8 +95,6 @@ static inline void put_task_struct(struct task_struct *t) } struct task_struct *task_rcu_dereference(struct task_struct **ptask); -struct task_struct *try_get_task_struct(struct task_struct **ptask); - #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT extern int arch_task_struct_size __read_mostly; -- cgit v1.2.3 From fdd2f5b7de2afaa931e5f7bad7bcda35d1f1b479 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Tue, 20 Jun 2017 07:05:40 -0500 Subject: fs: Separate out kiocb flags setup based on RWF_* flags Also added RWF_SUPPORTED to encompass all flags. 
Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Goldwyn Rodrigues Signed-off-by: Jens Axboe --- include/linux/fs.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 023f0324762b..96a1a1fa54a9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3057,6 +3057,20 @@ static inline int iocb_flags(struct file *file) return res; } +static inline int kiocb_set_rw_flags(struct kiocb *ki, int flags) +{ + if (unlikely(flags & ~RWF_SUPPORTED)) + return -EOPNOTSUPP; + + if (flags & RWF_HIPRI) + ki->ki_flags |= IOCB_HIPRI; + if (flags & RWF_DSYNC) + ki->ki_flags |= IOCB_DSYNC; + if (flags & RWF_SYNC) + ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC); + return 0; +} + static inline ino_t parent_ino(struct dentry *dentry) { ino_t res; -- cgit v1.2.3 From 7fc9e4722435cd8459182c4975f48934f2bb1274 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Tue, 20 Jun 2017 07:05:41 -0500 Subject: fs: Introduce filemap_range_has_page() filemap_range_has_page() return true if the file's mapping has a page within the range mentioned. This function will be used to check if a write() call will cause a writeback of previous writes. 
Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Goldwyn Rodrigues Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 96a1a1fa54a9..0d34f5b5a6b0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2518,6 +2518,8 @@ extern int filemap_fdatawait(struct address_space *); extern void filemap_fdatawait_keep_errors(struct address_space *); extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend); +extern bool filemap_range_has_page(struct address_space *, loff_t lstart, + loff_t lend); extern int filemap_write_and_wait(struct address_space *mapping); extern int filemap_write_and_wait_range(struct address_space *mapping, loff_t lstart, loff_t lend); -- cgit v1.2.3 From b745fafaf70c0a98a2e1e7ac8cb14542889ceb0e Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Tue, 20 Jun 2017 07:05:43 -0500 Subject: fs: Introduce RWF_NOWAIT and FMODE_AIO_NOWAIT RWF_NOWAIT informs kernel to bail out if an AIO request will block for reasons such as file allocations, or a writeback triggered, or would block while allocating requests while performing direct I/O. RWF_NOWAIT is translated to IOCB_NOWAIT for iocb->ki_flags. FMODE_AIO_NOWAIT is a flag which identifies the file opened is capable of returning -EAGAIN if the AIO call will block. This must be set by supporting filesystems in the ->open() call. Filesystems xfs, btrfs and ext4 would be supported in the following patches. 
Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Goldwyn Rodrigues Signed-off-by: Jens Axboe --- include/linux/fs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0d34f5b5a6b0..4574121f4746 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -143,6 +143,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) +/* File is capable of returning -EAGAIN if AIO will block */ +#define FMODE_AIO_NOWAIT ((__force fmode_t)0x8000000) + /* * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector * that indicates that they should check the contents of the iovec are @@ -269,6 +272,7 @@ struct writeback_control; #define IOCB_DSYNC (1 << 4) #define IOCB_SYNC (1 << 5) #define IOCB_WRITE (1 << 6) +#define IOCB_NOWAIT (1 << 7) struct kiocb { struct file *ki_filp; @@ -3064,6 +3068,11 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, int flags) if (unlikely(flags & ~RWF_SUPPORTED)) return -EOPNOTSUPP; + if (flags & RWF_NOWAIT) { + if (!(ki->ki_filp->f_mode & FMODE_AIO_NOWAIT)) + return -EOPNOTSUPP; + ki->ki_flags |= IOCB_NOWAIT; + } if (flags & RWF_HIPRI) ki->ki_flags |= IOCB_HIPRI; if (flags & RWF_DSYNC) -- cgit v1.2.3 From a38d1243704f501a4c42de1db1062ff6eba83453 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Tue, 20 Jun 2017 07:05:45 -0500 Subject: fs: Introduce IOMAP_NOWAIT IOCB_NOWAIT translates to IOMAP_NOWAIT for iomaps. This is used by XFS in the XFS patch. 
Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Goldwyn Rodrigues Signed-off-by: Jens Axboe --- include/linux/iomap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index f753e788da31..69f4e9470084 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -52,6 +52,7 @@ struct iomap { #define IOMAP_REPORT (1 << 2) /* report extent status, e.g. FIEMAP */ #define IOMAP_FAULT (1 << 3) /* mapping for page fault */ #define IOMAP_DIRECT (1 << 4) /* direct I/O */ +#define IOMAP_NOWAIT (1 << 5) /* Don't wait for writeback */ struct iomap_ops { /* -- cgit v1.2.3 From 03a07c92a9ed9938d828ca7f1d11b8bc63a7bb89 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Tue, 20 Jun 2017 07:05:46 -0500 Subject: block: return on congested block device A new bio operation flag REQ_NOWAIT is introduced to identify bio's orignating from iocb with IOCB_NOWAIT. This flag indicates to return immediately if a request cannot be made instead of retrying. Stacked devices such as md (the ones with make_request_fn hooks) currently are not supported because it may block for housekeeping. For example, an md can have a part of the device suspended. For this reason, only request based devices are supported. In the future, this feature will be expanded to stacked devices by teaching them how to handle the REQ_NOWAIT flags. 
Reviewed-by: Christoph Hellwig Reviewed-by: Jens Axboe Signed-off-by: Goldwyn Rodrigues Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 ++++++ include/linux/blk_types.h | 4 ++++ 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 40d054185277..36aa641cde28 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -416,6 +416,12 @@ static inline void bio_io_error(struct bio *bio) bio_endio(bio); } +static inline void bio_wouldblock_error(struct bio *bio) +{ + bio->bi_status = BLK_STS_AGAIN; + bio_endio(bio); +} + struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index dcd45b15a3a5..e210da6d14b8 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -36,6 +36,8 @@ typedef u8 __bitwise blk_status_t; /* hack for device mapper, don't use elsewhere: */ #define BLK_STS_DM_REQUEUE ((__force blk_status_t)11) +#define BLK_STS_AGAIN ((__force blk_status_t)12) + struct blk_issue_stat { u64 stat; }; @@ -224,6 +226,7 @@ enum req_flag_bits { /* command specific flags for REQ_OP_WRITE_ZEROES: */ __REQ_NOUNMAP, /* do not free blocks when zeroing */ + __REQ_NOWAIT, /* Don't wait if request will block */ __REQ_NR_BITS, /* stops here */ }; @@ -242,6 +245,7 @@ enum req_flag_bits { #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) +#define REQ_NOWAIT (1ULL << __REQ_NOWAIT) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) -- cgit v1.2.3 From 1a4a69751f4d24ffd3530f5a9694636db1566a3b Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Tue, 20 Jun 2017 16:00:00 +0300 Subject: qed: Chain support for external PBL iWARP would require the chains to allocate/free their PBL memory independently, so add the infrastructure to provide it externally. Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_chain.h | 7 +++++++ include/linux/qed/qed_if.h | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 5cd7a4608c9b..59ddf9af909e 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -80,6 +80,11 @@ struct qed_chain_pbl_u32 { u32 cons_page_idx; }; +struct qed_chain_ext_pbl { + dma_addr_t p_pbl_phys; + void *p_pbl_virt; +}; + struct qed_chain_u16 { /* Cyclic index of next element to produce/consme */ u16 prod_idx; @@ -155,6 +160,8 @@ struct qed_chain { u32 size; u8 intended_use; + + bool b_external_pbl; }; #define QED_CHAIN_PBL_ENTRY_SIZE (8) diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 74f6b99754aa..ef39c7f40ae6 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -634,7 +634,8 @@ struct qed_common_ops { enum qed_chain_cnt_type cnt_type, u32 num_elems, size_t elem_size, - struct qed_chain *p_chain); + struct qed_chain *p_chain, + struct qed_chain_ext_pbl *ext_pbl); void (*chain_free)(struct qed_dev *cdev, struct qed_chain *p_chain); -- cgit v1.2.3 From b262a06e642cfb1eeb6c2c772f76dad674ada57e Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Tue, 20 Jun 2017 16:00:03 +0300 Subject: qed*: qede_roce.[ch] -> qede_rdma.[ch] Once we have iWARP support, the qede portion of the qedr<->qede would serve all the RDMA protocols - so rename the file to be appropriate to its function. While we're at it, we're also moving a couple of inclusions to it into .h files and adding includes to make sure it contains all type definitions it requires. Signed-off-by: Michal Kalderon Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qede_rdma.h | 93 +++++++++++++++++++++++++++++++++++++++++++ include/linux/qed/qede_roce.h | 88 ---------------------------------------- 2 files changed, 93 insertions(+), 88 deletions(-) create mode 100644 include/linux/qed/qede_rdma.h delete mode 100644 include/linux/qed/qede_roce.h (limited to 'include/linux') diff --git a/include/linux/qed/qede_rdma.h b/include/linux/qed/qede_rdma.h new file mode 100644 index 000000000000..a1a9b81f7612 --- /dev/null +++ b/include/linux/qed/qede_rdma.h @@ -0,0 +1,93 @@ +/* QLogic qedr NIC Driver + * Copyright (c) 2015-2017 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef QEDE_ROCE_H +#define QEDE_ROCE_H + +#include +#include +#include +#include + +struct qedr_dev; +struct qed_dev; +struct qede_dev; + +enum qede_roce_event { + QEDE_UP, + QEDE_DOWN, + QEDE_CHANGE_ADDR, + QEDE_CLOSE +}; + +struct qede_roce_event_work { + struct list_head list; + struct work_struct work; + void *ptr; + enum qede_roce_event event; +}; + +struct qedr_driver { + unsigned char name[32]; + + struct qedr_dev* (*add)(struct qed_dev *, struct pci_dev *, + struct net_device *); + + void (*remove)(struct qedr_dev *); + void (*notify)(struct qedr_dev *, enum qede_roce_event); +}; + +/* APIs for RoCE driver to register callback handlers, + * which will be invoked when device is added, removed, ifup, ifdown + */ +int qede_roce_register_driver(struct qedr_driver *drv); +void qede_roce_unregister_driver(struct qedr_driver *drv); + +bool qede_roce_supported(struct qede_dev *dev); + +#if IS_ENABLED(CONFIG_QED_RDMA) +int qede_roce_dev_add(struct qede_dev *dev); +void qede_roce_dev_event_open(struct qede_dev *dev); +void qede_roce_dev_event_close(struct qede_dev *dev); +void qede_roce_dev_remove(struct qede_dev *dev); +void qede_roce_event_changeaddr(struct qede_dev *qedr); +#else +static inline int qede_roce_dev_add(struct qede_dev *dev) +{ + return 0; +} + +static inline void qede_roce_dev_event_open(struct qede_dev *dev) {} +static inline void qede_roce_dev_event_close(struct qede_dev *dev) {} +static inline void qede_roce_dev_remove(struct qede_dev *dev) {} +static inline void qede_roce_event_changeaddr(struct qede_dev *qedr) {} +#endif +#endif diff --git a/include/linux/qed/qede_roce.h b/include/linux/qed/qede_roce.h deleted file mode 100644 index 3b8dd551a98c..000000000000 --- a/include/linux/qed/qede_roce.h +++ /dev/null @@ -1,88 +0,0 @@ -/* QLogic qedr NIC Driver - * Copyright (c) 2015-2017 QLogic Corporation - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and /or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef QEDE_ROCE_H -#define QEDE_ROCE_H - -struct qedr_dev; -struct qed_dev; -struct qede_dev; - -enum qede_roce_event { - QEDE_UP, - QEDE_DOWN, - QEDE_CHANGE_ADDR, - QEDE_CLOSE -}; - -struct qede_roce_event_work { - struct list_head list; - struct work_struct work; - void *ptr; - enum qede_roce_event event; -}; - -struct qedr_driver { - unsigned char name[32]; - - struct qedr_dev* (*add)(struct qed_dev *, struct pci_dev *, - struct net_device *); - - void (*remove)(struct qedr_dev *); - void (*notify)(struct qedr_dev *, enum qede_roce_event); -}; - -/* APIs for RoCE driver to register callback handlers, - * which will be invoked when device is added, removed, ifup, ifdown - */ -int qede_roce_register_driver(struct qedr_driver *drv); -void qede_roce_unregister_driver(struct qedr_driver *drv); - -bool qede_roce_supported(struct qede_dev *dev); - -#if IS_ENABLED(CONFIG_QED_RDMA) -int qede_roce_dev_add(struct qede_dev *dev); -void qede_roce_dev_event_open(struct qede_dev *dev); -void qede_roce_dev_event_close(struct qede_dev *dev); -void qede_roce_dev_remove(struct qede_dev *dev); -void qede_roce_event_changeaddr(struct qede_dev *qedr); -#else -static inline int qede_roce_dev_add(struct qede_dev *dev) -{ - return 0; -} - -static inline void qede_roce_dev_event_open(struct qede_dev *dev) {} -static inline void qede_roce_dev_event_close(struct qede_dev *dev) {} -static inline void qede_roce_dev_remove(struct qede_dev *dev) {} -static inline void qede_roce_event_changeaddr(struct qede_dev *qedr) {} -#endif -#endif -- cgit v1.2.3 From bbfcd1e8e1677b1e692144c5709945e1dfe1ed30 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Tue, 20 Jun 2017 16:00:04 +0300 Subject: qed*: Set rdma generic functions prefix Rename the functions common to both iWARP and RoCE to have a prefix of _rdma_ instead of _roce_. Signed-off-by: Michal Kalderon Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qede_rdma.h | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qede_rdma.h b/include/linux/qed/qede_rdma.h index a1a9b81f7612..1348a16e5e4b 100644 --- a/include/linux/qed/qede_rdma.h +++ b/include/linux/qed/qede_rdma.h @@ -41,18 +41,18 @@ struct qedr_dev; struct qed_dev; struct qede_dev; -enum qede_roce_event { +enum qede_rdma_event { QEDE_UP, QEDE_DOWN, QEDE_CHANGE_ADDR, QEDE_CLOSE }; -struct qede_roce_event_work { +struct qede_rdma_event_work { struct list_head list; struct work_struct work; void *ptr; - enum qede_roce_event event; + enum qede_rdma_event event; }; struct qedr_driver { @@ -62,32 +62,33 @@ struct qedr_driver { struct net_device *); void (*remove)(struct qedr_dev *); - void (*notify)(struct qedr_dev *, enum qede_roce_event); + void (*notify)(struct qedr_dev *, enum qede_rdma_event); }; -/* APIs for RoCE driver to register callback handlers, +/* APIs for RDMA driver to register callback handlers, * which will be invoked when device is added, removed, ifup, ifdown */ -int qede_roce_register_driver(struct qedr_driver *drv); -void qede_roce_unregister_driver(struct qedr_driver *drv); +int qede_rdma_register_driver(struct qedr_driver *drv); +void qede_rdma_unregister_driver(struct qedr_driver *drv); -bool qede_roce_supported(struct qede_dev *dev); +bool qede_rdma_supported(struct qede_dev *dev); #if IS_ENABLED(CONFIG_QED_RDMA) -int qede_roce_dev_add(struct qede_dev *dev); -void qede_roce_dev_event_open(struct qede_dev *dev); -void qede_roce_dev_event_close(struct qede_dev *dev); -void qede_roce_dev_remove(struct qede_dev *dev); -void qede_roce_event_changeaddr(struct qede_dev *qedr); +int qede_rdma_dev_add(struct qede_dev *dev); +void qede_rdma_dev_event_open(struct qede_dev *dev); +void qede_rdma_dev_event_close(struct qede_dev *dev); +void qede_rdma_dev_remove(struct qede_dev *dev); +void qede_rdma_event_changeaddr(struct 
qede_dev *edr); + #else -static inline int qede_roce_dev_add(struct qede_dev *dev) +static inline int qede_rdma_dev_add(struct qede_dev *dev) { return 0; } -static inline void qede_roce_dev_event_open(struct qede_dev *dev) {} -static inline void qede_roce_dev_event_close(struct qede_dev *dev) {} -static inline void qede_roce_dev_remove(struct qede_dev *dev) {} -static inline void qede_roce_event_changeaddr(struct qede_dev *qedr) {} +static inline void qede_rdma_dev_event_open(struct qede_dev *dev) {} +static inline void qede_rdma_dev_event_close(struct qede_dev *dev) {} +static inline void qede_rdma_dev_remove(struct qede_dev *dev) {} +static inline void qede_rdma_event_changeaddr(struct qede_dev *edr) {} #endif #endif -- cgit v1.2.3 From de77b966ce8adcb4c58d50e2f087320d5479812a Mon Sep 17 00:00:00 2001 From: yuan linyu Date: Sun, 18 Jun 2017 22:48:17 +0800 Subject: net: introduce __skb_put_[zero, data, u8] follow Johannes Berg, semantic patch file as below, @@ identifier p, p2; expression len; expression skb; type t, t2; @@ ( -p = __skb_put(skb, len); +p = __skb_put_zero(skb, len); | -p = (t)__skb_put(skb, len); +p = __skb_put_zero(skb, len); ) ... when != p ( p2 = (t2)p; -memset(p2, 0, len); | -memset(p, 0, len); ) @@ identifier p; expression len; expression skb; type t; @@ ( -t p = __skb_put(skb, len); +t p = __skb_put_zero(skb, len); ) ... when != p ( -memset(p, 0, len); ) @@ type t, t2; identifier p, p2; expression skb; @@ t *p; ... ( -p = __skb_put(skb, sizeof(t)); +p = __skb_put_zero(skb, sizeof(t)); | -p = (t *)__skb_put(skb, sizeof(t)); +p = __skb_put_zero(skb, sizeof(t)); ) ... 
when != p ( p2 = (t2)p; -memset(p2, 0, sizeof(*p)); | -memset(p, 0, sizeof(*p)); ) @@ expression skb, len; @@ -memset(__skb_put(skb, len), 0, len); +__skb_put_zero(skb, len); @@ expression skb, len, data; @@ -memcpy(__skb_put(skb, len), data, len); +__skb_put_data(skb, data, len); @@ expression SKB, C, S; typedef u8; identifier fn = {__skb_put}; fresh identifier fn2 = fn ## "_u8"; @@ - *(u8 *)fn(SKB, S) = C; + fn2(SKB, C); Signed-off-by: yuan linyu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 852feacf4bbf..a17e235639ae 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1904,6 +1904,28 @@ static inline void *__skb_put(struct sk_buff *skb, unsigned int len) return tmp; } +static inline void *__skb_put_zero(struct sk_buff *skb, unsigned int len) +{ + void *tmp = __skb_put(skb, len); + + memset(tmp, 0, len); + return tmp; +} + +static inline void *__skb_put_data(struct sk_buff *skb, const void *data, + unsigned int len) +{ + void *tmp = __skb_put(skb, len); + + memcpy(tmp, data, len); + return tmp; +} + +static inline void __skb_put_u8(struct sk_buff *skb, u8 val) +{ + *(u8 *)__skb_put(skb, 1) = val; +} + static inline void *skb_put_zero(struct sk_buff *skb, unsigned int len) { void *tmp = skb_put(skb, len); -- cgit v1.2.3 From 3f1d472055bbe914c9e54715fdbf2272851e23ff Mon Sep 17 00:00:00 2001 From: Mariusz Skamra Date: Fri, 26 May 2017 15:00:47 +0200 Subject: ktime: Simplify ktime_compare implementation ktime_sub can be used here instead of two conditional checks. 
Signed-off-by: Mariusz Skamra Signed-off-by: Thomas Gleixner Acked-by: Kuppuswamy Sathyanarayanan Link: http://lkml.kernel.org/r/1495803647-9504-1-git-send-email-mariuszx.skamra@intel.com --- include/linux/ktime.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 0c8bd45c8206..04817b1ca019 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -108,11 +108,7 @@ static inline ktime_t timeval_to_ktime(struct timeval tv) */ static inline int ktime_compare(const ktime_t cmp1, const ktime_t cmp2) { - if (cmp1 < cmp2) - return -1; - if (cmp1 > cmp2) - return 1; - return 0; + return ktime_sub(cmp1, cmp2); } /** -- cgit v1.2.3 From 104b4e5139fe384431ac11c3b8a6cf4a529edf4a Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 20 Jun 2017 21:01:20 +0300 Subject: percpu_counter: Rename __percpu_counter_add to percpu_counter_add_batch Currently, percpu_counter_add is a wrapper around __percpu_counter_add which is preempt safe due to explicit calls to preempt_disable. Given how __ prefix is used in percpu related interfaces, the naming unfortunately creates the false sense that __percpu_counter_add is less safe than percpu_counter_add. In terms of context-safety, they're equivalent. The only difference is that the __ version takes a batch parameter. Make this a bit more explicit by just renaming __percpu_counter_add to percpu_counter_add_batch. This patch doesn't cause any functional changes. tj: Minor updates to patch description for clarity. Cosmetic indentation updates. Signed-off-by: Nikolay Borisov Signed-off-by: Tejun Heo Cc: Chris Mason Cc: Josef Bacik Cc: David Sterba Cc: Darrick J. Wong Cc: Jan Kara Cc: Jens Axboe Cc: linux-mm@kvack.org Cc: "David S. 
Miller" --- include/linux/backing-dev.h | 2 +- include/linux/blk-cgroup.h | 6 +++--- include/linux/mman.h | 2 +- include/linux/percpu_counter.h | 7 ++++--- 4 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 557d84063934..ace73f96eb1e 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -66,7 +66,7 @@ static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi) static inline void __add_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item, s64 amount) { - __percpu_counter_add(&wb->stat[item], amount, WB_STAT_BATCH); + percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH); } static inline void __inc_wb_stat(struct bdi_writeback *wb, diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 01b62e7bac74..7104bea8dab1 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -518,7 +518,7 @@ static inline void blkg_stat_exit(struct blkg_stat *stat) */ static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val) { - __percpu_counter_add(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH); + percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH); } /** @@ -597,14 +597,14 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, else cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ]; - __percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH); + percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH); if (op_is_sync(op)) cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC]; else cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC]; - __percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH); + percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH); } /** diff --git a/include/linux/mman.h b/include/linux/mman.h index 634c4c51fe3a..c8367041fafd 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -22,7 +22,7 @@ unsigned long vm_memory_committed(void); static inline void vm_acct_memory(long pages) { - 
__percpu_counter_add(&vm_committed_as, pages, vm_committed_as_batch); + percpu_counter_add_batch(&vm_committed_as, pages, vm_committed_as_batch); } static inline void vm_unacct_memory(long pages) diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 84a109449610..ec065387f443 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -39,7 +39,8 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); -void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); +void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, + s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); @@ -50,7 +51,7 @@ static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { - __percpu_counter_add(fbc, amount, percpu_counter_batch); + percpu_counter_add_batch(fbc, amount, percpu_counter_batch); } static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) @@ -136,7 +137,7 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount) } static inline void -__percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) +percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { percpu_counter_add(fbc, amount); } -- cgit v1.2.3 From 80ab6af432523b33352771b1eca1cee793cc7c81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Jun 2017 09:24:40 +0200 Subject: block: remove the unused bio_to_phys macro Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 36aa641cde28..4907bea03908 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -118,7 +118,6 @@ static inline void *bio_data(struct bio *bio) /* * will die */ -#define bio_to_phys(bio) (page_to_phys(bio_page((bio))) + (unsigned long) bio_offset((bio))) #define bvec_to_phys(bv) (page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset) /* -- cgit v1.2.3 From efbeccdb59d666b9c77d505af01097cc0a9d102b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Jun 2017 09:24:41 +0200 Subject: block: stop using bio_data() in blk_write_same_mergeable While the Write Same page currently always is in low-level it is just as easy and safer to just compare the page and offset directly. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 22cfba64ce81..0deed7274a7f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -815,7 +815,8 @@ static inline bool rq_mergeable(struct request *rq) static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b) { - if (bio_data(a) == bio_data(b)) + if (bio_page(a) == bio_page(b) && + bio_offset(a) == bio_offset(b)) return true; return false; -- cgit v1.2.3 From 073196787727e454e17a96d222ea55eba2000978 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 20 Jun 2017 11:15:38 -0700 Subject: blk-mq: Reduce blk_mq_hw_ctx size Since the srcu structure is rather large (184 bytes on an x86-64 system with kernel debugging disabled), only allocate it if needed. 
Reported-by: Ming Lei Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Ming Lei Cc: Hannes Reinecke Cc: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index f1bd13ae8f57..3f2c22a42df6 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -39,8 +39,6 @@ struct blk_mq_hw_ctx { struct blk_mq_tags *tags; struct blk_mq_tags *sched_tags; - struct srcu_struct queue_rq_srcu; - unsigned long queued; unsigned long run; #define BLK_MQ_MAX_DISPATCH_ORDER 7 @@ -62,6 +60,9 @@ struct blk_mq_hw_ctx { struct dentry *debugfs_dir; struct dentry *sched_debugfs_dir; #endif + + /* Must be the last member - see also blk_mq_hw_ctx_size(). */ + struct srcu_struct queue_rq_srcu[0]; }; struct blk_mq_tag_set { -- cgit v1.2.3 From cd6ce1482fd9e691bb68c660fa918c90f6b1bc25 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 20 Jun 2017 11:15:39 -0700 Subject: block: Make request operation type argument declarations consistent Instead of declaring the second argument of blk_*_get_request() as int and passing it to functions that expect an unsigned int, declare that second argument as unsigned int. Also because of consistency, rename that second argument from 'rw' into 'op'. This patch does not change any functionality. 
Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Cc: Hannes Reinecke Cc: Omar Sandoval Cc: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 6 +++--- include/linux/blkdev.h | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 3f2c22a42df6..3077714250ce 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -202,10 +202,10 @@ enum { BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */ }; -struct request *blk_mq_alloc_request(struct request_queue *q, int rw, +struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, unsigned int flags); -struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int op, - unsigned int flags, unsigned int hctx_idx); +struct request *blk_mq_alloc_request_hctx(struct request_queue *q, + unsigned int op, unsigned int flags, unsigned int hctx_idx); struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); enum { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0deed7274a7f..e21dd893ee86 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -935,7 +935,8 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_init_request_from_bio(struct request *req, struct bio *bio); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); -extern struct request *blk_get_request(struct request_queue *, int, gfp_t); +extern struct request *blk_get_request(struct request_queue *, unsigned int op, + gfp_t gfp_mask); extern void blk_requeue_request(struct request_queue *, struct request *); extern int blk_lld_busy(struct request_queue *q); extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, -- cgit v1.2.3 From d280bab305431c1836423f3cd6a5ff0e35a601ef Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 20 Jun 
2017 11:15:40 -0700 Subject: block: Introduce request_queue.initialize_rq_fn() Several block drivers need to initialize the driver-private request data after having called blk_get_request() and before .prep_rq_fn() is called, e.g. when submitting a REQ_OP_SCSI_* request. Avoid that that initialization code has to be repeated after every blk_get_request() call by adding new callback functions to struct request_queue and to struct blk_mq_ops. Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ include/linux/blkdev.h | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 3077714250ce..366b83cee955 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -144,6 +144,8 @@ struct blk_mq_ops { init_request_fn *init_request; exit_request_fn *exit_request; reinit_request_fn *reinit_request; + /* Called from inside blk_get_request() */ + void (*initialize_rq_fn)(struct request *rq); map_queues_fn *map_queues; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e21dd893ee86..9a36164487d0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -410,8 +410,12 @@ struct request_queue { rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; lld_busy_fn *lld_busy_fn; + /* Called just after a request is allocated */ init_rq_fn *init_rq_fn; + /* Called just before a request is freed */ exit_rq_fn *exit_rq_fn; + /* Called from inside blk_get_request() */ + void (*initialize_rq_fn)(struct request *rq); const struct blk_mq_ops *mq_ops; -- cgit v1.2.3 From 9e0c829906b9aa1e7ad84689f2bcd56457bdb417 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 20 Jun 2017 11:15:44 -0700 Subject: block: Add a comment above queue_lockdep_assert_held() Add a comment above the queue_lockdep_assert_held() macro that explains the purpose of the q->queue_lock test. 
Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Cc: Hannes Reinecke Cc: Omar Sandoval Cc: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9a36164487d0..3e60e7a654bd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -635,6 +635,13 @@ struct request_queue { (1 << QUEUE_FLAG_SAME_COMP) | \ (1 << QUEUE_FLAG_POLL)) +/* + * @q->queue_lock is set while a queue is being initialized. Since we know + * that no other threads access the queue object before @q->queue_lock has + * been set, it is safe to manipulate queue flags without holding the + * queue_lock if @q->queue_lock == NULL. See also blk_alloc_queue_node() and + * blk_init_allocated_queue(). + */ static inline void queue_lockdep_assert_held(struct request_queue *q) { if (q->queue_lock) -- cgit v1.2.3 From fc6eead7c1e2e5376c25d2795d4539fdacbc0648 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 22 May 2017 17:20:20 -0700 Subject: time: Clean up CLOCK_MONOTONIC_RAW time handling Now that we fixed the sub-ns handling for CLOCK_MONOTONIC_RAW, remove the duplicative tk->raw_time.tv_nsec, which can be stored in tk->tkr_raw.xtime_nsec (similarly to how it's handled for monotonic time). 
Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Cc: Stephen Boyd Cc: Kevin Brodsky Cc: Will Deacon Cc: Daniel Mentz Tested-by: Daniel Mentz Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index f7043ccca81c..0a0a53daf2a2 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -51,7 +51,7 @@ struct tk_read_base { * @clock_was_set_seq: The sequence number of clock was set events * @cs_was_changed_seq: The sequence number of clocksource change events * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second - * @raw_time: Monotonic raw base time in timespec64 format + * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds * @cycle_interval: Number of clock cycles in one NTP interval * @xtime_interval: Number of clock shifted nano seconds in one NTP * interval. @@ -93,7 +93,7 @@ struct timekeeper { unsigned int clock_was_set_seq; u8 cs_was_changed_seq; ktime_t next_leap_ktime; - struct timespec64 raw_time; + u64 raw_sec; /* The following members are for timekeeping internal use */ u64 cycle_interval; -- cgit v1.2.3 From 707188f5f2421a304324e6ef3aaf4413cfab0f3d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 31 May 2017 18:06:56 +0200 Subject: irq/generic-chip: Provide irq_free_generic_chip() Currently there's no way for users of irq_alloc_generic_chip() to free the allocated memory other than calling kfree() manually on the returned pointer. This may lead to errors if the internals of irq_alloc_generic_chip() ever change. Provide a routine to free the generic chip. 
Signed-off-by: Bartosz Golaszewski Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Cc: linux-doc@vger.kernel.org Cc: Jonathan Corbet Link: http://lkml.kernel.org/r/1496246820-13250-2-git-send-email-brgl@bgdev.pl --- include/linux/irq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 94d1ad6ffdd4..2c957fe5d9d7 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -973,6 +974,11 @@ int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, handler, clr, set, flags); \ }) +static inline void irq_free_generic_chip(struct irq_chip_generic *gc) +{ + kfree(gc); +} + static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d) { return container_of(d->chip, struct irq_chip_type, chip); -- cgit v1.2.3 From 32bb6cbb3b4ea5ca24e3fa13e11772c192616e04 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 31 May 2017 18:06:57 +0200 Subject: irq/generic-chip: Provide irq_destroy_generic_chip() Most users of irq_alloc_generic_chip() call irq_setup_generic_chip() too. To simplify the cleanup provide a function that both removes a generic chip and frees its memory. 
Signed-off-by: Bartosz Golaszewski Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Cc: linux-doc@vger.kernel.org Cc: Jonathan Corbet Link: http://lkml.kernel.org/r/1496246820-13250-3-git-send-email-brgl@bgdev.pl --- include/linux/irq.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 2c957fe5d9d7..dc63aa10ce70 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -979,6 +979,14 @@ static inline void irq_free_generic_chip(struct irq_chip_generic *gc) kfree(gc); } +static inline void irq_destroy_generic_chip(struct irq_chip_generic *gc, + u32 msk, unsigned int clr, + unsigned int set) +{ + irq_remove_generic_chip(gc, msk, clr, set); + irq_free_generic_chip(gc); +} + static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d) { return container_of(d->chip, struct irq_chip_type, chip); -- cgit v1.2.3 From 1c3e36309fe2e94b8a889fa32cb5c871434f8ed6 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 31 May 2017 18:06:59 +0200 Subject: irq/generic-chip: Provide devm_irq_alloc_generic_chip() Provide a resource managed variant of irq_alloc_generic_chip(). 
Signed-off-by: Bartosz Golaszewski Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Cc: linux-doc@vger.kernel.org Cc: Jonathan Corbet Link: http://lkml.kernel.org/r/1496246820-13250-5-git-send-email-brgl@bgdev.pl --- include/linux/irq.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index dc63aa10ce70..64ae54673e08 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -958,6 +958,11 @@ int irq_setup_alt_chip(struct irq_data *d, unsigned int type); void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, unsigned int clr, unsigned int set); +struct irq_chip_generic * +devm_irq_alloc_generic_chip(struct device *dev, const char *name, int num_ct, + unsigned int irq_base, void __iomem *reg_base, + irq_flow_handler_t handler); + struct irq_chip_generic *irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq); int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, -- cgit v1.2.3 From 30fd8fc5c91973485705f83c7efe9588b8e6f371 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 31 May 2017 18:07:00 +0200 Subject: irq/generic-chip: Provide devm_irq_setup_generic_chip() Provide a resource managed variant of irq_setup_generic_chip(). 
Signed-off-by: Bartosz Golaszewski Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Cc: linux-doc@vger.kernel.org Cc: Jonathan Corbet Link: http://lkml.kernel.org/r/1496246820-13250-6-git-send-email-brgl@bgdev.pl --- include/linux/irq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 64ae54673e08..d996314b6522 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -962,6 +962,9 @@ struct irq_chip_generic * devm_irq_alloc_generic_chip(struct device *dev, const char *name, int num_ct, unsigned int irq_base, void __iomem *reg_base, irq_flow_handler_t handler); +int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc, + u32 msk, enum irq_gc_flags flags, + unsigned int clr, unsigned int set); struct irq_chip_generic *irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq); -- cgit v1.2.3 From da2e9cf03b8fccbb69dd1e215bb1e554ce8e8cbe Mon Sep 17 00:00:00 2001 From: Chad Dupuis Date: Wed, 21 Jun 2017 08:26:34 +0300 Subject: qede: Fix compilation without QED_RDMA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_QED_RDMA isn't defined, we'd hit the following: /include/linux/qed/qede_rdma.h:84:19: warning: ‘qede_rdma_dev_add’ used but never defined [enabled by default] static inline int qede_rdma_dev_add(struct qede_dev *dev); Fixes: bbfcd1e8e167 ("qed*: Set rdma generic functions prefix") Signed-off-by: Chad Dupuis Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qede_rdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/qed/qede_rdma.h b/include/linux/qed/qede_rdma.h index 1348a16e5e4b..9904617a9730 100644 --- a/include/linux/qed/qede_rdma.h +++ b/include/linux/qed/qede_rdma.h @@ -81,7 +81,7 @@ void qede_rdma_dev_remove(struct qede_dev *dev); void qede_rdma_event_changeaddr(struct qede_dev *edr); #else -static inline int qede_rdma_dev_add(struct qede_dev *dev); +static inline int qede_rdma_dev_add(struct qede_dev *dev) { return 0; } -- cgit v1.2.3 From d0ba52f1d7649a3f088e410e860559cf36d479d0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 21 Jun 2017 13:39:13 -0400 Subject: ftrace: Add missing comment for FTRACE_OPS_FL_RCU All the enum flags for FTRACE_OPS have a comment except for the RCU one. Add the comment for that. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 473f088aabea..1b6992e994e6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -119,6 +119,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * for any of the functions that this ops will be registered for, then * this ops will fail to register or set_filter_ip. * PID - Is affected by set_ftrace_pid (allows filtering on those pids) + * RCU - Set when the ops can only be called when RCU is watching. */ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, -- cgit v1.2.3 From 852ec80983d682dc08a0573d37eeaa9814c4f6b1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 21 Jun 2017 10:55:47 -0700 Subject: blk-mq: Make it safe to quiesce and unquiesce from an interrupt handler Since blk_mq_quiesce_queue_nowait() can be called from interrupt context, make this safe. Since this function is not in the hot path, uninline it. 
Fixes: commit f4560ffe8cec ("blk-mq: use QUEUE_FLAG_QUIESCED to quiesce queue") Signed-off-by: Bart Van Assche Cc: Ming Lei Cc: Hannes Reinecke Cc: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 366b83cee955..23d32ff0b462 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -266,15 +266,7 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set); int blk_mq_map_queues(struct blk_mq_tag_set *set); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); -/* - * FIXME: this helper is just for working around mpt3sas. - */ -static inline void blk_mq_quiesce_queue_nowait(struct request_queue *q) -{ - spin_lock_irq(q->queue_lock); - queue_flag_set(QUEUE_FLAG_QUIESCED, q); - spin_unlock_irq(q->queue_lock); -} +void blk_mq_quiesce_queue_nowait(struct request_queue *q); /* * Driver command data is immediately after the request. So subtract request -- cgit v1.2.3 From 6f6723e21589f4594bb72b27ddbb2f75defb33bb Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Mon, 24 Apr 2017 22:43:52 -0400 Subject: ima: define is_ima_appraise_enabled() Only return enabled if in enforcing mode, not fix or log modes. 
Signed-off-by: Mimi Zohar Changes: - Define is_ima_appraise_enabled() as a bool (Thiago Bauermann) --- include/linux/ima.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ima.h b/include/linux/ima.h index 7f6952f8d6aa..0e4647e0eb60 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -75,11 +75,17 @@ static inline void ima_add_kexec_buffer(struct kimage *image) #endif #ifdef CONFIG_IMA_APPRAISE +extern bool is_ima_appraise_enabled(void); extern void ima_inode_post_setattr(struct dentry *dentry); extern int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len); extern int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name); #else +static inline bool is_ima_appraise_enabled(void) +{ + return 0; +} + static inline void ima_inode_post_setattr(struct dentry *dentry) { return; -- cgit v1.2.3 From 7003cdd6a121e7bdb8a05eb1931f9549a36ea723 Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Wed, 21 Jun 2017 16:22:46 +0300 Subject: qed*: Rename qed_roce_if.h to qed_rdma_if.h Rename the qed_roce_if file to qed_rdma_if as it represents a common interface for RoCE and iWARP. this commit affects RDMA/qedr as well. Signed-off-by: Michal Kalderon Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_rdma_if.h | 582 ++++++++++++++++++++++++++++++++++++++++ include/linux/qed/qed_roce_if.h | 582 ---------------------------------------- 2 files changed, 582 insertions(+), 582 deletions(-) create mode 100644 include/linux/qed/qed_rdma_if.h delete mode 100644 include/linux/qed/qed_roce_if.h (limited to 'include/linux') diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h new file mode 100644 index 000000000000..ff9be01b5f53 --- /dev/null +++ b/include/linux/qed/qed_rdma_if.h @@ -0,0 +1,582 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015-2017 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef _QED_RDMA_IF_H +#define _QED_RDMA_IF_H +#include +#include +#include +#include +#include +#include +#include + +enum qed_roce_ll2_tx_dest { + /* Light L2 TX Destination to the Network */ + QED_ROCE_LL2_TX_DEST_NW, + + /* Light L2 TX Destination to the Loopback */ + QED_ROCE_LL2_TX_DEST_LB, + QED_ROCE_LL2_TX_DEST_MAX +}; + +#define QED_RDMA_MAX_CNQ_SIZE (0xFFFF) + +/* rdma interface */ + +enum qed_roce_qp_state { + QED_ROCE_QP_STATE_RESET, + QED_ROCE_QP_STATE_INIT, + QED_ROCE_QP_STATE_RTR, + QED_ROCE_QP_STATE_RTS, + QED_ROCE_QP_STATE_SQD, + QED_ROCE_QP_STATE_ERR, + QED_ROCE_QP_STATE_SQE +}; + +enum qed_rdma_tid_type { + QED_RDMA_TID_REGISTERED_MR, + QED_RDMA_TID_FMR, + QED_RDMA_TID_MW_TYPE1, + QED_RDMA_TID_MW_TYPE2A +}; + +struct qed_rdma_events { + void *context; + void (*affiliated_event)(void *context, u8 fw_event_code, + void *fw_handle); + void (*unaffiliated_event)(void *context, u8 event_code); +}; + +struct qed_rdma_device { + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + u64 fw_ver; + + u64 node_guid; + u64 sys_image_guid; + + u8 max_cnq; + u8 max_sge; + u8 max_srq_sge; + u16 max_inline; + u32 max_wqe; + u32 max_srq_wqe; + u8 max_qp_resp_rd_atomic_resc; + u8 max_qp_req_rd_atomic_resc; + u64 max_dev_resp_rd_atomic_resc; + u32 max_cq; + u32 max_qp; + u32 max_srq; + u32 max_mr; + u64 max_mr_size; + u32 max_cqe; + u32 max_mw; + u32 max_fmr; + u32 max_mr_mw_fmr_pbl; + u64 max_mr_mw_fmr_size; + u32 max_pd; + u32 max_ah; + u8 max_pkey; + u16 max_srq_wr; + u8 max_stats_queues; + u32 dev_caps; + + /* Abilty to support RNR-NAK generation */ + +#define QED_RDMA_DEV_CAP_RNR_NAK_MASK 0x1 +#define QED_RDMA_DEV_CAP_RNR_NAK_SHIFT 0 + /* Abilty to support shutdown port */ +#define QED_RDMA_DEV_CAP_SHUTDOWN_PORT_MASK 0x1 +#define QED_RDMA_DEV_CAP_SHUTDOWN_PORT_SHIFT 1 + /* Abilty to support port active event */ +#define QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT_MASK 0x1 +#define QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT_SHIFT 2 + /* Abilty to support port change event 
*/ +#define QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT_MASK 0x1 +#define QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT_SHIFT 3 + /* Abilty to support system image GUID */ +#define QED_RDMA_DEV_CAP_SYS_IMAGE_MASK 0x1 +#define QED_RDMA_DEV_CAP_SYS_IMAGE_SHIFT 4 + /* Abilty to support bad P_Key counter support */ +#define QED_RDMA_DEV_CAP_BAD_PKEY_CNT_MASK 0x1 +#define QED_RDMA_DEV_CAP_BAD_PKEY_CNT_SHIFT 5 + /* Abilty to support atomic operations */ +#define QED_RDMA_DEV_CAP_ATOMIC_OP_MASK 0x1 +#define QED_RDMA_DEV_CAP_ATOMIC_OP_SHIFT 6 +#define QED_RDMA_DEV_CAP_RESIZE_CQ_MASK 0x1 +#define QED_RDMA_DEV_CAP_RESIZE_CQ_SHIFT 7 + /* Abilty to support modifying the maximum number of + * outstanding work requests per QP + */ +#define QED_RDMA_DEV_CAP_RESIZE_MAX_WR_MASK 0x1 +#define QED_RDMA_DEV_CAP_RESIZE_MAX_WR_SHIFT 8 + /* Abilty to support automatic path migration */ +#define QED_RDMA_DEV_CAP_AUTO_PATH_MIG_MASK 0x1 +#define QED_RDMA_DEV_CAP_AUTO_PATH_MIG_SHIFT 9 + /* Abilty to support the base memory management extensions */ +#define QED_RDMA_DEV_CAP_BASE_MEMORY_EXT_MASK 0x1 +#define QED_RDMA_DEV_CAP_BASE_MEMORY_EXT_SHIFT 10 +#define QED_RDMA_DEV_CAP_BASE_QUEUE_EXT_MASK 0x1 +#define QED_RDMA_DEV_CAP_BASE_QUEUE_EXT_SHIFT 11 + /* Abilty to support multipile page sizes per memory region */ +#define QED_RDMA_DEV_CAP_MULTI_PAGE_PER_MR_EXT_MASK 0x1 +#define QED_RDMA_DEV_CAP_MULTI_PAGE_PER_MR_EXT_SHIFT 12 + /* Abilty to support block list physical buffer list */ +#define QED_RDMA_DEV_CAP_BLOCK_MODE_MASK 0x1 +#define QED_RDMA_DEV_CAP_BLOCK_MODE_SHIFT 13 + /* Abilty to support zero based virtual addresses */ +#define QED_RDMA_DEV_CAP_ZBVA_MASK 0x1 +#define QED_RDMA_DEV_CAP_ZBVA_SHIFT 14 + /* Abilty to support local invalidate fencing */ +#define QED_RDMA_DEV_CAP_LOCAL_INV_FENCE_MASK 0x1 +#define QED_RDMA_DEV_CAP_LOCAL_INV_FENCE_SHIFT 15 + /* Abilty to support Loopback on QP */ +#define QED_RDMA_DEV_CAP_LB_INDICATOR_MASK 0x1 +#define QED_RDMA_DEV_CAP_LB_INDICATOR_SHIFT 16 + u64 page_size_caps; + u8 
dev_ack_delay; + u32 reserved_lkey; + u32 bad_pkey_counter; + struct qed_rdma_events events; +}; + +enum qed_port_state { + QED_RDMA_PORT_UP, + QED_RDMA_PORT_DOWN, +}; + +enum qed_roce_capability { + QED_ROCE_V1 = 1 << 0, + QED_ROCE_V2 = 1 << 1, +}; + +struct qed_rdma_port { + enum qed_port_state port_state; + int link_speed; + u64 max_msg_size; + u8 source_gid_table_len; + void *source_gid_table_ptr; + u8 pkey_table_len; + void *pkey_table_ptr; + u32 pkey_bad_counter; + enum qed_roce_capability capability; +}; + +struct qed_rdma_cnq_params { + u8 num_pbl_pages; + u64 pbl_ptr; +}; + +/* The CQ Mode affects the CQ doorbell transaction size. + * 64/32 bit machines should configure to 32/16 bits respectively. + */ +enum qed_rdma_cq_mode { + QED_RDMA_CQ_MODE_16_BITS, + QED_RDMA_CQ_MODE_32_BITS, +}; + +struct qed_roce_dcqcn_params { + u8 notification_point; + u8 reaction_point; + + /* fields for notification point */ + u32 cnp_send_timeout; + + /* fields for reaction point */ + u32 rl_bc_rate; + u16 rl_max_rate; + u16 rl_r_ai; + u16 rl_r_hai; + u16 dcqcn_g; + u32 dcqcn_k_us; + u32 dcqcn_timeout_us; +}; + +struct qed_rdma_start_in_params { + struct qed_rdma_events *events; + struct qed_rdma_cnq_params cnq_pbl_list[128]; + u8 desired_cnq; + enum qed_rdma_cq_mode cq_mode; + struct qed_roce_dcqcn_params dcqcn_params; + u16 max_mtu; + u8 mac_addr[ETH_ALEN]; + u8 iwarp_flags; +}; + +struct qed_rdma_add_user_out_params { + u16 dpi; + u64 dpi_addr; + u64 dpi_phys_addr; + u32 dpi_size; + u16 wid_count; +}; + +enum roce_mode { + ROCE_V1, + ROCE_V2_IPV4, + ROCE_V2_IPV6, + MAX_ROCE_MODE +}; + +union qed_gid { + u8 bytes[16]; + u16 words[8]; + u32 dwords[4]; + u64 qwords[2]; + u32 ipv4_addr; +}; + +struct qed_rdma_register_tid_in_params { + u32 itid; + enum qed_rdma_tid_type tid_type; + u8 key; + u16 pd; + bool local_read; + bool local_write; + bool remote_read; + bool remote_write; + bool remote_atomic; + bool mw_bind; + u64 pbl_ptr; + bool pbl_two_level; + u8 pbl_page_size_log; + 
u8 page_size_log; + u32 fbo; + u64 length; + u64 vaddr; + bool zbva; + bool phy_mr; + bool dma_mr; + + bool dif_enabled; + u64 dif_error_addr; + u64 dif_runt_addr; +}; + +struct qed_rdma_create_cq_in_params { + u32 cq_handle_lo; + u32 cq_handle_hi; + u32 cq_size; + u16 dpi; + bool pbl_two_level; + u64 pbl_ptr; + u16 pbl_num_pages; + u8 pbl_page_size_log; + u8 cnq_id; + u16 int_timeout; +}; + +struct qed_rdma_create_srq_in_params { + u64 pbl_base_addr; + u64 prod_pair_addr; + u16 num_pages; + u16 pd_id; + u16 page_size; +}; + +struct qed_rdma_destroy_cq_in_params { + u16 icid; +}; + +struct qed_rdma_destroy_cq_out_params { + u16 num_cq_notif; +}; + +struct qed_rdma_create_qp_in_params { + u32 qp_handle_lo; + u32 qp_handle_hi; + u32 qp_handle_async_lo; + u32 qp_handle_async_hi; + bool use_srq; + bool signal_all; + bool fmr_and_reserved_lkey; + u16 pd; + u16 dpi; + u16 sq_cq_id; + u16 sq_num_pages; + u64 sq_pbl_ptr; + u8 max_sq_sges; + u16 rq_cq_id; + u16 rq_num_pages; + u64 rq_pbl_ptr; + u16 srq_id; + u8 stats_queue; +}; + +struct qed_rdma_create_qp_out_params { + u32 qp_id; + u16 icid; + void *rq_pbl_virt; + dma_addr_t rq_pbl_phys; + void *sq_pbl_virt; + dma_addr_t sq_pbl_phys; +}; + +struct qed_rdma_modify_qp_in_params { + u32 modify_flags; +#define QED_RDMA_MODIFY_QP_VALID_NEW_STATE_MASK 0x1 +#define QED_RDMA_MODIFY_QP_VALID_NEW_STATE_SHIFT 0 +#define QED_ROCE_MODIFY_QP_VALID_PKEY_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_PKEY_SHIFT 1 +#define QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN_MASK 0x1 +#define QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN_SHIFT 2 +#define QED_ROCE_MODIFY_QP_VALID_DEST_QP_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_DEST_QP_SHIFT 3 +#define QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR_SHIFT 4 +#define QED_ROCE_MODIFY_QP_VALID_RQ_PSN_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_RQ_PSN_SHIFT 5 +#define QED_ROCE_MODIFY_QP_VALID_SQ_PSN_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_SQ_PSN_SHIFT 6 +#define 
QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ_MASK 0x1 +#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ_SHIFT 7 +#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP_MASK 0x1 +#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP_SHIFT 8 +#define QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT_SHIFT 9 +#define QED_ROCE_MODIFY_QP_VALID_RETRY_CNT_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_RETRY_CNT_SHIFT 10 +#define QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT_SHIFT 11 +#define QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER_SHIFT 12 +#define QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN_SHIFT 13 +#define QED_ROCE_MODIFY_QP_VALID_ROCE_MODE_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_ROCE_MODE_SHIFT 14 + + enum qed_roce_qp_state new_state; + u16 pkey; + bool incoming_rdma_read_en; + bool incoming_rdma_write_en; + bool incoming_atomic_en; + bool e2e_flow_control_en; + u32 dest_qp; + bool lb_indication; + u16 mtu; + u8 traffic_class_tos; + u8 hop_limit_ttl; + u32 flow_label; + union qed_gid sgid; + union qed_gid dgid; + u16 udp_src_port; + + u16 vlan_id; + + u32 rq_psn; + u32 sq_psn; + u8 max_rd_atomic_resp; + u8 max_rd_atomic_req; + u32 ack_timeout; + u8 retry_cnt; + u8 rnr_retry_cnt; + u8 min_rnr_nak_timer; + bool sqd_async; + u8 remote_mac_addr[6]; + u8 local_mac_addr[6]; + bool use_local_mac; + enum roce_mode roce_mode; +}; + +struct qed_rdma_query_qp_out_params { + enum qed_roce_qp_state state; + u32 rq_psn; + u32 sq_psn; + bool draining; + u16 mtu; + u32 dest_qp; + bool incoming_rdma_read_en; + bool incoming_rdma_write_en; + bool incoming_atomic_en; + bool e2e_flow_control_en; + union qed_gid sgid; + union qed_gid dgid; + u32 flow_label; + u8 hop_limit_ttl; + u8 traffic_class_tos; + u32 timeout; + u8 rnr_retry; + u8 retry_cnt; + u8 min_rnr_nak_timer; + u16 
pkey_index; + u8 max_rd_atomic; + u8 max_dest_rd_atomic; + bool sqd_async; +}; + +struct qed_rdma_create_srq_out_params { + u16 srq_id; +}; + +struct qed_rdma_destroy_srq_in_params { + u16 srq_id; +}; + +struct qed_rdma_modify_srq_in_params { + u32 wqe_limit; + u16 srq_id; +}; + +struct qed_rdma_stats_out_params { + u64 sent_bytes; + u64 sent_pkts; + u64 rcv_bytes; + u64 rcv_pkts; +}; + +struct qed_rdma_counters_out_params { + u64 pd_count; + u64 max_pd; + u64 dpi_count; + u64 max_dpi; + u64 cq_count; + u64 max_cq; + u64 qp_count; + u64 max_qp; + u64 tid_count; + u64 max_tid; +}; + +#define QED_ROCE_TX_HEAD_FAILURE (1) +#define QED_ROCE_TX_FRAG_FAILURE (2) + +struct qed_roce_ll2_header { + void *vaddr; + dma_addr_t baddr; + size_t len; +}; + +struct qed_roce_ll2_buffer { + dma_addr_t baddr; + size_t len; +}; + +struct qed_roce_ll2_packet { + struct qed_roce_ll2_header header; + int n_seg; + struct qed_roce_ll2_buffer payload[RDMA_MAX_SGE_PER_SQ_WQE]; + int roce_mode; + enum qed_roce_ll2_tx_dest tx_dest; +}; + +enum qed_rdma_type { + QED_RDMA_TYPE_ROCE, +}; + +struct qed_dev_rdma_info { + struct qed_dev_info common; + enum qed_rdma_type rdma_type; + u8 user_dpm_enabled; +}; + +struct qed_rdma_ops { + const struct qed_common_ops *common; + + int (*fill_dev_info)(struct qed_dev *cdev, + struct qed_dev_rdma_info *info); + void *(*rdma_get_rdma_ctx)(struct qed_dev *cdev); + + int (*rdma_init)(struct qed_dev *dev, + struct qed_rdma_start_in_params *iparams); + + int (*rdma_add_user)(void *rdma_cxt, + struct qed_rdma_add_user_out_params *oparams); + + void (*rdma_remove_user)(void *rdma_cxt, u16 dpi); + int (*rdma_stop)(void *rdma_cxt); + struct qed_rdma_device* (*rdma_query_device)(void *rdma_cxt); + struct qed_rdma_port* (*rdma_query_port)(void *rdma_cxt); + int (*rdma_get_start_sb)(struct qed_dev *cdev); + int (*rdma_get_min_cnq_msix)(struct qed_dev *cdev); + void (*rdma_cnq_prod_update)(void *rdma_cxt, u8 cnq_index, u16 prod); + int (*rdma_get_rdma_int)(struct qed_dev 
*cdev, + struct qed_int_info *info); + int (*rdma_set_rdma_int)(struct qed_dev *cdev, u16 cnt); + int (*rdma_alloc_pd)(void *rdma_cxt, u16 *pd); + void (*rdma_dealloc_pd)(void *rdma_cxt, u16 pd); + int (*rdma_create_cq)(void *rdma_cxt, + struct qed_rdma_create_cq_in_params *params, + u16 *icid); + int (*rdma_destroy_cq)(void *rdma_cxt, + struct qed_rdma_destroy_cq_in_params *iparams, + struct qed_rdma_destroy_cq_out_params *oparams); + struct qed_rdma_qp * + (*rdma_create_qp)(void *rdma_cxt, + struct qed_rdma_create_qp_in_params *iparams, + struct qed_rdma_create_qp_out_params *oparams); + + int (*rdma_modify_qp)(void *roce_cxt, struct qed_rdma_qp *qp, + struct qed_rdma_modify_qp_in_params *iparams); + + int (*rdma_query_qp)(void *rdma_cxt, struct qed_rdma_qp *qp, + struct qed_rdma_query_qp_out_params *oparams); + int (*rdma_destroy_qp)(void *rdma_cxt, struct qed_rdma_qp *qp); + + int + (*rdma_register_tid)(void *rdma_cxt, + struct qed_rdma_register_tid_in_params *iparams); + + int (*rdma_deregister_tid)(void *rdma_cxt, u32 itid); + int (*rdma_alloc_tid)(void *rdma_cxt, u32 *itid); + void (*rdma_free_tid)(void *rdma_cxt, u32 itid); + + int (*ll2_acquire_connection)(void *rdma_cxt, + struct qed_ll2_acquire_data *data); + + int (*ll2_establish_connection)(void *rdma_cxt, u8 connection_handle); + int (*ll2_terminate_connection)(void *rdma_cxt, u8 connection_handle); + void (*ll2_release_connection)(void *rdma_cxt, u8 connection_handle); + + int (*ll2_prepare_tx_packet)(void *rdma_cxt, + u8 connection_handle, + struct qed_ll2_tx_pkt_info *pkt, + bool notify_fw); + + int (*ll2_set_fragment_of_tx_packet)(void *rdma_cxt, + u8 connection_handle, + dma_addr_t addr, + u16 nbytes); + int (*ll2_post_rx_buffer)(void *rdma_cxt, u8 connection_handle, + dma_addr_t addr, u16 buf_len, void *cookie, + u8 notify_fw); + int (*ll2_get_stats)(void *rdma_cxt, + u8 connection_handle, + struct qed_ll2_stats *p_stats); + int (*ll2_set_mac_filter)(struct qed_dev *cdev, + u8 *old_mac_address, 
u8 *new_mac_address); + +}; + +const struct qed_rdma_ops *qed_get_rdma_ops(void); + +#endif diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h deleted file mode 100644 index 8e70f5ee05af..000000000000 --- a/include/linux/qed/qed_roce_if.h +++ /dev/null @@ -1,582 +0,0 @@ -/* QLogic qed NIC Driver - * Copyright (c) 2015-2017 QLogic Corporation - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and /or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef _QED_ROCE_IF_H -#define _QED_ROCE_IF_H -#include -#include -#include -#include -#include -#include -#include - -enum qed_roce_ll2_tx_dest { - /* Light L2 TX Destination to the Network */ - QED_ROCE_LL2_TX_DEST_NW, - - /* Light L2 TX Destination to the Loopback */ - QED_ROCE_LL2_TX_DEST_LB, - QED_ROCE_LL2_TX_DEST_MAX -}; - -#define QED_RDMA_MAX_CNQ_SIZE (0xFFFF) - -/* rdma interface */ - -enum qed_roce_qp_state { - QED_ROCE_QP_STATE_RESET, - QED_ROCE_QP_STATE_INIT, - QED_ROCE_QP_STATE_RTR, - QED_ROCE_QP_STATE_RTS, - QED_ROCE_QP_STATE_SQD, - QED_ROCE_QP_STATE_ERR, - QED_ROCE_QP_STATE_SQE -}; - -enum qed_rdma_tid_type { - QED_RDMA_TID_REGISTERED_MR, - QED_RDMA_TID_FMR, - QED_RDMA_TID_MW_TYPE1, - QED_RDMA_TID_MW_TYPE2A -}; - -struct qed_rdma_events { - void *context; - void (*affiliated_event)(void *context, u8 fw_event_code, - void *fw_handle); - void (*unaffiliated_event)(void *context, u8 event_code); -}; - -struct qed_rdma_device { - u32 vendor_id; - u32 vendor_part_id; - u32 hw_ver; - u64 fw_ver; - - u64 node_guid; - u64 sys_image_guid; - - u8 max_cnq; - u8 max_sge; - u8 max_srq_sge; - u16 max_inline; - u32 max_wqe; - u32 max_srq_wqe; - u8 max_qp_resp_rd_atomic_resc; - u8 max_qp_req_rd_atomic_resc; - u64 max_dev_resp_rd_atomic_resc; - u32 max_cq; - u32 max_qp; - u32 max_srq; - u32 max_mr; - u64 max_mr_size; - u32 max_cqe; - u32 max_mw; - u32 max_fmr; - u32 max_mr_mw_fmr_pbl; - u64 max_mr_mw_fmr_size; - u32 max_pd; - u32 max_ah; - u8 max_pkey; - u16 max_srq_wr; - u8 max_stats_queues; - u32 dev_caps; - - /* Abilty to support RNR-NAK generation */ - -#define QED_RDMA_DEV_CAP_RNR_NAK_MASK 0x1 -#define QED_RDMA_DEV_CAP_RNR_NAK_SHIFT 0 - /* Abilty to support shutdown port */ -#define QED_RDMA_DEV_CAP_SHUTDOWN_PORT_MASK 0x1 -#define QED_RDMA_DEV_CAP_SHUTDOWN_PORT_SHIFT 1 - /* Abilty to support port active event */ -#define QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT_MASK 0x1 -#define QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT_SHIFT 2 - /* Abilty to support port change event 
*/ -#define QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT_MASK 0x1 -#define QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT_SHIFT 3 - /* Abilty to support system image GUID */ -#define QED_RDMA_DEV_CAP_SYS_IMAGE_MASK 0x1 -#define QED_RDMA_DEV_CAP_SYS_IMAGE_SHIFT 4 - /* Abilty to support bad P_Key counter support */ -#define QED_RDMA_DEV_CAP_BAD_PKEY_CNT_MASK 0x1 -#define QED_RDMA_DEV_CAP_BAD_PKEY_CNT_SHIFT 5 - /* Abilty to support atomic operations */ -#define QED_RDMA_DEV_CAP_ATOMIC_OP_MASK 0x1 -#define QED_RDMA_DEV_CAP_ATOMIC_OP_SHIFT 6 -#define QED_RDMA_DEV_CAP_RESIZE_CQ_MASK 0x1 -#define QED_RDMA_DEV_CAP_RESIZE_CQ_SHIFT 7 - /* Abilty to support modifying the maximum number of - * outstanding work requests per QP - */ -#define QED_RDMA_DEV_CAP_RESIZE_MAX_WR_MASK 0x1 -#define QED_RDMA_DEV_CAP_RESIZE_MAX_WR_SHIFT 8 - /* Abilty to support automatic path migration */ -#define QED_RDMA_DEV_CAP_AUTO_PATH_MIG_MASK 0x1 -#define QED_RDMA_DEV_CAP_AUTO_PATH_MIG_SHIFT 9 - /* Abilty to support the base memory management extensions */ -#define QED_RDMA_DEV_CAP_BASE_MEMORY_EXT_MASK 0x1 -#define QED_RDMA_DEV_CAP_BASE_MEMORY_EXT_SHIFT 10 -#define QED_RDMA_DEV_CAP_BASE_QUEUE_EXT_MASK 0x1 -#define QED_RDMA_DEV_CAP_BASE_QUEUE_EXT_SHIFT 11 - /* Abilty to support multipile page sizes per memory region */ -#define QED_RDMA_DEV_CAP_MULTI_PAGE_PER_MR_EXT_MASK 0x1 -#define QED_RDMA_DEV_CAP_MULTI_PAGE_PER_MR_EXT_SHIFT 12 - /* Abilty to support block list physical buffer list */ -#define QED_RDMA_DEV_CAP_BLOCK_MODE_MASK 0x1 -#define QED_RDMA_DEV_CAP_BLOCK_MODE_SHIFT 13 - /* Abilty to support zero based virtual addresses */ -#define QED_RDMA_DEV_CAP_ZBVA_MASK 0x1 -#define QED_RDMA_DEV_CAP_ZBVA_SHIFT 14 - /* Abilty to support local invalidate fencing */ -#define QED_RDMA_DEV_CAP_LOCAL_INV_FENCE_MASK 0x1 -#define QED_RDMA_DEV_CAP_LOCAL_INV_FENCE_SHIFT 15 - /* Abilty to support Loopback on QP */ -#define QED_RDMA_DEV_CAP_LB_INDICATOR_MASK 0x1 -#define QED_RDMA_DEV_CAP_LB_INDICATOR_SHIFT 16 - u64 page_size_caps; - u8 
dev_ack_delay; - u32 reserved_lkey; - u32 bad_pkey_counter; - struct qed_rdma_events events; -}; - -enum qed_port_state { - QED_RDMA_PORT_UP, - QED_RDMA_PORT_DOWN, -}; - -enum qed_roce_capability { - QED_ROCE_V1 = 1 << 0, - QED_ROCE_V2 = 1 << 1, -}; - -struct qed_rdma_port { - enum qed_port_state port_state; - int link_speed; - u64 max_msg_size; - u8 source_gid_table_len; - void *source_gid_table_ptr; - u8 pkey_table_len; - void *pkey_table_ptr; - u32 pkey_bad_counter; - enum qed_roce_capability capability; -}; - -struct qed_rdma_cnq_params { - u8 num_pbl_pages; - u64 pbl_ptr; -}; - -/* The CQ Mode affects the CQ doorbell transaction size. - * 64/32 bit machines should configure to 32/16 bits respectively. - */ -enum qed_rdma_cq_mode { - QED_RDMA_CQ_MODE_16_BITS, - QED_RDMA_CQ_MODE_32_BITS, -}; - -struct qed_roce_dcqcn_params { - u8 notification_point; - u8 reaction_point; - - /* fields for notification point */ - u32 cnp_send_timeout; - - /* fields for reaction point */ - u32 rl_bc_rate; - u16 rl_max_rate; - u16 rl_r_ai; - u16 rl_r_hai; - u16 dcqcn_g; - u32 dcqcn_k_us; - u32 dcqcn_timeout_us; -}; - -struct qed_rdma_start_in_params { - struct qed_rdma_events *events; - struct qed_rdma_cnq_params cnq_pbl_list[128]; - u8 desired_cnq; - enum qed_rdma_cq_mode cq_mode; - struct qed_roce_dcqcn_params dcqcn_params; - u16 max_mtu; - u8 mac_addr[ETH_ALEN]; - u8 iwarp_flags; -}; - -struct qed_rdma_add_user_out_params { - u16 dpi; - u64 dpi_addr; - u64 dpi_phys_addr; - u32 dpi_size; - u16 wid_count; -}; - -enum roce_mode { - ROCE_V1, - ROCE_V2_IPV4, - ROCE_V2_IPV6, - MAX_ROCE_MODE -}; - -union qed_gid { - u8 bytes[16]; - u16 words[8]; - u32 dwords[4]; - u64 qwords[2]; - u32 ipv4_addr; -}; - -struct qed_rdma_register_tid_in_params { - u32 itid; - enum qed_rdma_tid_type tid_type; - u8 key; - u16 pd; - bool local_read; - bool local_write; - bool remote_read; - bool remote_write; - bool remote_atomic; - bool mw_bind; - u64 pbl_ptr; - bool pbl_two_level; - u8 pbl_page_size_log; - 
u8 page_size_log; - u32 fbo; - u64 length; - u64 vaddr; - bool zbva; - bool phy_mr; - bool dma_mr; - - bool dif_enabled; - u64 dif_error_addr; - u64 dif_runt_addr; -}; - -struct qed_rdma_create_cq_in_params { - u32 cq_handle_lo; - u32 cq_handle_hi; - u32 cq_size; - u16 dpi; - bool pbl_two_level; - u64 pbl_ptr; - u16 pbl_num_pages; - u8 pbl_page_size_log; - u8 cnq_id; - u16 int_timeout; -}; - -struct qed_rdma_create_srq_in_params { - u64 pbl_base_addr; - u64 prod_pair_addr; - u16 num_pages; - u16 pd_id; - u16 page_size; -}; - -struct qed_rdma_destroy_cq_in_params { - u16 icid; -}; - -struct qed_rdma_destroy_cq_out_params { - u16 num_cq_notif; -}; - -struct qed_rdma_create_qp_in_params { - u32 qp_handle_lo; - u32 qp_handle_hi; - u32 qp_handle_async_lo; - u32 qp_handle_async_hi; - bool use_srq; - bool signal_all; - bool fmr_and_reserved_lkey; - u16 pd; - u16 dpi; - u16 sq_cq_id; - u16 sq_num_pages; - u64 sq_pbl_ptr; - u8 max_sq_sges; - u16 rq_cq_id; - u16 rq_num_pages; - u64 rq_pbl_ptr; - u16 srq_id; - u8 stats_queue; -}; - -struct qed_rdma_create_qp_out_params { - u32 qp_id; - u16 icid; - void *rq_pbl_virt; - dma_addr_t rq_pbl_phys; - void *sq_pbl_virt; - dma_addr_t sq_pbl_phys; -}; - -struct qed_rdma_modify_qp_in_params { - u32 modify_flags; -#define QED_RDMA_MODIFY_QP_VALID_NEW_STATE_MASK 0x1 -#define QED_RDMA_MODIFY_QP_VALID_NEW_STATE_SHIFT 0 -#define QED_ROCE_MODIFY_QP_VALID_PKEY_MASK 0x1 -#define QED_ROCE_MODIFY_QP_VALID_PKEY_SHIFT 1 -#define QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN_MASK 0x1 -#define QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN_SHIFT 2 -#define QED_ROCE_MODIFY_QP_VALID_DEST_QP_MASK 0x1 -#define QED_ROCE_MODIFY_QP_VALID_DEST_QP_SHIFT 3 -#define QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR_MASK 0x1 -#define QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR_SHIFT 4 -#define QED_ROCE_MODIFY_QP_VALID_RQ_PSN_MASK 0x1 -#define QED_ROCE_MODIFY_QP_VALID_RQ_PSN_SHIFT 5 -#define QED_ROCE_MODIFY_QP_VALID_SQ_PSN_MASK 0x1 -#define QED_ROCE_MODIFY_QP_VALID_SQ_PSN_SHIFT 6 -#define 
QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ_MASK 0x1 -#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ_SHIFT 7 -#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP_MASK 0x1 -#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP_SHIFT 8 -#define QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT_MASK 0x1 -#define QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT_SHIFT 9 -#define QED_ROCE_MODIFY_QP_VALID_RETRY_CNT_MASK 0x1 -#define QED_ROCE_MODIFY_QP_VALID_RETRY_CNT_SHIFT 10 -#define QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT_MASK 0x1 -#define QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT_SHIFT 11 -#define QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER_MASK 0x1 -#define QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER_SHIFT 12 -#define QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN_MASK 0x1 -#define QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN_SHIFT 13 -#define QED_ROCE_MODIFY_QP_VALID_ROCE_MODE_MASK 0x1 -#define QED_ROCE_MODIFY_QP_VALID_ROCE_MODE_SHIFT 14 - - enum qed_roce_qp_state new_state; - u16 pkey; - bool incoming_rdma_read_en; - bool incoming_rdma_write_en; - bool incoming_atomic_en; - bool e2e_flow_control_en; - u32 dest_qp; - bool lb_indication; - u16 mtu; - u8 traffic_class_tos; - u8 hop_limit_ttl; - u32 flow_label; - union qed_gid sgid; - union qed_gid dgid; - u16 udp_src_port; - - u16 vlan_id; - - u32 rq_psn; - u32 sq_psn; - u8 max_rd_atomic_resp; - u8 max_rd_atomic_req; - u32 ack_timeout; - u8 retry_cnt; - u8 rnr_retry_cnt; - u8 min_rnr_nak_timer; - bool sqd_async; - u8 remote_mac_addr[6]; - u8 local_mac_addr[6]; - bool use_local_mac; - enum roce_mode roce_mode; -}; - -struct qed_rdma_query_qp_out_params { - enum qed_roce_qp_state state; - u32 rq_psn; - u32 sq_psn; - bool draining; - u16 mtu; - u32 dest_qp; - bool incoming_rdma_read_en; - bool incoming_rdma_write_en; - bool incoming_atomic_en; - bool e2e_flow_control_en; - union qed_gid sgid; - union qed_gid dgid; - u32 flow_label; - u8 hop_limit_ttl; - u8 traffic_class_tos; - u32 timeout; - u8 rnr_retry; - u8 retry_cnt; - u8 min_rnr_nak_timer; - u16 
pkey_index; - u8 max_rd_atomic; - u8 max_dest_rd_atomic; - bool sqd_async; -}; - -struct qed_rdma_create_srq_out_params { - u16 srq_id; -}; - -struct qed_rdma_destroy_srq_in_params { - u16 srq_id; -}; - -struct qed_rdma_modify_srq_in_params { - u32 wqe_limit; - u16 srq_id; -}; - -struct qed_rdma_stats_out_params { - u64 sent_bytes; - u64 sent_pkts; - u64 rcv_bytes; - u64 rcv_pkts; -}; - -struct qed_rdma_counters_out_params { - u64 pd_count; - u64 max_pd; - u64 dpi_count; - u64 max_dpi; - u64 cq_count; - u64 max_cq; - u64 qp_count; - u64 max_qp; - u64 tid_count; - u64 max_tid; -}; - -#define QED_ROCE_TX_HEAD_FAILURE (1) -#define QED_ROCE_TX_FRAG_FAILURE (2) - -struct qed_roce_ll2_header { - void *vaddr; - dma_addr_t baddr; - size_t len; -}; - -struct qed_roce_ll2_buffer { - dma_addr_t baddr; - size_t len; -}; - -struct qed_roce_ll2_packet { - struct qed_roce_ll2_header header; - int n_seg; - struct qed_roce_ll2_buffer payload[RDMA_MAX_SGE_PER_SQ_WQE]; - int roce_mode; - enum qed_roce_ll2_tx_dest tx_dest; -}; - -enum qed_rdma_type { - QED_RDMA_TYPE_ROCE, -}; - -struct qed_dev_rdma_info { - struct qed_dev_info common; - enum qed_rdma_type rdma_type; - u8 user_dpm_enabled; -}; - -struct qed_rdma_ops { - const struct qed_common_ops *common; - - int (*fill_dev_info)(struct qed_dev *cdev, - struct qed_dev_rdma_info *info); - void *(*rdma_get_rdma_ctx)(struct qed_dev *cdev); - - int (*rdma_init)(struct qed_dev *dev, - struct qed_rdma_start_in_params *iparams); - - int (*rdma_add_user)(void *rdma_cxt, - struct qed_rdma_add_user_out_params *oparams); - - void (*rdma_remove_user)(void *rdma_cxt, u16 dpi); - int (*rdma_stop)(void *rdma_cxt); - struct qed_rdma_device* (*rdma_query_device)(void *rdma_cxt); - struct qed_rdma_port* (*rdma_query_port)(void *rdma_cxt); - int (*rdma_get_start_sb)(struct qed_dev *cdev); - int (*rdma_get_min_cnq_msix)(struct qed_dev *cdev); - void (*rdma_cnq_prod_update)(void *rdma_cxt, u8 cnq_index, u16 prod); - int (*rdma_get_rdma_int)(struct qed_dev 
*cdev, - struct qed_int_info *info); - int (*rdma_set_rdma_int)(struct qed_dev *cdev, u16 cnt); - int (*rdma_alloc_pd)(void *rdma_cxt, u16 *pd); - void (*rdma_dealloc_pd)(void *rdma_cxt, u16 pd); - int (*rdma_create_cq)(void *rdma_cxt, - struct qed_rdma_create_cq_in_params *params, - u16 *icid); - int (*rdma_destroy_cq)(void *rdma_cxt, - struct qed_rdma_destroy_cq_in_params *iparams, - struct qed_rdma_destroy_cq_out_params *oparams); - struct qed_rdma_qp * - (*rdma_create_qp)(void *rdma_cxt, - struct qed_rdma_create_qp_in_params *iparams, - struct qed_rdma_create_qp_out_params *oparams); - - int (*rdma_modify_qp)(void *roce_cxt, struct qed_rdma_qp *qp, - struct qed_rdma_modify_qp_in_params *iparams); - - int (*rdma_query_qp)(void *rdma_cxt, struct qed_rdma_qp *qp, - struct qed_rdma_query_qp_out_params *oparams); - int (*rdma_destroy_qp)(void *rdma_cxt, struct qed_rdma_qp *qp); - - int - (*rdma_register_tid)(void *rdma_cxt, - struct qed_rdma_register_tid_in_params *iparams); - - int (*rdma_deregister_tid)(void *rdma_cxt, u32 itid); - int (*rdma_alloc_tid)(void *rdma_cxt, u32 *itid); - void (*rdma_free_tid)(void *rdma_cxt, u32 itid); - - int (*ll2_acquire_connection)(void *rdma_cxt, - struct qed_ll2_acquire_data *data); - - int (*ll2_establish_connection)(void *rdma_cxt, u8 connection_handle); - int (*ll2_terminate_connection)(void *rdma_cxt, u8 connection_handle); - void (*ll2_release_connection)(void *rdma_cxt, u8 connection_handle); - - int (*ll2_prepare_tx_packet)(void *rdma_cxt, - u8 connection_handle, - struct qed_ll2_tx_pkt_info *pkt, - bool notify_fw); - - int (*ll2_set_fragment_of_tx_packet)(void *rdma_cxt, - u8 connection_handle, - dma_addr_t addr, - u16 nbytes); - int (*ll2_post_rx_buffer)(void *rdma_cxt, u8 connection_handle, - dma_addr_t addr, u16 buf_len, void *cookie, - u8 notify_fw); - int (*ll2_get_stats)(void *rdma_cxt, - u8 connection_handle, - struct qed_ll2_stats *p_stats); - int (*ll2_set_mac_filter)(struct qed_dev *cdev, - u8 *old_mac_address, 
u8 *new_mac_address); - -}; - -const struct qed_rdma_ops *qed_get_rdma_ops(void); - -#endif -- cgit v1.2.3 From 3708184afc77bb67709a67a35d9f367ebd32cbc4 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 6 Jun 2017 12:37:37 +0300 Subject: device property: Move FW type specific functionality to FW specific files The device and fwnode property API supports Devicetree, ACPI and pset properties. The implementation of this functionality for each firmware type was embedded in the fwnode property core. Move it out to firmware type specific locations, making it easier to maintain. Depends-on: ("of: Move OF property and graph API from base.c to property.c") Signed-off-by: Sakari Ailus Reviewed-by: Mika Westerberg Acked-by: Rob Herring Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 4 ++++ include/linux/fwnode.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/of.h | 2 ++ 3 files changed, 60 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 137e4a3d89c5..b8f23c521b67 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -56,6 +56,9 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev) acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) + +extern const struct fwnode_operations acpi_fwnode_ops; + static inline struct fwnode_handle *acpi_alloc_fwnode_static(void) { struct fwnode_handle *fwnode; @@ -65,6 +68,7 @@ static inline struct fwnode_handle *acpi_alloc_fwnode_static(void) return NULL; fwnode->type = FWNODE_ACPI_STATIC; + fwnode->ops = &acpi_fwnode_ops; return fwnode; } diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 3dff2398a5f0..8f64b3ae9c57 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -12,6 +12,8 @@ #ifndef _LINUX_FWNODE_H_ #define _LINUX_FWNODE_H_ +#include + enum fwnode_type { FWNODE_INVALID = 0, FWNODE_OF, @@ -22,9 +24,12 @@ enum fwnode_type { FWNODE_IRQCHIP 
}; +struct fwnode_operations; + struct fwnode_handle { enum fwnode_type type; struct fwnode_handle *secondary; + const struct fwnode_operations *ops; }; /** @@ -39,4 +44,53 @@ struct fwnode_endpoint { const struct fwnode_handle *local_fwnode; }; +/** + * struct fwnode_operations - Operations for fwnode interface + * @get: Get a reference to an fwnode. + * @put: Put a reference to an fwnode. + * @property_present: Return true if a property is present. + * @property_read_integer_array: Read an array of integer properties. Return + * zero on success, a negative error code + * otherwise. + * @property_read_string_array: Read an array of string properties. Return zero + * on success, a negative error code otherwise. + * @get_parent: Return the parent of an fwnode. + * @get_next_child_node: Return the next child node in an iteration. + * @get_named_child_node: Return a child node with a given name. + */ +struct fwnode_operations { + void (*get)(struct fwnode_handle *fwnode); + void (*put)(struct fwnode_handle *fwnode); + bool (*property_present)(struct fwnode_handle *fwnode, + const char *propname); + int (*property_read_int_array)(struct fwnode_handle *fwnode, + const char *propname, + unsigned int elem_size, void *val, + size_t nval); + int (*property_read_string_array)(struct fwnode_handle *fwnode_handle, + const char *propname, + const char **val, size_t nval); + struct fwnode_handle *(*get_parent)(struct fwnode_handle *fwnode); + struct fwnode_handle * + (*get_next_child_node)(struct fwnode_handle *fwnode, + struct fwnode_handle *child); + struct fwnode_handle * + (*get_named_child_node)(struct fwnode_handle *fwnode, const char *name); +}; + +#define fwnode_has_op(fwnode, op) \ + ((fwnode) && (fwnode)->ops && (fwnode)->ops->op) +#define fwnode_call_int_op(fwnode, op, ...) \ + (fwnode ? (fwnode_has_op(fwnode, op) ? \ + (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : -ENXIO) : \ + -EINVAL) +#define fwnode_call_ptr_op(fwnode, op, ...) \ + (fwnode_has_op(fwnode, op) ? 
\ + (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : NULL) +#define fwnode_call_void_op(fwnode, op, ...) \ + do { \ + if (fwnode_has_op(fwnode, op)) \ + (fwnode)->ops->op(fwnode, ## __VA_ARGS__); \ + } while (false) + #endif diff --git a/include/linux/of.h b/include/linux/of.h index 29b7b738b509..cdbfa88c32cf 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -100,10 +100,12 @@ struct of_reconfig_data { /* initialize a node */ extern struct kobj_type of_node_ktype; +extern const struct fwnode_operations of_fwnode_ops; static inline void of_node_init(struct device_node *node) { kobject_init(&node->kobj, &of_node_ktype); node->fwnode.type = FWNODE_OF; + node->fwnode.ops = &of_fwnode_ops; } /* true when node is initialized */ -- cgit v1.2.3 From 3b27d00e7b6d7c889d87fd00df600c495b968e30 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 6 Jun 2017 12:37:38 +0300 Subject: device property: Move fwnode graph ops to firmware specific locations Move firmware specific implementations of the fwnode graph operations to firmware specific locations. Signed-off-by: Sakari Ailus Reviewed-by: Mika Westerberg Acked-by: Rob Herring Signed-off-by: Rafael J. Wysocki --- include/linux/fwnode.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 8f64b3ae9c57..e315d867d631 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -57,6 +57,11 @@ struct fwnode_endpoint { * @get_parent: Return the parent of an fwnode. * @get_next_child_node: Return the next child node in an iteration. * @get_named_child_node: Return a child node with a given name. + * @graph_get_next_endpoint: Return an endpoint node in an iteration. + * @graph_get_remote_endpoint: Return the remote endpoint node of a local + * endpoint node. + * @graph_get_port_parent: Return the parent node of a port node. + * @graph_parse_endpoint: Parse endpoint for port and endpoint id. 
*/ struct fwnode_operations { void (*get)(struct fwnode_handle *fwnode); @@ -76,6 +81,15 @@ struct fwnode_operations { struct fwnode_handle *child); struct fwnode_handle * (*get_named_child_node)(struct fwnode_handle *fwnode, const char *name); + struct fwnode_handle * + (*graph_get_next_endpoint)(struct fwnode_handle *fwnode, + struct fwnode_handle *prev); + struct fwnode_handle * + (*graph_get_remote_endpoint)(struct fwnode_handle *fwnode); + struct fwnode_handle * + (*graph_get_port_parent)(struct fwnode_handle *fwnode); + int (*graph_parse_endpoint)(struct fwnode_handle *fwnode, + struct fwnode_endpoint *endpoint); }; #define fwnode_has_op(fwnode, op) \ -- cgit v1.2.3 From 2294b3af05e9b3fe0b84a78971e709037bd7593c Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 6 Jun 2017 12:37:39 +0300 Subject: device property: Introduce fwnode_device_is_available() Add fwnode_device_is_available() to tell whether the device corresponding to a certain fwnode_handle is available for use. Signed-off-by: Sakari Ailus Reviewed-by: Mika Westerberg Acked-by: Rob Herring Signed-off-by: Rafael J. 
Wysocki --- include/linux/fwnode.h | 1 + include/linux/property.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index e315d867d631..9ab375419189 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -66,6 +66,7 @@ struct fwnode_endpoint { struct fwnode_operations { void (*get)(struct fwnode_handle *fwnode); void (*put)(struct fwnode_handle *fwnode); + bool (*device_is_available)(struct fwnode_handle *fwnode); bool (*property_present)(struct fwnode_handle *fwnode, const char *propname); int (*property_read_int_array)(struct fwnode_handle *fwnode, diff --git a/include/linux/property.h b/include/linux/property.h index 2f482616a2f2..7be014af78ed 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -51,6 +51,7 @@ int device_property_read_string(struct device *dev, const char *propname, int device_property_match_string(struct device *dev, const char *propname, const char *string); +bool fwnode_device_is_available(struct fwnode_handle *fwnode); bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname); int fwnode_property_read_u8_array(struct fwnode_handle *fwnode, const char *propname, u8 *val, -- cgit v1.2.3 From 125ee6b3b0fa920c730b0991e6f083a9f5b1e4c3 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 6 Jun 2017 12:37:40 +0300 Subject: device property: Add FW type agnostic fwnode_graph_get_remote_node Add fwnode_graph_get_remote_node() function which is equivalent to of_graph_get_remote_node() on OF. Signed-off-by: Sakari Ailus Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. 
Wysocki --- include/linux/property.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 7be014af78ed..0597a743aa66 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -281,6 +281,8 @@ struct fwnode_handle *fwnode_graph_get_remote_port( struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_graph_get_remote_endpoint( struct fwnode_handle *fwnode); +struct fwnode_handle *fwnode_graph_get_remote_node(struct fwnode_handle *fwnode, + u32 port, u32 endpoint); int fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint); -- cgit v1.2.3 From 6a71d8d77795e0f7d887baa95bfc0d1d2bc74899 Mon Sep 17 00:00:00 2001 From: Kieran Bingham Date: Tue, 6 Jun 2017 12:37:41 +0300 Subject: device property: Add fwnode_graph_get_port_parent Provide a helper to obtain the parent device fwnode without first parsing the remote-endpoint as per fwnode_graph_get_remote_port_parent. Signed-off-by: Kieran Bingham Signed-off-by: Sakari Ailus Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. 
Wysocki --- include/linux/property.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 0597a743aa66..7e77039e6b81 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -275,6 +275,8 @@ void *device_get_mac_address(struct device *dev, char *addr, int alen); struct fwnode_handle *fwnode_graph_get_next_endpoint( struct fwnode_handle *fwnode, struct fwnode_handle *prev); +struct fwnode_handle * +fwnode_graph_get_port_parent(struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_graph_get_remote_port_parent( struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_graph_get_remote_port( -- cgit v1.2.3 From 3c85d6db5e5f05ae6c3d7f5a0ceceb43746a5ca7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 19 Jun 2017 04:12:00 +0200 Subject: sched/loadavg: Generalize "_idle" naming to "_nohz" The loadavg naming code still assumes that nohz == idle whereas its code is actually handling well both nohz idle and nohz full. So let's fix the naming according to what the code actually does, to unconfuse the reader.
Signed-off-by: Frederic Weisbecker Acked-by: Rik van Riel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1497838322-10913-2-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched/nohz.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h index 4995b717500b..7d3f75db23e5 100644 --- a/include/linux/sched/nohz.h +++ b/include/linux/sched/nohz.h @@ -23,11 +23,11 @@ static inline void set_cpu_sd_state_idle(void) { } #endif #ifdef CONFIG_NO_HZ_COMMON -void calc_load_enter_idle(void); -void calc_load_exit_idle(void); +void calc_load_nohz_start(void); +void calc_load_nohz_stop(void); #else -static inline void calc_load_enter_idle(void) { } -static inline void calc_load_exit_idle(void) { } +static inline void calc_load_nohz_start(void) { } +static inline void calc_load_nohz_stop(void) { } #endif /* CONFIG_NO_HZ_COMMON */ #if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) -- cgit v1.2.3 From a8ade55ffd1c1acef053a2d05f30e91a1c114f58 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 7 Jun 2017 17:49:56 +0300 Subject: net/mlx5e: Offload TC matching on ip ttl Enable offloading of TC matching on ip ttl / hop-limit As matching on ttl is supported only by newer HW brands (ConnectX-5), we should do capability check before attempting to offload that. 
Signed-off-by: Or Gerlitz Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index e86ef880a149..694f51d388d9 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -243,7 +243,7 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 outer_first_prio[0x1]; u8 outer_first_cfi[0x1]; u8 outer_first_vid[0x1]; - u8 reserved_at_7[0x1]; + u8 outer_ipv4_ttl[0x1]; u8 outer_second_prio[0x1]; u8 outer_second_cfi[0x1]; u8 outer_second_vid[0x1]; @@ -380,7 +380,8 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 tcp_sport[0x10]; u8 tcp_dport[0x10]; - u8 reserved_at_c0[0x20]; + u8 reserved_at_c0[0x18]; + u8 ttl_hoplimit[0x8]; u8 udp_sport[0x10]; u8 udp_dport[0x10]; -- cgit v1.2.3 From 0c0316f516f51e6fcd0b23d61c37f9f5f846f978 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 13 Jun 2017 11:09:57 +0300 Subject: net/mlx5e: Add header re-write offloading of IPv6 hop-limit For environments where flow-based ipv6 router is offloaded. 
Signed-off-by: Or Gerlitz Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 694f51d388d9..4b547f551a2e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4620,6 +4620,7 @@ enum { MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0 = 0x14, MLX5_ACTION_IN_FIELD_OUT_SIPV4 = 0x15, MLX5_ACTION_IN_FIELD_OUT_DIPV4 = 0x16, + MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT = 0x47, }; struct mlx5_ifc_alloc_modify_header_context_out_bits { -- cgit v1.2.3 From 4717628938423fcba0aa8fa889e9fed4eb6a655f Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 18 Apr 2017 13:35:39 +0300 Subject: net/mlx5: Add MCC (Management Component Control) register definitions MCC (Management Component Control) allows to control a firmware component update. MCDA (Management Component Data Access) allows to read and write a firmware component. MCQI (Management Component Query Information) allows to query information about firmware components. 
Signed-off-by: Or Gerlitz Signed-off-by: Yotam Gigi Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 3 ++ include/linux/mlx5/mlx5_ifc.h | 82 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index bf15e87da8fa..750701b3b863 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -131,6 +131,9 @@ enum { MLX5_REG_MPCNT = 0x9051, MLX5_REG_MTPPS = 0x9053, MLX5_REG_MTPPSE = 0x9054, + MLX5_REG_MCQI = 0x9061, + MLX5_REG_MCC = 0x9062, + MLX5_REG_MCDA = 0x9063, MLX5_REG_MCAM = 0x907f, }; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4b547f551a2e..28468ad804be 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -8165,6 +8165,85 @@ struct mlx5_ifc_mtppse_reg_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_mcqi_cap_bits { + u8 supported_info_bitmask[0x20]; + + u8 component_size[0x20]; + + u8 max_component_size[0x20]; + + u8 log_mcda_word_size[0x4]; + u8 reserved_at_64[0xc]; + u8 mcda_max_write_size[0x10]; + + u8 rd_en[0x1]; + u8 reserved_at_81[0x1]; + u8 match_chip_id[0x1]; + u8 match_psid[0x1]; + u8 check_user_timestamp[0x1]; + u8 match_base_guid_mac[0x1]; + u8 reserved_at_86[0x1a]; +}; + +struct mlx5_ifc_mcqi_reg_bits { + u8 read_pending_component[0x1]; + u8 reserved_at_1[0xf]; + u8 component_index[0x10]; + + u8 reserved_at_20[0x20]; + + u8 reserved_at_40[0x1b]; + u8 info_type[0x5]; + + u8 info_size[0x20]; + + u8 offset[0x20]; + + u8 reserved_at_a0[0x10]; + u8 data_size[0x10]; + + u8 data[0][0x20]; +}; + +struct mlx5_ifc_mcc_reg_bits { + u8 reserved_at_0[0x4]; + u8 time_elapsed_since_last_cmd[0xc]; + u8 reserved_at_10[0x8]; + u8 instruction[0x8]; + + u8 reserved_at_20[0x10]; + u8 component_index[0x10]; + + u8 reserved_at_40[0x8]; + u8 update_handle[0x18]; + + u8 handle_owner_type[0x4]; + u8 handle_owner_host_id[0x4]; + u8 reserved_at_68[0x1]; + 
u8 control_progress[0x7]; + u8 error_code[0x8]; + u8 reserved_at_78[0x4]; + u8 control_state[0x4]; + + u8 component_size[0x20]; + + u8 reserved_at_a0[0x60]; +}; + +struct mlx5_ifc_mcda_reg_bits { + u8 reserved_at_0[0x8]; + u8 update_handle[0x18]; + + u8 offset[0x20]; + + u8 reserved_at_40[0x10]; + u8 size[0x10]; + + u8 reserved_at_60[0x20]; + + u8 data[0][0x20]; +}; + union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_bufferx_reg_bits bufferx_reg; struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout; @@ -8214,6 +8293,9 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_mtppse_reg_bits mtppse_reg; struct mlx5_ifc_fpga_ctrl_bits fpga_ctrl_bits; struct mlx5_ifc_fpga_cap_bits fpga_cap_bits; + struct mlx5_ifc_mcqi_reg_bits mcqi_reg; + struct mlx5_ifc_mcc_reg_bits mcc_reg; + struct mlx5_ifc_mcda_reg_bits mcda_reg; u8 reserved_at_0[0x60e0]; }; -- cgit v1.2.3 From 0ab87743cc8c5bcd482daf71961ed5fc45349e01 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 11 Jun 2017 15:25:38 +0300 Subject: net/mlx5: Enhance MCAM reg to allow query on access reg support Enhance MCAM to allow the driver to query which access regs are supported. For now, expose the regs needed for FW flashing. 
Signed-off-by: Or Gerlitz Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 3 +++ include/linux/mlx5/mlx5_ifc.h | 13 +++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index b26a478930eb..556e1c31b5d0 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1094,6 +1094,9 @@ enum mlx5_mcam_feature_groups { #define MLX5_CAP_PCAM_FEATURE(mdev, fld) \ MLX5_GET(pcam_reg, (mdev)->caps.pcam, feature_cap_mask.enhanced_features.fld) +#define MLX5_CAP_MCAM_REG(mdev, reg) \ + MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_access_reg_cap_mask.access_regs.reg) + #define MLX5_CAP_MCAM_FEATURE(mdev, fld) \ MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_feature_cap_mask.enhanced_features.fld) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 28468ad804be..8f197b070cea 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -7745,6 +7745,18 @@ struct mlx5_ifc_mcam_enhanced_features_bits { u8 pcie_performance_group[0x1]; }; +struct mlx5_ifc_mcam_access_reg_bits { + u8 reserved_at_0[0x1c]; + u8 mcda[0x1]; + u8 mcc[0x1]; + u8 mcqi[0x1]; + u8 reserved_at_1f[0x1]; + + u8 regs_95_to_64[0x20]; + u8 regs_63_to_32[0x20]; + u8 regs_31_to_0[0x20]; +}; + struct mlx5_ifc_mcam_reg_bits { u8 reserved_at_0[0x8]; u8 feature_group[0x8]; @@ -7754,6 +7766,7 @@ struct mlx5_ifc_mcam_reg_bits { u8 reserved_at_20[0x20]; union { + struct mlx5_ifc_mcam_access_reg_bits access_regs; u8 reserved_at_0[0x80]; } mng_access_reg_cap_mask; -- cgit v1.2.3 From 137ffd15f71ec29ff1a57728081569698591225a Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 13 Jun 2017 18:12:13 +0300 Subject: net/mlx5: Fix offset of hca cap reserved field The offending commit pushed fwd the field by two bits but didn't increment the offset, fix that. Currently, no damage was done b/c this is just a field name, but let's have it right.
Fixes: f32f5bd2eb7e ('net/mlx5: Configure cache line size for start and end padding') Signed-off-by: Or Gerlitz Reported-by: Saeed Mahameed Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 8f197b070cea..d6b99d5d0f24 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -824,7 +824,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 cc_modify_allowed[0x1]; u8 start_pad[0x1]; u8 cache_line_128byte[0x1]; - u8 reserved_at_163[0xb]; + u8 reserved_at_165[0xb]; u8 gid_table_size[0x10]; u8 out_of_seq_cnt[0x1]; -- cgit v1.2.3 From c07dfcb45877fbc6798fa042bab3c4b85378efd4 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Thu, 22 Jun 2017 10:29:53 -0400 Subject: mbcache: make mbcache naming more generic Make names more generic so that mbcache usage is not limited to block sharing. In a subsequent patch in the series ("ext4: xattr inode deduplication"), we start using the mbcache code for sharing xattr inodes. With that patch, old mb_cache_entry.e_block field could be holding either a block number or an inode number. 
Signed-off-by: Tahsin Erdogan Signed-off-by: Theodore Ts'o --- include/linux/mbcache.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h index 86c9a8b480c5..e1bc73414983 100644 --- a/include/linux/mbcache.h +++ b/include/linux/mbcache.h @@ -19,15 +19,15 @@ struct mb_cache_entry { u32 e_key; u32 e_referenced:1; u32 e_reusable:1; - /* Block number of hashed block - stable during lifetime of the entry */ - sector_t e_block; + /* User provided value - stable during lifetime of the entry */ + u64 e_value; }; struct mb_cache *mb_cache_create(int bucket_bits); void mb_cache_destroy(struct mb_cache *cache); int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, - sector_t block, bool reusable); + u64 value, bool reusable); void __mb_cache_entry_free(struct mb_cache_entry *entry); static inline int mb_cache_entry_put(struct mb_cache *cache, struct mb_cache_entry *entry) @@ -38,10 +38,9 @@ static inline int mb_cache_entry_put(struct mb_cache *cache, return 1; } -void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key, - sector_t block); +void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value); struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, - sector_t block); + u64 value); struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, u32 key); struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache, -- cgit v1.2.3 From 2f74f09bce4f8d0236f20174a6daae63e10fe733 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:07 -0600 Subject: efi: parse ARM processor error Add support for ARM Common Platform Error Record (CPER). UEFI 2.6 specification adds support for ARM specific processor error information to be reported as part of the CPER records. This provides more detail for processor error logs.
Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Reviewed-by: James Morse Reviewed-by: Ard Biesheuvel Signed-off-by: Will Deacon --- include/linux/cper.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cper.h b/include/linux/cper.h index dcacb1a72e26..4c671fc2081e 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -180,6 +180,10 @@ enum { #define CPER_SEC_PROC_IPF \ UUID_LE(0xE429FAF1, 0x3CB7, 0x11D4, 0x0B, 0xCA, 0x07, 0x00, \ 0x80, 0xC7, 0x3C, 0x88, 0x81) +/* Processor Specific: ARM */ +#define CPER_SEC_PROC_ARM \ + UUID_LE(0xE19E3D16, 0xBC11, 0x11E4, 0x9C, 0xAA, 0xC2, 0x05, \ + 0x1D, 0x5D, 0x46, 0xB0) /* Platform Memory */ #define CPER_SEC_PLATFORM_MEM \ UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \ @@ -255,6 +259,22 @@ enum { #define CPER_PCIE_SLOT_SHIFT 3 +#define CPER_ARM_VALID_MPIDR BIT(0) +#define CPER_ARM_VALID_AFFINITY_LEVEL BIT(1) +#define CPER_ARM_VALID_RUNNING_STATE BIT(2) +#define CPER_ARM_VALID_VENDOR_INFO BIT(3) + +#define CPER_ARM_INFO_VALID_MULTI_ERR BIT(0) +#define CPER_ARM_INFO_VALID_FLAGS BIT(1) +#define CPER_ARM_INFO_VALID_ERR_INFO BIT(2) +#define CPER_ARM_INFO_VALID_VIRT_ADDR BIT(3) +#define CPER_ARM_INFO_VALID_PHYSICAL_ADDR BIT(4) + +#define CPER_ARM_INFO_FLAGS_FIRST BIT(0) +#define CPER_ARM_INFO_FLAGS_LAST BIT(1) +#define CPER_ARM_INFO_FLAGS_PROPAGATED BIT(2) +#define CPER_ARM_INFO_FLAGS_OVERFLOW BIT(3) + /* * All tables and structs must be byte-packed to match CPER * specification, since the tables are provided by the system BIOS @@ -340,6 +360,40 @@ struct cper_ia_proc_ctx { __u64 mm_reg_addr; }; +/* ARM Processor Error Section */ +struct cper_sec_proc_arm { + __u32 validation_bits; + __u16 err_info_num; /* Number of Processor Error Info */ + __u16 context_info_num; /* Number of Processor Context Info Records*/ + __u32 section_length; + __u8 affinity_level; + __u8 reserved[3]; /* must be zero */ + __u64 
mpidr; + __u64 midr; + __u32 running_state; /* Bit 0 set - Processor running. PSCI = 0 */ + __u32 psci_state; +}; + +/* ARM Processor Error Information Structure */ +struct cper_arm_err_info { + __u8 version; + __u8 length; + __u16 validation_bits; + __u8 type; + __u16 multiple_error; + __u8 flags; + __u64 error_info; + __u64 virt_fault_addr; + __u64 physical_fault_addr; +}; + +/* ARM Processor Context Information Structure */ +struct cper_arm_ctx_info { + __u16 version; + __u16 type; + __u32 size; +}; + /* Old Memory Error Section UEFI 2.1, 2.2 */ struct cper_sec_mem_err_old { __u64 validation_bits; -- cgit v1.2.3 From 7a9ca53aea10ad4677a0f347ad7639c304b80194 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Thu, 22 Jun 2017 11:46:48 -0400 Subject: quota: add get_inode_usage callback to transfer multi-inode charges Ext4 ea_inode feature allows storing xattr values in external inodes to be able to store values that are bigger than a block in size. Ext4 also has deduplication support for these types of inodes. With deduplication, the actual storage waste is eliminated but the users of such inodes are still charged full quota for the inodes as if there was no sharing happening in the background. This design requires ext4 to manually charge the users because the inodes are shared. An implication of this is that, if someone calls chown on a file that has such references we need to transfer the quota for the file and xattr inodes. Current dquot_transfer() function implicitly transfers one inode charge. With ea_inode feature, we would like to transfer multiple inode charges. Add get_inode_usage callback which can interrogate the total number of inodes that were charged for a given inode. [ Applied fix from Colin King to make sure the 'ret' variable is initialized on the successful return path.
Detected by CoverityScan, CID#1446616 ("Uninitialized scalar variable") --tytso] Signed-off-by: Tahsin Erdogan Signed-off-by: Theodore Ts'o Signed-off-by: Colin Ian King Acked-by: Jan Kara --- include/linux/quota.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index 3434eef2a5aa..bfd077ca6ac3 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -332,6 +332,8 @@ struct dquot_operations { * quota code only */ qsize_t *(*get_reserved_space) (struct inode *); int (*get_projid) (struct inode *, kprojid_t *);/* Get project ID */ + /* Get number of inodes that were charged for a given inode */ + int (*get_inode_usage) (struct inode *, qsize_t *); /* Get next ID with active quota structure */ int (*get_next_id) (struct super_block *sb, struct kqid *qid); }; -- cgit v1.2.3 From 92af08990cc49408119ca2549dfe9e37235864d8 Mon Sep 17 00:00:00 2001 From: Frank Rowand Date: Tue, 20 Jun 2017 16:38:28 -0700 Subject: of: make of_fdt_is_compatible() static The callers of of_fdt_is_compatible() are all in fdt.c so make it static. 
Signed-off-by: Frank Rowand Signed-off-by: Rob Herring --- include/linux/of_fdt.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 1dfbfd0d8040..013c5418aeec 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -31,9 +31,6 @@ extern void *of_fdt_get_property(const void *blob, unsigned long node, const char *name, int *size); -extern int of_fdt_is_compatible(const void *blob, - unsigned long node, - const char *compat); extern bool of_fdt_is_big_endian(const void *blob, unsigned long node); extern int of_fdt_match(const void *blob, unsigned long node, -- cgit v1.2.3 From d59f6617eef0f76e34f7a9993f5645c5ef467e42 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Jun 2017 01:37:05 +0200 Subject: genirq: Allow fwnode to carry name information only In order to provide a proper debug interface it's required to have domain names available when the domain is added. Non fwnode based architectures like x86 have no way to do so. It's not possible to use domain ops or host data for this as domain ops might be the same for several instances, but the names have to be unique. Extend the irqchip fwnode to allow transporting the domain name. If no node is supplied, create an 'unknown-N' placeholder. Warn if an invalid node is supplied and treat it like no node. This happens e.g. with i2c devices on x86 which hand in an ACPI type node which has no interface for retrieving the name.
[ Folded a fix from Marc to make DT name parsing work ] Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Cc: Jens Axboe Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235443.588784933@linutronix.de --- include/linux/irqdomain.h | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 9f3616085423..9cf32a2fbe69 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -189,6 +189,9 @@ enum { /* Irq domain implements MSI remapping */ IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5), + /* Irq domain name was allocated in __irq_domain_add() */ + IRQ_DOMAIN_NAME_ALLOCATED = (1 << 6), + /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the @@ -203,7 +206,33 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) } #ifdef CONFIG_IRQ_DOMAIN -struct fwnode_handle *irq_domain_alloc_fwnode(void *data); +struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id, + const char *name, void *data); + +enum { + IRQCHIP_FWNODE_REAL, + IRQCHIP_FWNODE_NAMED, + IRQCHIP_FWNODE_NAMED_ID, +}; + +static inline +struct fwnode_handle *irq_domain_alloc_named_fwnode(const char *name) +{ + return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED, 0, name, NULL); +} + +static inline +struct fwnode_handle *irq_domain_alloc_named_id_fwnode(const char *name, int id) +{ + return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED_ID, id, name, + NULL); +} + +static inline struct fwnode_handle *irq_domain_alloc_fwnode(void *data) +{ + return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_REAL, 0, NULL, data); +} + void irq_domain_free_fwnode(struct fwnode_handle *fwnode); struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, irq_hw_number_t hwirq_max, int direct_max, -- cgit v1.2.3 
From 9dc6be3d419398eae9a19cd09b7969ceff8eaf10 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Jun 2017 01:37:16 +0200 Subject: genirq/irqdomain: Add map counter Add a map counter instead of counting radix tree entries for diagnosis. That also gives correct information for linear domains. Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Cc: Jens Axboe Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235444.459397746@linutronix.de --- include/linux/irqdomain.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 9cf32a2fbe69..17ccd54d936d 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -130,6 +130,7 @@ struct irq_domain_chip_generic; * @host_data: private data pointer for use by owner. Not touched by irq_domain * core code. * @flags: host per irq_domain flags + * @mapcount: The number of mapped interrupts * * Optional elements * @of_node: Pointer to device tree nodes associated with the irq_domain. Used @@ -152,6 +153,7 @@ struct irq_domain { const struct irq_domain_ops *ops; void *host_data; unsigned int flags; + unsigned int mapcount; /* Optional data */ struct fwnode_handle *fwnode; -- cgit v1.2.3 From 087cdfb662ae50e3826e7cd2e54b6519d07b60f0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Jun 2017 01:37:17 +0200 Subject: genirq/debugfs: Add proper debugfs interface Debugging (hierarchical) interrupt domains is tedious as there is no information about the hierarchy and no information about states of interrupts in the various domain levels. Add a debugfs directory 'irq' and subdirectories 'domains' and 'irqs'. The domains directory contains the domain files. The content is information about the domain. If the domain is part of a hierarchy then the parent domains are printed as well.
# ls /sys/kernel/debug/irq/domains/ default INTEL-IR-2 INTEL-IR-MSI-2 IO-APIC-IR-2 PCI-MSI DMAR-MSI INTEL-IR-3 INTEL-IR-MSI-3 IO-APIC-IR-3 unknown-1 INTEL-IR-0 INTEL-IR-MSI-0 IO-APIC-IR-0 IO-APIC-IR-4 VECTOR INTEL-IR-1 INTEL-IR-MSI-1 IO-APIC-IR-1 PCI-HT # cat /sys/kernel/debug/irq/domains/VECTOR name: VECTOR size: 0 mapped: 216 flags: 0x00000041 # cat /sys/kernel/debug/irq/domains/IO-APIC-IR-0 name: IO-APIC-IR-0 size: 24 mapped: 19 flags: 0x00000041 parent: INTEL-IR-3 name: INTEL-IR-3 size: 65536 mapped: 167 flags: 0x00000041 parent: VECTOR name: VECTOR size: 0 mapped: 216 flags: 0x00000041 Unfortunately there is no per cpu information about the VECTOR domain (yet). The irqs directory contains detailed information about mapped interrupts. # cat /sys/kernel/debug/irq/irqs/3 handler: handle_edge_irq status: 0x00004000 istate: 0x00000000 ddepth: 1 wdepth: 0 dstate: 0x01018000 IRQD_IRQ_DISABLED IRQD_SINGLE_TARGET IRQD_MOVE_PCNTXT node: 0 affinity: 0-143 effectiv: 0 pending: domain: IO-APIC-IR-0 hwirq: 0x3 chip: IR-IO-APIC flags: 0x10 IRQCHIP_SKIP_SET_WAKE parent: domain: INTEL-IR-3 hwirq: 0x20000 chip: INTEL-IR flags: 0x0 parent: domain: VECTOR hwirq: 0x3 chip: APIC flags: 0x0 This was developed to simplify the debugging of the managed affinity changes. 
Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Cc: Jens Axboe Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235444.537566163@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/irqdesc.h | 4 ++++ include/linux/irqdomain.h | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index c9be57931b58..d425a3a09722 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -46,6 +46,7 @@ struct pt_regs; * @rcu: rcu head for delayed free * @kobj: kobject used to represent this struct in sysfs * @dir: /proc/irq/ procfs entry + * @debugfs_file: dentry for the debugfs file * @name: flow handler name for /proc/interrupts output */ struct irq_desc { @@ -88,6 +89,9 @@ struct irq_desc { #ifdef CONFIG_PROC_FS struct proc_dir_entry *dir; #endif +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS + struct dentry *debugfs_file; +#endif #ifdef CONFIG_SPARSE_IRQ struct rcu_head rcu; struct kobject kobj; diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 17ccd54d936d..914b0c31d233 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -139,6 +139,7 @@ struct irq_domain_chip_generic; * setting up one or more generic chips for interrupt controllers * drivers using the generic chip library which uses this pointer. * @parent: Pointer to parent irq_domain to support hierarchy irq_domains + * @debugfs_file: dentry for the domain debugfs file * * Revmap data, used internally by irq_domain * @revmap_direct_max_irq: The largest hwirq that can be set for controllers that @@ -162,6 +163,9 @@ struct irq_domain { #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_domain *parent; #endif +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS + struct dentry *debugfs_file; +#endif /* reverse map data. 
The linear map gets appended to the irq_domain */ irq_hw_number_t hwirq_max; -- cgit v1.2.3 From 1bb0401680da156ce1549e915e711bf5b2534cc5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Jun 2017 01:37:18 +0200 Subject: genirq: Add missing comment for IRQD_STARTED Signed-off-by: Thomas Gleixner Cc: Marc Zyngier Cc: Jens Axboe Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235444.614913014@linutronix.de --- include/linux/irq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index d996314b6522..7e62e10e5856 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -200,6 +200,7 @@ struct irq_data { * IRQD_WAKEUP_ARMED - Wakeup mode armed * IRQD_FORWARDED_TO_VCPU - The interrupt is forwarded to a VCPU * IRQD_AFFINITY_MANAGED - Affinity is auto-managed by the kernel + * IRQD_IRQ_STARTED - Startup state of the interrupt */ enum { IRQD_TRIGGER_MASK = 0xf, -- cgit v1.2.3 From cdd16365b0bd7c0cd19e2cc768b6bdc8021f32c3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Jun 2017 01:37:19 +0200 Subject: genirq: Provide irq_fixup_move_pending() If a CPU goes offline, the interrupts are migrated away, but an eventually pending interrupt move, which has not yet been made effective, is kept pending even if the outgoing CPU is the sole target of the pending affinity mask. What's worse is that the pending affinity mask is discarded even if it would contain a valid subset of the online CPUs. Implement a helper function which allows to avoid these issues.
Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235444.691345468@linutronix.de --- include/linux/irq.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 7e62e10e5856..d008065e2f4d 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -491,9 +491,14 @@ extern void irq_migrate_all_off_this_cpu(void); #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) void irq_move_irq(struct irq_data *data); void irq_move_masked_irq(struct irq_data *data); +bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear); #else static inline void irq_move_irq(struct irq_data *data) { } static inline void irq_move_masked_irq(struct irq_data *data) { } +static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) +{ + return false; +} #endif extern int no_irq_affinity; -- cgit v1.2.3 From f0383c24b4855f6a4b5a358c7b2d2c16e0437e9b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Jun 2017 01:37:29 +0200 Subject: genirq/cpuhotplug: Add support for cleaning up move in progress In order to move x86 to the generic hotplug migration code, add support for cleaning up move in progress bits. On architectures which have this x86 specific (mis)feature not enabled, this is optimized out by the compiler. 
Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235445.525817311@linutronix.de --- include/linux/irq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index d008065e2f4d..299271a4953c 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -491,10 +491,12 @@ extern void irq_migrate_all_off_this_cpu(void); #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) void irq_move_irq(struct irq_data *data); void irq_move_masked_irq(struct irq_data *data); +void irq_force_complete_move(struct irq_desc *desc); bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear); #else static inline void irq_move_irq(struct irq_data *data) { } static inline void irq_move_masked_irq(struct irq_data *data) { } +static inline void irq_force_complete_move(struct irq_desc *desc) { } static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) { return false; -- cgit v1.2.3 From 36d84fb45140f151fa4e145381dbce5e5ffed24d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Jun 2017 01:37:34 +0200 Subject: genirq: Move irq_fixup_move_pending() to core Now that x86 uses the generic code, the function declaration and inline stub can move to the core internal header. 
Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235445.928156166@linutronix.de --- include/linux/irq.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 299271a4953c..2b7e5a70d05f 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -492,15 +492,10 @@ extern void irq_migrate_all_off_this_cpu(void); void irq_move_irq(struct irq_data *data); void irq_move_masked_irq(struct irq_data *data); void irq_force_complete_move(struct irq_desc *desc); -bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear); #else static inline void irq_move_irq(struct irq_data *data) { } static inline void irq_move_masked_irq(struct irq_data *data) { } static inline void irq_force_complete_move(struct irq_desc *desc) { } -static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) -{ - return false; -} #endif extern int no_irq_affinity; -- cgit v1.2.3 From 0d3f54257dc300f2db480d6a46b34bdb87f18c1b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Jun 2017 01:37:38 +0200 Subject: genirq: Introduce effective affinity mask There is currently no way to evaluate the effective affinity mask of a given interrupt. Many irq chips allow only a single target CPU or a subset of CPUs in the affinity mask. Updating the mask at the time of setting the affinity to the subset would be counterproductive because information for cpu hotplug about assigned interrupt affinities gets lost. On CPU hotplug it's also pointless to force migrate an interrupt, which is not targeted at the CPU effectively. But currently the information is not available. Provide a separate mask to be updated by the irq_chip->irq_set_affinity() implementations.
Implement the read only proc files so the user can see the effective mask as well w/o trying to deduce it from /proc/interrupts. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235446.247834245@linutronix.de --- include/linux/irq.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 2b7e5a70d05f..4087ef268ba9 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -137,6 +137,9 @@ struct irq_domain; * @affinity: IRQ affinity on SMP. If this is an IPI * related irq, then this is the mask of the * CPUs to which an IPI can be sent. + * @effective_affinity: The effective IRQ affinity on SMP as some irq + * chips do not allow multi CPU destinations. + * A subset of @affinity. * @msi_desc: MSI descriptor * @ipi_offset: Offset of first IPI target cpu in @affinity. Optional. 
*/ @@ -148,6 +151,9 @@ struct irq_common_data { void *handler_data; struct msi_desc *msi_desc; cpumask_var_t affinity; +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + cpumask_var_t effective_affinity; +#endif #ifdef CONFIG_GENERIC_IRQ_IPI unsigned int ipi_offset; #endif @@ -737,6 +743,29 @@ static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) return d->common->affinity; } +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK +static inline +struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) +{ + return d->common->effective_affinity; +} +static inline void irq_data_update_effective_affinity(struct irq_data *d, + const struct cpumask *m) +{ + cpumask_copy(d->common->effective_affinity, m); +} +#else +static inline void irq_data_update_effective_affinity(struct irq_data *d, + const struct cpumask *m) +{ +} +static inline +struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) +{ + return d->common->affinity; +} +#endif + unsigned int arch_dynirq_lower_bound(unsigned int from); int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, -- cgit v1.2.3 From 54fdf6a0875ca380647ac1cc9b5b8f2dbbbfa131 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Jun 2017 01:37:47 +0200 Subject: genirq: Introduce IRQD_MANAGED_SHUTDOWN Affinity managed interrupts should keep their assigned affinity across CPU hotplug. To avoid magic hackery in device drivers, the core code shall manage them transparently. This will set these interrupts into a managed shutdown state when the last CPU of the assigned affinity mask goes offline. The interrupt will be restarted when one of the CPUs in the assigned affinity mask comes back online. Introduce the necessary state flag and the accessor functions.
Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235446.954523476@linutronix.de --- include/linux/irq.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 4087ef268ba9..0e37276c5315 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -207,6 +207,8 @@ struct irq_data { * IRQD_FORWARDED_TO_VCPU - The interrupt is forwarded to a VCPU * IRQD_AFFINITY_MANAGED - Affinity is auto-managed by the kernel * IRQD_IRQ_STARTED - Startup state of the interrupt + * IRQD_MANAGED_SHUTDOWN - Interrupt was shutdown due to empty affinity + * mask. Applies only to affinity managed irqs. */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -225,6 +227,7 @@ enum { IRQD_FORWARDED_TO_VCPU = (1 << 20), IRQD_AFFINITY_MANAGED = (1 << 21), IRQD_IRQ_STARTED = (1 << 22), + IRQD_MANAGED_SHUTDOWN = (1 << 23), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -343,6 +346,11 @@ static inline bool irqd_is_started(struct irq_data *d) return __irqd_to_state(d) & IRQD_IRQ_STARTED; } +static inline bool irqd_is_managed_shutdown(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) -- cgit v1.2.3 From 761ea388e8c4e3ac883a94e16bcc8c51fa419d4f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Jun 2017 01:37:50 +0200 Subject: genirq: Handle managed irqs gracefully in irq_startup() Affinity managed interrupts should keep their assigned affinity across CPU hotplug. To avoid magic hackery in device drivers, the core code shall manage them transparently and set these interrupts into a managed shutdown state when the last CPU of the assigned affinity mask goes offline.
The interrupt will be restarted when one of the CPUs in the assigned affinity mask comes back online. Add the necessary logic to irq_startup(). If an interrupt is requested and started up, the code checks whether it is affinity managed and if so, it checks whether a CPU in the interrupt's affinity mask is online. If not, it puts the interrupt into managed shutdown state. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235447.189851170@linutronix.de --- include/linux/irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 0e37276c5315..807042b46af1 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -346,7 +346,7 @@ static inline bool irqd_is_started(struct irq_data *d) return __irqd_to_state(d) & IRQD_IRQ_STARTED; } -static inline bool irqd_is_managed_shutdown(struct irq_data *d) +static inline bool irqd_is_managed_and_shutdown(struct irq_data *d) { return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN; } -- cgit v1.2.3 From c5cb83bb337c25caae995d992d1cdf9b317f83de Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Jun 2017 01:37:51 +0200 Subject: genirq/cpuhotplug: Handle managed IRQs on CPU hotplug If a CPU goes offline, interrupts affine to the CPU are moved away. If the outgoing CPU is the last CPU in the affinity mask the migration code breaks the affinity and sets it to all online cpus. This is a problem for affinity managed interrupts as CPU hotplug is often used for power management purposes. If the affinity is broken, the interrupt is no longer affine to the CPUs to which it was allocated. The affinity spreading allows to lay out multi queue devices in a way that they are assigned to a single CPU or a group of CPUs.
If the last CPU goes offline, then the queue is no longer used, so the interrupt can be shut down gracefully and parked until one of the assigned CPUs comes online again. Add a graceful shutdown mechanism into the irq affinity breaking code path, mark the irq as MANAGED_SHUTDOWN and leave the affinity mask unmodified. In the online path, scan the active interrupts for managed interrupts and if the interrupt is functional and the newly online CPU is part of the affinity mask, restart the interrupt if it is marked MANAGED_SHUTDOWN or if the interrupt is started up, try to add the CPU back to the effective affinity mask. Originally-by: Christoph Hellwig Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170619235447.273417334@linutronix.de --- include/linux/cpuhotplug.h | 1 + include/linux/irq.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 0f2a80377520..c15f22c54535 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -124,6 +124,7 @@ enum cpuhp_state { CPUHP_AP_ONLINE_IDLE, CPUHP_AP_SMPBOOT_THREADS, CPUHP_AP_X86_VDSO_VMA_ONLINE, + CPUHP_AP_IRQ_AFFINITY_ONLINE, CPUHP_AP_PERF_ONLINE, CPUHP_AP_PERF_X86_ONLINE, CPUHP_AP_PERF_X86_UNCORE_ONLINE, diff --git a/include/linux/irq.h b/include/linux/irq.h index 807042b46af1..19cea6326599 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -500,7 +500,12 @@ extern int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *cpumask, bool force); extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info); +#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_IRQ_MIGRATION) extern void irq_migrate_all_off_this_cpu(void); +extern int irq_affinity_online_cpu(unsigned int cpu); +#else +# define irq_affinity_online_cpu NULL +#endif #if defined(CONFIG_SMP) &&
defined(CONFIG_GENERIC_PENDING_IRQ) void irq_move_irq(struct irq_data *data); -- cgit v1.2.3 From d52dd44175bd27ad9d8e34a994fb80877c1f6d61 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Jun 2017 01:37:52 +0200 Subject: genirq: Introduce IRQD_SINGLE_TARGET flag Many interrupt chips allow only a single CPU as interrupt target. The core code has no knowledge about that. That's unfortunate as it could avoid trying to readd a newly online CPU to the effective affinity mask. Add the status flag and the necessary accessors. Signed-off-by: Thomas Gleixner Cc: Jens Axboe Cc: Marc Zyngier Cc: Michael Ellerman Cc: Keith Busch Cc: Peter Zijlstra Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20170619235447.352343969@linutronix.de --- include/linux/irq.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 19cea6326599..00db35b61e9e 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -209,6 +209,7 @@ struct irq_data { * IRQD_IRQ_STARTED - Startup state of the interrupt * IRQD_MANAGED_SHUTDOWN - Interrupt was shutdown due to empty affinity * mask. Applies only to affinity managed irqs. + * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -228,6 +229,7 @@ enum { IRQD_AFFINITY_MANAGED = (1 << 21), IRQD_IRQ_STARTED = (1 << 22), IRQD_MANAGED_SHUTDOWN = (1 << 23), + IRQD_SINGLE_TARGET = (1 << 24), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -276,6 +278,20 @@ static inline bool irqd_is_level_type(struct irq_data *d) return __irqd_to_state(d) & IRQD_LEVEL; } +/* + * Must only be called of irqchip.irq_set_affinity() or low level + * hieararchy domain allocation functions. 
+ */ +static inline void irqd_set_single_target(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_SINGLE_TARGET; +} + +static inline bool irqd_is_single_target(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_SINGLE_TARGET; +} + static inline bool irqd_is_wakeup_set(struct irq_data *d) { return __irqd_to_state(d) & IRQD_WAKEUP_STATE; -- cgit v1.2.3 From 61d0a000b7746665c7cfcff766532f6f2a922a61 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 22 Jun 2017 11:34:57 +0100 Subject: genirq/irqdomain: Add irq_domain_update_bus_token helper We can have irq domains that are identified by the same fwnode (because they are serviced by the same HW), and yet have different functionality (because they serve different busses, for example). This is what we use the bus_token field for. Since we don't use this field when generating the domain name, all the aliasing domains will get the same name, and the debugfs file creation fails. Also, bus_token is updated by individual drivers, and the core code is unaware of that update. In order to sort out this mess, let's introduce a helper that takes care of updating bus_token, and regenerating the debugfs file. A separate patch will update all the individual users.
Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 914b0c31d233..222f47af12f4 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -273,6 +273,9 @@ static inline bool is_fwnode_irqchip(struct fwnode_handle *fwnode) return fwnode && fwnode->type == FWNODE_IRQCHIP; } +extern void irq_domain_update_bus_token(struct irq_domain *domain, + enum irq_domain_bus_token bus_token); + static inline struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token) -- cgit v1.2.3 From 6a6544e520abecd484ab8b67fb50d1fc003f3275 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Jun 2017 22:17:44 +0100 Subject: genirq/irqdomain: Remove auto-recursive hierarchy support It did seem like a good idea at the time, but it never really caught on, and auto-recursive domains remain unused 3 years after having been introduced. Oh well, time for a late spring cleanup. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 222f47af12f4..cac77a5c5555 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -180,8 +180,8 @@ enum { /* Irq domain is hierarchical */ IRQ_DOMAIN_FLAG_HIERARCHY = (1 << 0), - /* Core calls alloc/free recursive through the domain hierarchy. 
*/ - IRQ_DOMAIN_FLAG_AUTO_RECURSIVE = (1 << 1), + /* Irq domain name was allocated in __irq_domain_add() */ + IRQ_DOMAIN_NAME_ALLOCATED = (1 << 6), /* Irq domain is an IPI domain with virq per cpu */ IRQ_DOMAIN_FLAG_IPI_PER_CPU = (1 << 2), @@ -195,9 +195,6 @@ enum { /* Irq domain implements MSI remapping */ IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5), - /* Irq domain name was allocated in __irq_domain_add() */ - IRQ_DOMAIN_NAME_ALLOCATED = (1 << 6), - /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the @@ -448,7 +445,7 @@ static inline int irq_domain_alloc_irqs(struct irq_domain *domain, NULL); } -extern int irq_domain_alloc_irqs_recursive(struct irq_domain *domain, +extern int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs, void *arg); extern int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, -- cgit v1.2.3 From 297b64c74385fc7ea5dfff66105ab6465f2df49a Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:12 -0600 Subject: ras: acpi / apei: generate trace event for unrecognized CPER section The UEFI spec includes non-standard section type support in the Common Platform Error Record. This is defined in section N.2.3 of UEFI version 2.5. Currently if the CPER section's type (UUID) does not match any section type that the kernel knows how to parse, a trace event is not generated. Generate a trace event which contains the raw error data for non-standard section type error records. 
Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Tested-by: Shiju Jose Signed-off-by: Will Deacon --- include/linux/ras.h | 12 ++++++++++++ include/linux/uuid.h | 4 +++- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ras.h b/include/linux/ras.h index ffb147185e8d..62fac3042dce 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -2,6 +2,7 @@ #define __RAS_H__ #include +#include #ifdef CONFIG_DEBUG_FS int ras_userspace_consumers(void); @@ -22,4 +23,15 @@ static inline void __init cec_init(void) { } static inline int cec_add_elem(u64 pfn) { return -ENODEV; } #endif +#ifdef CONFIG_RAS +void log_non_standard_event(const guid_t *sec_type, + const guid_t *fru_id, const char *fru_text, + const u8 sev, const u8 *err, const u32 len); +#else +static void log_non_standard_event(const guid_t *sec_type, + const guid_t *fru_id, const char *fru_text, + const u8 sev, const u8 *err, + const u32 len) { return; } +#endif + #endif /* __RAS_H__ */ diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 75f7182d5360..61641faca38b 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -18,8 +18,10 @@ #include +#define UUID_SIZE 16 + typedef struct { - __u8 b[16]; + __u8 b[UUID_SIZE]; } uuid_t; #define UUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ -- cgit v1.2.3 From e9279e83ad1f4b5af541a522a81888f828210b40 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:13 -0600 Subject: trace, ras: add ARM processor error trace event Currently there are trace events for the various RAS errors with the exception of ARM processor type errors. Add a new trace event for such errors so that the user will know when they occur. These trace events are consistent with the ARM processor error section type defined in UEFI 2.6 spec section N.2.4.4. 
Signed-off-by: Tyler Baicar Acked-by: Steven Rostedt Reviewed-by: Xie XiuQi Signed-off-by: Will Deacon --- include/linux/ras.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ras.h b/include/linux/ras.h index 62fac3042dce..7d61863ff265 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -3,6 +3,7 @@ #include #include +#include #ifdef CONFIG_DEBUG_FS int ras_userspace_consumers(void); @@ -27,11 +28,13 @@ static inline int cec_add_elem(u64 pfn) { return -ENODEV; } void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id, const char *fru_text, const u8 sev, const u8 *err, const u32 len); +void log_arm_hw_error(struct cper_sec_proc_arm *err); #else static void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id, const char *fru_text, const u8 sev, const u8 *err, const u32 len) { return; } +static void log_arm_hw_error(struct cper_sec_proc_arm *err) { return; } #endif #endif /* __RAS_H__ */ -- cgit v1.2.3 From 1a0915be192606fee64830b9c5d70b7ed59426b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 21 Jun 2017 08:26:46 +0200 Subject: mtd: partitions: add support for partition parsers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some devices have partitions that are kind of containers with extra subpartitions / volumes instead of e.g. a simple filesystem data. To support such cases we need to first create normal flash device partitions and then take care of these special ones. It's very common case for home routers. Depending on the vendor there are formats like TRX, Seama, TP-Link, WRGG & more. All of them are used to embed few partitions into a single one / single firmware file. Ideally all vendors would use some well documented / standardized format like UBI (and some probably start doing so), but there are still countless devices on the market using these poor vendor specific formats. 
This patch extends MTD subsystem by allowing to specify list of parsers that should be tried for a given partition. Supporting such poor formats is highly unlikely to be the top priority so these changes try to minimize maintenance cost to the minimum. It reuses existing code for these new parsers and just adds a one property and one new function. This implementation requires setting partition parsers in a flash parser. A proper change of bcm47xxpart will follow and in the future we will hopefully also find a solution for doing it with ofpart ("fixed-partitions"). Signed-off-by: Rafał Miłecki Signed-off-by: Brian Norris --- include/linux/mtd/partitions.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index 06df1e06b6e0..c4beb70dacbd 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -20,6 +20,12 @@ * * For each partition, these fields are available: * name: string that will be used to label the partition's MTD device. + * types: some partitions can be containers using specific format to describe + * embedded subpartitions / volumes. E.g. many home routers use "firmware" + * partition that contains at least kernel and rootfs. In such case an + * extra parser is needed that will detect these dynamic partitions and + * report them to the MTD subsystem. If set this property stores an array + * of parser names to use when looking for subpartitions. * size: the partition size; if defined as MTDPART_SIZ_FULL, the partition * will extend to the end of the master MTD device. 
* offset: absolute starting position within the master MTD device; if @@ -38,6 +44,7 @@ struct mtd_partition { const char *name; /* identifier string */ + const char *const *types; /* names of parsers to use if any */ uint64_t size; /* partition size */ uint64_t offset; /* offset within the master MTD space */ uint32_t mask_flags; /* master MTD flags to mask out for this partition */ -- cgit v1.2.3 From 61a04101c8a486dec586b2657bffede1b3b979f3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 19 Jun 2017 13:08:52 +0300 Subject: NFC: st-nci: Get rid of platform data Legacy platform data must go away. We are on the safe side here since there are no users of it in the kernel. If anyone by any odd reason needs it the GPIO lookup tables and built-in device properties at your service. Signed-off-by: Andy Shevchenko Signed-off-by: Samuel Ortiz --- include/linux/platform_data/st-nci.h | 31 ------------------------------- 1 file changed, 31 deletions(-) delete mode 100644 include/linux/platform_data/st-nci.h (limited to 'include/linux') diff --git a/include/linux/platform_data/st-nci.h b/include/linux/platform_data/st-nci.h deleted file mode 100644 index f6494b347c06..000000000000 --- a/include/linux/platform_data/st-nci.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Driver include for ST NCI NFC chip family. - * - * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . 
- */ - -#ifndef _ST_NCI_H_ -#define _ST_NCI_H_ - -#define ST_NCI_DRIVER_NAME "st_nci" - -struct st_nci_nfc_platform_data { - unsigned int gpio_reset; - unsigned int irq_polarity; - bool is_ese_present; - bool is_uicc_present; -}; - -#endif /* _ST_NCI_H_ */ -- cgit v1.2.3 From 313dd1b629219db50cad532dba6a3b3b22ffe622 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 5 May 2017 23:37:45 -0700 Subject: gcc-plugins: Add the randstruct plugin This randstruct plugin is modified from Brad Spengler/PaX Team's code in the last public patch of grsecurity/PaX based on my understanding of the code. Changes or omissions from the original code are mine and don't reflect the original grsecurity/PaX code. The randstruct GCC plugin randomizes the layout of selected structures at compile time, as a probabilistic defense against attacks that need to know the layout of structures within the kernel. This is most useful for "in-house" kernel builds where neither the randomization seed nor other build artifacts are made available to an attacker. While less useful for distribution kernels (where the randomization seed must be exposed for third party kernel module builds), it still has some value there since now all kernel builds would need to be tracked by an attacker. In more performance sensitive scenarios, GCC_PLUGIN_RANDSTRUCT_PERFORMANCE can be selected to make a best effort to restrict randomization to cacheline-sized groups of elements, and will not randomize bitfields. This comes at the cost of reduced randomization. Two annotations are defined,__randomize_layout and __no_randomize_layout, which respectively tell the plugin to either randomize or not to randomize instances of the struct in question. Follow-on patches enable the auto-detection logic for selecting structures for randomization that contain only function pointers. It is disabled here to assist with bisection. 
Since any randomized structs must be initialized using designated initializers, __randomize_layout includes the __designated_init annotation even when the plugin is disabled so that all builds will require the needed initialization. (With the plugin enabled, annotations for automatically chosen structures are marked as well.) The main differences between this implementation and grsecurity are: - disable automatic struct selection (to be enabled in follow-up patch) - add designated_init attribute at runtime and for manual marking - clarify debugging output to differentiate bad cast warnings - add whitelisting infrastructure - support gcc 7's DECL_ALIGN and DECL_MODE changes (Laura Abbott) - raise minimum required GCC version to 4.7 Earlier versions of this patch series were ported by Michael Leibowitz. Signed-off-by: Kees Cook --- include/linux/compiler-gcc.h | 5 +++++ include/linux/compiler.h | 8 ++++++++ include/linux/vermagic.h | 9 ++++++++- 3 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 386caf6771ed..7deaae3dc87d 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -223,6 +223,11 @@ /* Mark a function definition as prohibited from being cloned.
*/ #define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) +#ifdef RANDSTRUCT_PLUGIN +#define __randomize_layout __attribute__((randomize_layout)) +#define __no_randomize_layout __attribute__((no_randomize_layout)) +#endif + #endif /* GCC_VERSION >= 40500 */ #if GCC_VERSION >= 40600 diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 80a1dea36cbe..55ee9ee814f8 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -448,6 +448,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s # define __latent_entropy #endif +#ifndef __randomize_layout +# define __randomize_layout __designated_init +#endif + +#ifndef __no_randomize_layout +# define __no_randomize_layout +#endif + /* * Tell gcc if a function is cold. The compiler will assume any path * directly leading to the call is unlikely. diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index 6f8fbcf10dfb..af6c03f7f986 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -24,10 +24,17 @@ #ifndef MODULE_ARCH_VERMAGIC #define MODULE_ARCH_VERMAGIC "" #endif +#ifdef RANDSTRUCT_PLUGIN +#include +#define MODULE_RANDSTRUCT_PLUGIN "RANDSTRUCT_PLUGIN_" RANDSTRUCT_HASHED_SEED +#else +#define MODULE_RANDSTRUCT_PLUGIN +#endif #define VERMAGIC_STRING \ UTS_RELEASE " " \ MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \ MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \ - MODULE_ARCH_VERMAGIC + MODULE_ARCH_VERMAGIC \ + MODULE_RANDSTRUCT_PLUGIN -- cgit v1.2.3 From 6932ec60cc0a71689150b16b71427cfdc6575602 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20F=C3=A4rber?= Date: Mon, 5 Jun 2017 21:04:21 +0200 Subject: soc: actions: owl-sps: Factor out owl_sps_set_pg() for power-gating MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow the SMP code to reuse PM domain code for CPU2/CPU3 wakeup. 
Signed-off-by: Andreas Färber --- include/linux/soc/actions/owl-sps.h | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 include/linux/soc/actions/owl-sps.h (limited to 'include/linux') diff --git a/include/linux/soc/actions/owl-sps.h b/include/linux/soc/actions/owl-sps.h new file mode 100644 index 000000000000..33d0dbeceb55 --- /dev/null +++ b/include/linux/soc/actions/owl-sps.h @@ -0,0 +1,11 @@ +/* + * Copyright (c) 2017 Andreas Färber + * + * SPDX-License-Identifier: GPL-2.0+ + */ +#ifndef SOC_ACTIONS_OWL_SPS_H +#define SOC_ACTIONS_OWL_SPS_H + +int owl_sps_set_pg(void __iomem *base, u32 pwr_mask, u32 ack_mask, bool enable); + +#endif -- cgit v1.2.3 From 32d602771b624e3a2fc86d5e220e9fa7dced767a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Jun 2017 18:25:03 -0700 Subject: xdp: pass XDP flags into install handlers Pass XDP flags to the xdp ndo. This will allow drivers to look at the mode flags and make decisions about offload. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7c7118b3bd69..b194817631de 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -820,6 +820,7 @@ struct netdev_xdp { union { /* XDP_SETUP_PROG */ struct { + u32 flags; struct bpf_prog *prog; struct netlink_ext_ack *extack; }; -- cgit v1.2.3 From ee5d032f7d032e2cea354522a46b211de84c4e8c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Jun 2017 18:25:04 -0700 Subject: xdp: add HW offload mode flag for installing programs Add an installation-time flag for requesting that the program be installed only if it can be offloaded to HW. Internally new command for ndo_xdp is added, this way we avoid putting checks into drivers since they all return -EINVAL on an unknown command. 
Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b194817631de..a838591aad28 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -807,6 +807,7 @@ enum xdp_netdev_command { * when it is no longer used. */ XDP_SETUP_PROG, + XDP_SETUP_PROG_HW, /* Check if a bpf program is set on the device. The callee should * return true if a program is currently attached and running. */ -- cgit v1.2.3 From ce158e580a5bdc93286a3b630638bdd47d4ec663 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Jun 2017 18:25:09 -0700 Subject: xdp: add reporting of offload mode Extend the XDP_ATTACHED_* values to include offloaded mode. Let drivers report whether program is installed in the driver or the HW by changing the prog_attached field from bool to u8 (type of the netlink attribute). Exploit the fact that the value of XDP_ATTACHED_DRV is 1, therefore since all drivers currently assign the mode with double negation: mode = !!xdp_prog; no drivers have to be modified. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a838591aad28..68f5d899d1e6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -809,7 +809,8 @@ enum xdp_netdev_command { XDP_SETUP_PROG, XDP_SETUP_PROG_HW, /* Check if a bpf program is set on the device. The callee should - * return true if a program is currently attached and running. + * set @prog_attached to one of XDP_ATTACHED_* values, note that "true" + * is equivalent to XDP_ATTACHED_DRV. 
*/ XDP_QUERY_PROG, }; @@ -827,7 +828,7 @@ struct netdev_xdp { }; /* XDP_QUERY_PROG */ struct { - bool prog_attached; + u8 prog_attached; u32 prog_id; }; }; @@ -3307,7 +3308,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, typedef int (*xdp_op_t)(struct net_device *dev, struct netdev_xdp *xdp); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, u32 flags); -bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id); +u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); -- cgit v1.2.3 From 239946314e57711d7da546b67964d0b387a3ee42 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 22 Jun 2017 15:07:39 -0700 Subject: bpf: possibly avoid extra masking for narrower load in verifier Commit 31fd85816dbe ("bpf: permits narrower load from bpf program context fields") permits narrower load for certain ctx fields. The commit however will already generate a masking even if the prog-specific ctx conversion produces the result with narrower size. For example, for __sk_buff->protocol, the ctx conversion loads the data into register with 2-byte load. A narrower 2-byte load should not generate masking. For __sk_buff->vlan_present, the conversion function sets the result as either 0 or 1, essentially a byte. The narrower 2-byte or 1-byte load should not generate masking. To avoid unnecessary masking, prog-specific *_is_valid_access now passes converted_op_size back to verifier, which indicates the valid data width after perceived future conversion. Based on this information, verifier is able to avoid unnecessary masking. Since we want more information back from prog-specific *_is_valid_access checking, all of them are packed into one data structure for more clarity.
Acked-by: Daniel Borkmann Signed-off-by: Yonghong Song Signed-off-by: David S. Miller --- include/linux/bpf.h | 11 ++++++++++- include/linux/bpf_verifier.h | 3 ++- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1bcbf0a71f75..deca4e7f2845 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -149,6 +149,15 @@ enum bpf_reg_type { struct bpf_prog; +/* The information passed from prog-specific *_is_valid_access + * back to the verifier. + */ +struct bpf_insn_access_aux { + enum bpf_reg_type reg_type; + int ctx_field_size; + int converted_op_size; +}; + struct bpf_verifier_ops { /* return eBPF function prototype for verification */ const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id); @@ -157,7 +166,7 @@ struct bpf_verifier_ops { * with 'type' (read or write) is allowed */ bool (*is_valid_access)(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type, int *ctx_field_size); + struct bpf_insn_access_aux *info); int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, const struct bpf_prog *prog); u32 (*convert_ctx_access)(enum bpf_access_type type, diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 189741c0da85..621076f56251 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -73,7 +73,8 @@ struct bpf_insn_aux_data { enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ }; - int ctx_field_size; /* the ctx field size for load/store insns, maybe 0 */ + int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ + int converted_op_size; /* the valid value width after perceived conversion */ }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ -- cgit v1.2.3 From 735d8a18433e8d953e4e2b92883bfcc566e382c2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: 
Fri, 23 Jun 2017 10:33:14 -0700 Subject: net: phy: Support "internal" PHY interface Now that the Device Tree binding has been updated, update the PHY library phy_interface_t and phy_modes to support the "internal" PHY interface type. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 23d2e46dd322..1d8d70193782 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -64,6 +64,7 @@ /* Interface Mode definitions */ typedef enum { PHY_INTERFACE_MODE_NA, + PHY_INTERFACE_MODE_INTERNAL, PHY_INTERFACE_MODE_MII, PHY_INTERFACE_MODE_GMII, PHY_INTERFACE_MODE_SGMII, @@ -114,6 +115,8 @@ static inline const char *phy_modes(phy_interface_t interface) switch (interface) { case PHY_INTERFACE_MODE_NA: return ""; + case PHY_INTERFACE_MODE_INTERNAL: + return "internal"; case PHY_INTERFACE_MODE_MII: return "mii"; case PHY_INTERFACE_MODE_GMII: -- cgit v1.2.3 From bce70fef7279243d62adbf5f53998b8d3d016144 Mon Sep 17 00:00:00 2001 From: Shawn Nematbakhsh Date: Tue, 16 May 2017 17:46:48 +0200 Subject: platform/chrome: cros_ec_lpc: Add R/W helpers to LPC protocol variants Call common functions for read / write to prepare support for future LPC protocol variants which use different I/O ops than inb / outb. 
Signed-off-by: Shawn Nematbakhsh Signed-off-by: Thierry Escande Acked-by: Lee Jones Signed-off-by: Benson Leung --- include/linux/mfd/cros_ec_lpc_reg.h | 47 +++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 include/linux/mfd/cros_ec_lpc_reg.h (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_lpc_reg.h b/include/linux/mfd/cros_ec_lpc_reg.h new file mode 100644 index 000000000000..4089bd5c8313 --- /dev/null +++ b/include/linux/mfd/cros_ec_lpc_reg.h @@ -0,0 +1,47 @@ +/* + * cros_ec_lpc_reg - LPC access to the Chrome OS Embedded Controller + * + * Copyright (C) 2016 Google, Inc + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * This driver uses the Chrome OS EC byte-level message-based protocol for + * communicating the keyboard state (which keys are pressed) from a keyboard EC + * to the AP over some bus (such as i2c, lpc, spi). The EC does debouncing, + * but everything else (including deghosting) is done here. The main + * motivation for this is to keep the EC firmware as simple as possible, since + * it cannot be easily upgraded and EC flash/IRAM space is relatively + * expensive. + */ + +#ifndef __LINUX_MFD_CROS_EC_REG_H +#define __LINUX_MFD_CROS_EC_REG_H + +/** + * cros_ec_lpc_read_bytes - Read bytes from a given LPC-mapped address. + * Returns 8-bit checksum of all bytes read. 
+ * + * @offset: Base read address + * @length: Number of bytes to read + * @dest: Destination buffer + */ +u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest); + +/** + * cros_ec_lpc_write_bytes - Write bytes to a given LPC-mapped address. + * Returns 8-bit checksum of all bytes written. + * + * @offset: Base write address + * @length: Number of bytes to write + * @msg: Write data buffer + */ +u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg); + +#endif /* __LINUX_MFD_CROS_EC_REG_H */ -- cgit v1.2.3 From 8d4a3dc423a2695be51ac864eefb8ba7688b1240 Mon Sep 17 00:00:00 2001 From: Shawn Nematbakhsh Date: Tue, 16 May 2017 17:46:48 +0200 Subject: platform/chrome: cros_ec_lpc: Add support for mec1322 EC This adds support for the ChromeOS LPC Microchip Embedded Controller (mec1322) variant. mec1322 accesses I/O region [800h, 9ffh] through embedded memory interface (EMI) rather than LPC. Signed-off-by: Shawn Nematbakhsh Signed-off-by: Thierry Escande Acked-by: Lee Jones Signed-off-by: Benson Leung --- include/linux/mfd/cros_ec_lpc_mec.h | 90 +++++++++++++++++++++++++++++++++++++ include/linux/mfd/cros_ec_lpc_reg.h | 14 ++++++ 2 files changed, 104 insertions(+) create mode 100644 include/linux/mfd/cros_ec_lpc_mec.h (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_lpc_mec.h b/include/linux/mfd/cros_ec_lpc_mec.h new file mode 100644 index 000000000000..176496ddc66c --- /dev/null +++ b/include/linux/mfd/cros_ec_lpc_mec.h @@ -0,0 +1,90 @@ +/* + * cros_ec_lpc_mec - LPC variant I/O for Microchip EC + * + * Copyright (C) 2016 Google, Inc + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * This driver uses the Chrome OS EC byte-level message-based protocol for + * communicating the keyboard state (which keys are pressed) from a keyboard EC + * to the AP over some bus (such as i2c, lpc, spi). The EC does debouncing, + * but everything else (including deghosting) is done here. The main + * motivation for this is to keep the EC firmware as simple as possible, since + * it cannot be easily upgraded and EC flash/IRAM space is relatively + * expensive. + */ + +#ifndef __LINUX_MFD_CROS_EC_MEC_H +#define __LINUX_MFD_CROS_EC_MEC_H + +#include + +enum cros_ec_lpc_mec_emi_access_mode { + /* 8-bit access */ + ACCESS_TYPE_BYTE = 0x0, + /* 16-bit access */ + ACCESS_TYPE_WORD = 0x1, + /* 32-bit access */ + ACCESS_TYPE_LONG = 0x2, + /* + * 32-bit access, read or write of MEC_EMI_EC_DATA_B3 causes the + * EC data register to be incremented. + */ + ACCESS_TYPE_LONG_AUTO_INCREMENT = 0x3, +}; + +enum cros_ec_lpc_mec_io_type { + MEC_IO_READ, + MEC_IO_WRITE, +}; + +/* Access IO ranges 0x800 thru 0x9ff using EMI interface instead of LPC */ +#define MEC_EMI_RANGE_START EC_HOST_CMD_REGION0 +#define MEC_EMI_RANGE_END (EC_LPC_ADDR_MEMMAP + EC_MEMMAP_SIZE) + +/* EMI registers are relative to base */ +#define MEC_EMI_BASE 0x800 +#define MEC_EMI_HOST_TO_EC (MEC_EMI_BASE + 0) +#define MEC_EMI_EC_TO_HOST (MEC_EMI_BASE + 1) +#define MEC_EMI_EC_ADDRESS_B0 (MEC_EMI_BASE + 2) +#define MEC_EMI_EC_ADDRESS_B1 (MEC_EMI_BASE + 3) +#define MEC_EMI_EC_DATA_B0 (MEC_EMI_BASE + 4) +#define MEC_EMI_EC_DATA_B1 (MEC_EMI_BASE + 5) +#define MEC_EMI_EC_DATA_B2 (MEC_EMI_BASE + 6) +#define MEC_EMI_EC_DATA_B3 (MEC_EMI_BASE + 7) + +/* + * cros_ec_lpc_mec_init + * + * Initialize MEC I/O. 
+ */ +void cros_ec_lpc_mec_init(void); + +/* + * cros_ec_lpc_mec_destroy + * + * Cleanup MEC I/O. + */ +void cros_ec_lpc_mec_destroy(void); + +/** + * cros_ec_lpc_io_bytes_mec - Read / write bytes to MEC EMI port + * + * @io_type: MEC_IO_READ or MEC_IO_WRITE, depending on request + * @offset: Base read / write address + * @length: Number of bytes to read / write + * @buf: Destination / source buffer + * + * @return 8-bit checksum of all bytes read / written + */ +u8 cros_ec_lpc_io_bytes_mec(enum cros_ec_lpc_mec_io_type io_type, + unsigned int offset, unsigned int length, u8 *buf); + +#endif /* __LINUX_MFD_CROS_EC_MEC_H */ diff --git a/include/linux/mfd/cros_ec_lpc_reg.h b/include/linux/mfd/cros_ec_lpc_reg.h index 4089bd5c8313..5560bef63c2b 100644 --- a/include/linux/mfd/cros_ec_lpc_reg.h +++ b/include/linux/mfd/cros_ec_lpc_reg.h @@ -44,4 +44,18 @@ u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest); */ u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg); +/** + * cros_ec_lpc_reg_init + * + * Initialize register I/O. + */ +void cros_ec_lpc_reg_init(void); + +/** + * cros_ec_lpc_reg_destroy + * + * Cleanup reg I/O. + */ +void cros_ec_lpc_reg_destroy(void); + #endif /* __LINUX_MFD_CROS_EC_REG_H */ -- cgit v1.2.3 From be3ebebf4377fe924f0419f78fc82cf01a31e692 Mon Sep 17 00:00:00 2001 From: Eric Caruso Date: Tue, 16 May 2017 17:46:48 +0200 Subject: platform/chrome: cros_ec_lightbar - Add lightbar program feature to sysfs Add a program feature so we can upload and run programs for lightbar sequences. We should be able to use this to shift sequences out of the EC and save space there. 
$ cat > /sys/devices/.../cros_ec/program $ echo program > /sys/devices/.../cros_ec/sequence Signed-off-by: Eric Caruso Signed-off-by: Guenter Roeck Signed-off-by: Enric Balletbo i Serra Acked-by: Lee Jones Signed-off-by: Benson Leung --- include/linux/mfd/cros_ec_commands.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 1b19e424e1cf..dbea5802e83b 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -1162,6 +1162,13 @@ struct lightbar_params_v1 { struct rgb_s color[8]; /* 0-3 are Google colors */ } __packed; +/* Lightbar program */ +#define EC_LB_PROG_LEN 192 +struct lightbar_program { + uint8_t size; + uint8_t data[EC_LB_PROG_LEN]; +}; + struct ec_params_lightbar { uint8_t cmd; /* Command (see enum lightbar_command) */ union { @@ -1188,6 +1195,7 @@ struct ec_params_lightbar { struct lightbar_params_v0 set_params_v0; struct lightbar_params_v1 set_params_v1; + struct lightbar_program set_program; }; } __packed; @@ -1220,7 +1228,8 @@ struct ec_response_lightbar { struct { /* no return params */ } off, on, init, set_brightness, seq, reg, set_rgb, - demo, set_params_v0, set_params_v1; + demo, set_params_v0, set_params_v1, + set_program; }; } __packed; @@ -1244,6 +1253,7 @@ enum lightbar_command { LIGHTBAR_CMD_GET_DEMO = 15, LIGHTBAR_CMD_GET_PARAMS_V1 = 16, LIGHTBAR_CMD_SET_PARAMS_V1 = 17, + LIGHTBAR_CMD_SET_PROGRAM = 18, LIGHTBAR_NUM_CMDS }; -- cgit v1.2.3 From 405c84308c4335ee7cb58b9304b77b85e61f7129 Mon Sep 17 00:00:00 2001 From: Eric Caruso Date: Tue, 16 May 2017 17:46:48 +0200 Subject: platform/chrome: cros_ec_lightbar - Control of suspend/resume lightbar sequence Don't let EC control suspend/resume sequence. 
If the EC controls the lightbar and sets the sequence when it notices the chipset transitioning between states, we can't make exceptions for cases where we don't want to activate the lightbar. Instead, let's move the suspend/resume notifications into the kernel so we can selectively play the sequences. Signed-off-by: Eric Caruso Signed-off-by: Guenter Roeck Signed-off-by: Enric Balletbo i Serra Acked-by: Lee Jones Signed-off-by: Benson Leung --- include/linux/mfd/cros_ec_commands.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index dbea5802e83b..190c8f4afa02 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -1175,7 +1175,7 @@ struct ec_params_lightbar { struct { /* no args */ } dump, off, on, init, get_seq, get_params_v0, get_params_v1, - version, get_brightness, get_demo; + version, get_brightness, get_demo, suspend, resume; struct { uint8_t num; @@ -1193,6 +1193,10 @@ struct ec_params_lightbar { uint8_t led; } get_rgb; + struct { + uint8_t enable; + } manual_suspend_ctrl; + struct lightbar_params_v0 set_params_v0; struct lightbar_params_v1 set_params_v1; struct lightbar_program set_program; @@ -1229,7 +1233,7 @@ struct ec_response_lightbar { /* no return params */ } off, on, init, set_brightness, seq, reg, set_rgb, demo, set_params_v0, set_params_v1, - set_program; + set_program, manual_suspend_ctrl, suspend, resume; }; } __packed; @@ -1254,6 +1258,9 @@ enum lightbar_command { LIGHTBAR_CMD_GET_PARAMS_V1 = 16, LIGHTBAR_CMD_SET_PARAMS_V1 = 17, LIGHTBAR_CMD_SET_PROGRAM = 18, + LIGHTBAR_CMD_MANUAL_SUSPEND_CTRL = 19, + LIGHTBAR_CMD_SUSPEND = 20, + LIGHTBAR_CMD_RESUME = 21, LIGHTBAR_NUM_CMDS }; -- cgit v1.2.3 From 829a4e8c0e9aab17bcfe2ddb070388b8ada26292 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 21 Jun 2017 10:29:13 +0530 Subject: PM / OPP: Add dev_pm_opp_{set|put}_clkname() 
In order to support OPP switching, OPP layer needs to get pointer to the clock for the device. Simple cases work fine without using the routines added by this patch (i.e. by passing connection-id as NULL), but for a device with multiple clocks available, the OPP core needs to know the exact name of the clk to use. Add a new set of APIs to get that done. Tested-by: Rajendra Nayak Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index a6685b3dde26..51ec727b4824 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -121,6 +121,8 @@ struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name) void dev_pm_opp_put_prop_name(struct opp_table *opp_table); struct opp_table *dev_pm_opp_set_regulators(struct device *dev, const char * const names[], unsigned int count); void dev_pm_opp_put_regulators(struct opp_table *opp_table); +struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char * name); +void dev_pm_opp_put_clkname(struct opp_table *opp_table); struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); void dev_pm_opp_register_put_opp_helper(struct opp_table *opp_table); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); @@ -257,6 +259,13 @@ static inline struct opp_table *dev_pm_opp_set_regulators(struct device *dev, co static inline void dev_pm_opp_put_regulators(struct opp_table *opp_table) {} +static inline struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char * name) +{ + return ERR_PTR(-ENOTSUPP); +} + +static inline void dev_pm_opp_put_clkname(struct opp_table *opp_table) {} + static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) { return -ENOTSUPP; -- cgit v1.2.3 From 
27e47a6342e21b005a15a1f0afea0b6f179e0a71 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 22 May 2017 18:14:06 -0700 Subject: fscrypt: inline fscrypt_free_filename() fscrypt_free_filename() only needs to do a kfree() of crypto_buf.name, which works well as an inline function. We can skip setting the various pointers to NULL, since no user cares about it (the name is always freed just before it goes out of scope). Signed-off-by: Eric Biggers Reviewed-by: David Gstir Signed-off-by: Theodore Ts'o --- include/linux/fscrypt_supp.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index cd4e82c17304..32e2fcf13b01 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -47,7 +47,12 @@ extern void fscrypt_put_encryption_info(struct inode *, struct fscrypt_info *); /* fname.c */ extern int fscrypt_setup_filename(struct inode *, const struct qstr *, int lookup, struct fscrypt_name *); -extern void fscrypt_free_filename(struct fscrypt_name *); + +static inline void fscrypt_free_filename(struct fscrypt_name *fname) +{ + kfree(fname->crypto_buf.name); +} + extern u32 fscrypt_fname_encrypted_size(const struct inode *, u32); extern int fscrypt_fname_alloc_buffer(const struct inode *, u32, struct fscrypt_str *); -- cgit v1.2.3 From b7e7cf7a66a27e62c5f873a0068cee34094bf5d7 Mon Sep 17 00:00:00 2001 From: Daniel Walter Date: Mon, 19 Jun 2017 09:27:58 +0200 Subject: fscrypt: add support for AES-128-CBC fscrypt provides facilities to use different encryption algorithms which are selectable by userspace when setting the encryption policy. Currently, only AES-256-XTS for file contents and AES-256-CBC-CTS for file names are implemented. This is a clear case of kernel offers the mechanism and userspace selects a policy. Similar to what dm-crypt and ecryptfs have. 
This patch adds support for using AES-128-CBC for file contents and AES-128-CBC-CTS for file name encryption. To mitigate watermarking attacks, IVs are generated using the ESSIV algorithm. While AES-CBC is actually slightly less secure than AES-XTS from a security point of view, there is more widespread hardware support. Using AES-CBC gives us acceptable performance while still providing a moderate level of security for persistent storage. Especially low-powered embedded devices with crypto accelerators such as CAAM or CESA often only support AES-CBC. Since using AES-CBC over AES-XTS is basically thought of as a last resort, we use AES-128-CBC over AES-256-CBC since it has fewer encryption rounds and yields noticeably better performance starting from a file size of just a few kB. Signed-off-by: Daniel Walter [david@sigma-star.at: addressed review comments] Signed-off-by: David Gstir Reviewed-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt_common.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h index 0a30c106c1e5..4022c61f7e9b 100644 --- a/include/linux/fscrypt_common.h +++ b/include/linux/fscrypt_common.h @@ -91,14 +91,18 @@ static inline bool fscrypt_dummy_context_enabled(struct inode *inode) return false; } -static inline bool fscrypt_valid_contents_enc_mode(u32 mode) +static inline bool fscrypt_valid_enc_modes(u32 contents_mode, + u32 filenames_mode) { - return (mode == FS_ENCRYPTION_MODE_AES_256_XTS); -} + if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC && + filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS) + return true; -static inline bool fscrypt_valid_filenames_enc_mode(u32 mode) -{ - return (mode == FS_ENCRYPTION_MODE_AES_256_CTS); + if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS && + filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS) + return true; + + return false; } static inline bool
fscrypt_is_dot_dotdot(const struct qstr *str) -- cgit v1.2.3 From c250b7dd8e73b5f7d88d231fbaac92e3360a7234 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 22 Jun 2017 12:14:40 -0700 Subject: fscrypt: make ->dummy_context() return bool This makes it consistent with ->is_encrypted(), ->empty_dir(), and fscrypt_dummy_context_enabled(). Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h index 4022c61f7e9b..e3e1208e0f54 100644 --- a/include/linux/fscrypt_common.h +++ b/include/linux/fscrypt_common.h @@ -77,7 +77,7 @@ struct fscrypt_operations { const char *key_prefix; int (*get_context)(struct inode *, void *, size_t); int (*set_context)(struct inode *, const void *, size_t, void *); - int (*dummy_context)(struct inode *); + bool (*dummy_context)(struct inode *); bool (*is_encrypted)(struct inode *); bool (*empty_dir)(struct inode *); unsigned (*max_namelen)(struct inode *); -- cgit v1.2.3 From b2d3d61adb7b73cfe5f82404f7a130a76fc64232 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 23 Jun 2017 16:11:07 +0200 Subject: genirq/timings: Add infrastructure to track the interrupt timings The interrupt framework gives a lot of information about each interrupt. It does not keep track of when those interrupts occur though, which is a prerequisite for estimating the next interrupt arrival for power management purposes. Add a mechanism to record the timestamp for each interrupt occurrence in a per-CPU circular buffer to help with the prediction of the next occurrence using a statistical model. Each CPU can store up to IRQ_TIMINGS_SIZE events; the current value of IRQ_TIMINGS_SIZE is 32. Each event is encoded into a single u64, where the high 48 bits are used for the timestamp and the low 16 bits are for the irq number.
A static key is introduced so when the irq prediction is switched off at runtime, the overhead is close to zero. It results in most of the code in internals.h for inline reasons and very little in the new file timings.c. The latter will contain more in the next patch which will provide the statistical model for the next event prediction. Signed-off-by: Daniel Lezcano Signed-off-by: Thomas Gleixner Acked-by: Nicolas Pitre Cc: Jens Axboe Cc: Hannes Reinecke Cc: Vincent Guittot Cc: "Rafael J . Wysocki" Cc: Peter Zijlstra Cc: Bjorn Helgaas Link: http://lkml.kernel.org/r/1498227072-5980-1-git-send-email-daniel.lezcano@linaro.org --- include/linux/interrupt.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a6fba4804672..9f617238a2f7 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -703,6 +703,11 @@ static inline void init_irq_proc(void) } #endif +#ifdef CONFIG_IRQ_TIMINGS +void irq_timings_enable(void); +void irq_timings_disable(void); +#endif + struct seq_file; int show_interrupts(struct seq_file *p, void *v); int arch_show_interrupts(struct seq_file *p, int prec); -- cgit v1.2.3 From e1c921495534002d727b15a76a2f8c20b6b108b5 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 23 Jun 2017 16:11:08 +0200 Subject: genirq/timings: Add infrastructure for estimating the next interrupt arrival time An interrupt behaves with a burst of activity with periodic interval of time followed by one or two peaks of longer interval. As the time intervals are periodic, statistically speaking they follow a normal distribution and each interrupt can be tracked individually. Add a mechanism to compute the statistics on all interrupts, except the timers which are deterministic from a prediction point of view, as their expiry time is known.
The goal is to extract the periodicity for each interrupt, with the last timestamp and sum them, so the next event can be predicted to a certain extent. Taking the earliest prediction gives the expected wakeup on the system (assuming a timer won't expire before). Signed-off-by: Daniel Lezcano Signed-off-by: Thomas Gleixner Cc: Nicolas Pitre Cc: Jens Axboe Cc: Hannes Reinecke Cc: Vincent Guittot Cc: "Rafael J . Wysocki" Cc: Peter Zijlstra Cc: Bjorn Helgaas Link: http://lkml.kernel.org/r/1498227072-5980-2-git-send-email-daniel.lezcano@linaro.org --- include/linux/interrupt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 9f617238a2f7..37f8e354f564 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -706,6 +706,7 @@ static inline void init_irq_proc(void) #ifdef CONFIG_IRQ_TIMINGS void irq_timings_enable(void); void irq_timings_disable(void); +u64 irq_timings_next_event(u64 now); #endif struct seq_file; -- cgit v1.2.3 From cbf4b3867875206aa548a8c6d7c886f3299d619e Mon Sep 17 00:00:00 2001 From: Okash Khawaja Date: Sat, 17 Jun 2017 22:32:55 +0100 Subject: tty: define tty_open_by_driver when CONFIG_TTY is not defined This patch adds definition of tty_open_by_driver when CONFIG_TTY is not defined. This was supposed to have been included in commit 12e84c71b7d4ee38d51377fd494ac748ee4e6912 ("tty: export tty_open_by_driver"). The patch follows convention for other such functions and returns NULL. 
Signed-off-by: Okash Khawaja Reviewed-by: Samuel Thibault Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 5c3f01f49b10..b75b2d51ba2b 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -422,6 +422,9 @@ static inline int __init tty_init(void) { return 0; } static inline const char *tty_name(const struct tty_struct *tty) { return "(none)"; } +static inline struct tty_struct *tty_open_by_driver(dev_t device, + struct inode *inode, struct file *filp) +{ return NULL; } #endif extern struct ktermios tty_std_termios; -- cgit v1.2.3 From f3ecab38240b624b37e003b5089a93682b109699 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Sun, 25 Jun 2017 11:09:12 +0300 Subject: net: Remove ndo_dfwd_start_xmit Looks like commit f663dd9aaf9e ("net: core: explicitly select a txq before doing l2 forwarding") has removed the need for this dedicated xmit function [it even explicitly states so in its commit log message] but it hasn't removed the definition of the ndo. Signed-off-by: Yuval Mintz CC: Jason Wang CC: John Fastabend Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 68f5d899d1e6..85f01d673340 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1114,12 +1114,6 @@ struct xfrmdev_ops { * by 'ndo_dfwd_add_station'. 'pdev' is the net device backing * the station and priv is the structure returned by the add * operation. - * netdev_tx_t (*ndo_dfwd_start_xmit)(struct sk_buff *skb, - * struct net_device *dev, - * void *priv); - * Callback to use for xmit over the accelerated station. This - * is used in place of ndo_start_xmit on accelerated net - * devices. 
* int (*ndo_set_tx_maxrate)(struct net_device *dev, * int queue_index, u32 maxrate); * Called when a user wants to set a max-rate limitation of specific @@ -1316,9 +1310,6 @@ struct net_device_ops { void (*ndo_dfwd_del_station)(struct net_device *pdev, void *priv); - netdev_tx_t (*ndo_dfwd_start_xmit) (struct sk_buff *skb, - struct net_device *dev, - void *priv); int (*ndo_get_lock_subclass)(struct net_device *dev); int (*ndo_set_tx_maxrate)(struct net_device *dev, int queue_index, -- cgit v1.2.3 From f59dd9c886acb3abb188e8e94a99436560976835 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sat, 24 Jun 2017 11:45:02 -0700 Subject: time: add get_timespec64 and put_timespec64 Add helper functions to convert between struct timespec64 and struct timespec at userspace boundaries. This is a preparatory patch to use timespec64 as the basic type internally in the kernel as timespec is not y2038 safe on 32 bit systems. The patch helps the cause by containing all data conversions at the userspace boundaries within these functions. 
Suggested-by: Arnd Bergmann Signed-off-by: Deepa Dinamani Signed-off-by: Al Viro --- include/linux/compat.h | 2 ++ include/linux/time.h | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 425563c7647b..3eb04016ffa9 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -164,6 +164,8 @@ extern int compat_get_timespec(struct timespec *, const void __user *); extern int compat_put_timespec(const struct timespec *, void __user *); extern int compat_get_timeval(struct timeval *, const void __user *); extern int compat_put_timeval(const struct timeval *, void __user *); +extern int compat_get_timespec64(struct timespec64 *, const void __user *); +extern int compat_put_timespec64(const struct timespec64 *, void __user *); /* * This function convert a timespec if necessary and returns a *user diff --git a/include/linux/time.h b/include/linux/time.h index c0543f5f25de..36afb579495f 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -8,6 +8,11 @@ extern struct timezone sys_tz; +int get_timespec64(struct timespec64 *ts, + const struct timespec __user *uts); +int put_timespec64(const struct timespec64 *ts, + struct timespec __user *uts); + #define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1) static inline int timespec_equal(const struct timespec *a, -- cgit v1.2.3 From d5b7ffbfbdacc29e4db035f90665951668fa9c58 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sat, 24 Jun 2017 11:45:03 -0700 Subject: time: introduce {get,put}_itimerspec64 As we change the user space type for the timerfd and posix timer functions to newer data types, we need some form of conversion helpers to avoid duplicating that logic. 
Suggested-by: Arnd Bergmann Signed-off-by: Deepa Dinamani Signed-off-by: Al Viro --- include/linux/compat.h | 4 ++++ include/linux/posix-timers.h | 1 - include/linux/time.h | 13 +++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 3eb04016ffa9..2ed54020ace0 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -166,6 +166,10 @@ extern int compat_get_timeval(struct timeval *, const void __user *); extern int compat_put_timeval(const struct timeval *, void __user *); extern int compat_get_timespec64(struct timespec64 *, const void __user *); extern int compat_put_timespec64(const struct timespec64 *, void __user *); +extern int get_compat_itimerspec64(struct itimerspec64 *its, + const struct compat_itimerspec __user *uits); +extern int put_compat_itimerspec64(const struct itimerspec64 *its, + struct compat_itimerspec __user *uits); /* * This function convert a timespec if necessary and returns a *user diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 29f1b7f09ced..62839fd04dce 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -113,5 +113,4 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new); void posixtimer_rearm(struct siginfo *info); - #endif diff --git a/include/linux/time.h b/include/linux/time.h index 36afb579495f..f9858d7e6361 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -12,6 +12,10 @@ int get_timespec64(struct timespec64 *ts, const struct timespec __user *uts); int put_timespec64(const struct timespec64 *ts, struct timespec __user *uts); +int get_itimerspec64(struct itimerspec64 *it, + const struct itimerspec __user *uit); +int put_itimerspec64(const struct itimerspec64 *it, + struct itimerspec __user *uit); #define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) 
- 1)) - 1) @@ -275,4 +279,13 @@ static __always_inline void timespec_add_ns(struct timespec *a, u64 ns) a->tv_nsec = ns; } +static inline bool itimerspec64_valid(const struct itimerspec64 *its) +{ + if (!timespec64_valid(&(its->it_interval)) || + !timespec64_valid(&(its->it_value))) + return false; + + return true; +} + #endif -- cgit v1.2.3 From 9902747ec57d11b27c98e53d66112ecceed43c82 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 26 Jun 2017 10:24:27 +0200 Subject: Revert "ktime: Simplify ktime_compare implementation" Thierry bisected boot failures to this simplification commit. Reverts: 3f1d472055bb ("ktime: Simplify ktime_compare implementation") Reported-by: Thierry Reding Signed-off-by: Thomas Gleixner Cc: Mariusz Skamra --- include/linux/ktime.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 04817b1ca019..0c8bd45c8206 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -108,7 +108,11 @@ static inline ktime_t timeval_to_ktime(struct timeval tv) */ static inline int ktime_compare(const ktime_t cmp1, const ktime_t cmp2) { - return ktime_sub(cmp1, cmp2); + if (cmp1 < cmp2) + return -1; + if (cmp1 > cmp2) + return 1; + return 0; } /** -- cgit v1.2.3 From 5985ea8bd5d1b820b909af49fbc2767a990080a6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 23 Jun 2017 16:05:11 -0400 Subject: ftrace: Have the cached module list show in set_ftrace_filter When writing in a module filter into set_ftrace_filter for a module that is not yet loaded, it is cached, and will be executed when the module is loaded (although that is not implemented yet at this commit). Display the list of cached modules to be traced.
Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1b6992e994e6..9fb9a67dc9d4 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -446,7 +446,8 @@ enum { FTRACE_ITER_PRINTALL = (1 << 2), FTRACE_ITER_DO_PROBES = (1 << 3), FTRACE_ITER_PROBE = (1 << 4), - FTRACE_ITER_ENABLED = (1 << 5), + FTRACE_ITER_MOD = (1 << 5), + FTRACE_ITER_ENABLED = (1 << 6), }; void arch_ftrace_update_code(int command); -- cgit v1.2.3 From 8c08f0d5c6fb10ff93ffb1cbf416f4f1c3a52a80 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 26 Jun 2017 11:47:31 -0400 Subject: ftrace: Have cached module filters be an active filter When a module filter is added to set_ftrace_filter, if the module is not loaded, it is cached. This should be considered an active filter, and function tracing should be filtered by this. That is, if a cached module filter is the only filter set, then no function tracing should be happening, as all the functions available will be filtered out. This makes sense, as the reason to add a cached module filter, is to trace the module when you load it. There shouldn't be any other tracing happening until then. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9fb9a67dc9d4..5857390ac35a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -120,6 +120,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * this ops will fail to register or set_filter_ip. * PID - Is affected by set_ftrace_pid (allows filtering on those pids) * RCU - Set when the ops can only be called when RCU is watching. + * TRACE_ARRAY - The ops->private points to a trace_array descriptor. 
*/ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, @@ -138,6 +139,7 @@ enum { FTRACE_OPS_FL_IPMODIFY = 1 << 13, FTRACE_OPS_FL_PID = 1 << 14, FTRACE_OPS_FL_RCU = 1 << 15, + FTRACE_OPS_FL_TRACE_ARRAY = 1 << 16, }; #ifdef CONFIG_DYNAMIC_FTRACE -- cgit v1.2.3 From f8475cef90082bf0902ddab106112de130d90395 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 23 Jun 2017 22:11:52 -0700 Subject: x86: use common aperfmperf_khz_on_cpu() to calculate KHz using APERF/MPERF The goal of this change is to give users a uniform and meaningful result when they read /sys/...cpufreq/scaling_cur_freq on modern x86 hardware, as compared to what they get today. Modern x86 processors include the hardware needed to accurately calculate frequency over an interval -- APERF, MPERF, and the TSC. Here we provide an x86 routine to make this calculation on supported hardware, and use it in preference to any driver-specific cpufreq_driver.get() routine. MHz is computed like so: MHz = base_MHz * delta_APERF / delta_MPERF MHz is the average frequency of the busy processor over a measurement interval. The interval is defined to be the time between successive invocations of aperfmperf_khz_on_cpu(), which are expected to happen on-demand when users read sysfs attribute cpufreq/scaling_cur_freq. As with previous methods of calculating MHz, idle time is excluded. base_MHz above is from TSC calibration global "cpu_khz". This x86 native method to calculate MHz returns a meaningful result no matter if P-states are controlled by hardware or firmware and/or if the Linux cpufreq sub-system is or is-not installed. When this routine is invoked more frequently, the measurement interval becomes shorter. However, the code limits re-computation to 10ms intervals so that average frequency remains meaningful. Discerning users are encouraged to take advantage of the turbostat(8) utility, which can gracefully handle concurrent measurement intervals of arbitrary length.
Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index a5ce0bbeadb5..905117bd5012 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -883,6 +883,8 @@ static inline bool policy_has_boost_freq(struct cpufreq_policy *policy) } #endif +extern unsigned int arch_freq_get_on_cpu(int cpu); + /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs; -- cgit v1.2.3 From fc61ed51270e86440cf7cf84cbe1d86753592932 Mon Sep 17 00:00:00 2001 From: Okash Khawaja Date: Sun, 25 Jun 2017 19:40:00 +0100 Subject: tty: add function to convert device name to number The function converts strings like ttyS0 and ttyUSB0 to dev_t like (4, 64) and (188, 0). It does this by scanning tty_drivers list for corresponding device name and index. If the driver is not registered, this function returns -ENODEV. It also acquires tty_mutex. 
Signed-off-by: Okash Khawaja Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index b75b2d51ba2b..8156a9ee6fe6 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -402,6 +402,7 @@ extern int __init tty_init(void); extern const char *tty_name(const struct tty_struct *tty); extern struct tty_struct *tty_open_by_driver(dev_t device, struct inode *inode, struct file *filp); +extern int tty_dev_name_to_number(const char *name, dev_t *number); #else static inline void tty_kref_put(struct tty_struct *tty) { } @@ -425,6 +426,8 @@ static inline const char *tty_name(const struct tty_struct *tty) static inline struct tty_struct *tty_open_by_driver(dev_t device, struct inode *inode, struct file *filp) { return NULL; } +static inline int tty_dev_name_to_number(const char *name, dev_t *number) +{ return -ENOTSUPP; } #endif extern struct ktermios tty_std_termios; -- cgit v1.2.3 From 2a0165a034ac024b60cca49c61e46f4afa2e4d98 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Thu, 30 Mar 2017 17:09:00 +0300 Subject: net/mlx5: Cancel delayed recovery work when unloading the driver Draining the health workqueue will ignore future health works including the one that report hardware failure and thus we can't enter error state Instead cancel the recovery flow and make sure only recovery flow won't be scheduled. 
Fixes: 5e44fca50470 ('net/mlx5: Only cancel recovery work when cleaning up device') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 93273d9ea4d1..ba260330ce5e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -925,6 +925,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev); void mlx5_drain_health_wq(struct mlx5_core_dev *dev); +void mlx5_drain_health_recovery(struct mlx5_core_dev *dev); int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf, int node); int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf); -- cgit v1.2.3 From 52ec462eca9b87b8036209483efe1c6cf9c49d9a Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Sun, 26 Mar 2017 17:01:57 +0300 Subject: net/mlx5: Add reserved-gids support Reserved GIDs are entries in the GID table in use by the mlx5_core and its submodules (e.g. FPGA, SRIOV, E-Switch, netdev). The entries are reserved at the high indexes of the GID table. A mlx5 submodule may reserve a certain amount of GIDs for its own use during the load sequence by calling mlx5_core_reserve_gids, and must also take care to un-reserve these GIDs when it closes. Reservation is only allowed during the load sequence and before any interfaces (e.g. mlx5_ib or mlx5_en) are up. After reservation, a submodule may call mlx5_core_reserved_gid_alloc/ free to allocate entries from the reserved GIDs pool. Reserve a GID table entry for every supported FPGA QP. A later patch in the patchset will remove them from being reported to IB core. Another such patch will make use of these for FPGA QPs in Innova NIC.
Added lib/mlx5.h to serve as a library for mlx5 submodules, and to expose only public mlx5 API, more mlx5 library files will be added in future submissions. Signed-off-by: Ilan Tayari Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 750701b3b863..08e99bd2cd77 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -737,6 +738,14 @@ struct mlx5e_resources { struct mlx5_sq_bfreg bfreg; }; +#define MLX5_MAX_RESERVED_GIDS 8 + +struct mlx5_rsvd_gids { + unsigned int start; + unsigned int count; + struct ida ida; +}; + struct mlx5_core_dev { struct pci_dev *pdev; /* sync pci state */ @@ -766,6 +775,9 @@ struct mlx5_core_dev { atomic_t num_qps; u32 issi; struct mlx5e_resources mlx5e_res; + struct { + struct mlx5_rsvd_gids reserved_gids; + } roce; #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; #endif @@ -1045,6 +1057,11 @@ int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, bool map_wc, bool fast_path); void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg); +unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev); +int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, + u8 roce_version, u8 roce_l3_type, const u8 *gid, + const u8 *mac, bool vlan, u16 vlan_id); + static inline int fw_initializing(struct mlx5_core_dev *dev) { return ioread32be(&dev->iseg->initializing) >> 31; -- cgit v1.2.3 From a6f7d2aff623bb7572d4bca1caf5820e0cd5a586 Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Sun, 26 Mar 2017 17:23:42 +0300 Subject: net/mlx5: Add support for multiple RoCE enable Previously, only mlx5_ib enabled RoCE on the port, but FPGA needs it as well.
Add support for counting number of enables, so that FPGA and IB can work in parallel and independently. Program the HW to enable RoCE on the first enable call, and program to disable RoCE on the last disable call. Signed-off-by: Ilan Tayari Reviewed-by: Boris Pismenny Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 08e99bd2cd77..32b0835d4491 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -777,6 +777,7 @@ struct mlx5_core_dev { struct mlx5e_resources mlx5e_res; struct { struct mlx5_rsvd_gids reserved_gids; + atomic_t roce_en; } roce; #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; -- cgit v1.2.3 From 6062118d5cd2b90369278cdf831aeffb84ae3943 Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Mon, 27 Mar 2017 14:52:09 +0300 Subject: net/mlx5: FPGA, Add FW commands for FPGA QPs The FPGA QP is a high-bandwidth communication channel between the host CPU and the FPGA device. It allows performing DMA operations between host memory and the FPGA logic via the ConnectX chip. Add ConnectX FW commands which create and manipulate FPGA QPs. 
Signed-off-by: Ilan Tayari Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 5 + include/linux/mlx5/mlx5_ifc_fpga.h | 199 +++++++++++++++++++++++++++++++++++++ 2 files changed, 204 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index d6b99d5d0f24..a8b3fcaa33ff 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -232,6 +232,11 @@ enum { MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e, MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940, MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941, + MLX5_CMD_OP_FPGA_CREATE_QP = 0x960, + MLX5_CMD_OP_FPGA_MODIFY_QP = 0x961, + MLX5_CMD_OP_FPGA_QUERY_QP = 0x962, + MLX5_CMD_OP_FPGA_DESTROY_QP = 0x963, + MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS = 0x964, MLX5_CMD_OP_MAX }; diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index 0032d10ac6cf..30d4b697fab6 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -141,4 +141,203 @@ struct mlx5_ifc_fpga_error_event_bits { u8 reserved_at_60[0x80]; }; +enum mlx5_ifc_fpga_qp_state { + MLX5_FPGA_QPC_STATE_INIT = 0x0, + MLX5_FPGA_QPC_STATE_ACTIVE = 0x1, + MLX5_FPGA_QPC_STATE_ERROR = 0x2, +}; + +enum mlx5_ifc_fpga_qp_type { + MLX5_FPGA_QPC_QP_TYPE_SHELL_QP = 0x0, + MLX5_FPGA_QPC_QP_TYPE_SANDBOX_QP = 0x1, +}; + +enum mlx5_ifc_fpga_qp_service_type { + MLX5_FPGA_QPC_ST_RC = 0x0, +}; + +struct mlx5_ifc_fpga_qpc_bits { + u8 state[0x4]; + u8 reserved_at_4[0x1b]; + u8 qp_type[0x1]; + + u8 reserved_at_20[0x4]; + u8 st[0x4]; + u8 reserved_at_28[0x10]; + u8 traffic_class[0x8]; + + u8 ether_type[0x10]; + u8 prio[0x3]; + u8 dei[0x1]; + u8 vid[0xc]; + + u8 reserved_at_60[0x20]; + + u8 reserved_at_80[0x8]; + u8 next_rcv_psn[0x18]; + + u8 reserved_at_a0[0x8]; + u8 next_send_psn[0x18]; + + u8 reserved_at_c0[0x10]; + u8 pkey[0x10]; + + u8 reserved_at_e0[0x8]; + u8 remote_qpn[0x18]; + + u8 reserved_at_100[0x15]; + u8 rnr_retry[0x3]; + u8 
reserved_at_118[0x5]; + u8 retry_count[0x3]; + + u8 reserved_at_120[0x20]; + + u8 reserved_at_140[0x10]; + u8 remote_mac_47_32[0x10]; + + u8 remote_mac_31_0[0x20]; + + u8 remote_ip[16][0x8]; + + u8 reserved_at_200[0x40]; + + u8 reserved_at_240[0x10]; + u8 fpga_mac_47_32[0x10]; + + u8 fpga_mac_31_0[0x20]; + + u8 fpga_ip[16][0x8]; +}; + +struct mlx5_ifc_fpga_create_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_fpga_qpc_bits fpga_qpc; +}; + +struct mlx5_ifc_fpga_create_qp_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x8]; + u8 fpga_qpn[0x18]; + + u8 reserved_at_60[0x20]; + + struct mlx5_ifc_fpga_qpc_bits fpga_qpc; +}; + +struct mlx5_ifc_fpga_modify_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x8]; + u8 fpga_qpn[0x18]; + + u8 field_select[0x20]; + + struct mlx5_ifc_fpga_qpc_bits fpga_qpc; +}; + +struct mlx5_ifc_fpga_modify_qp_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_fpga_query_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x8]; + u8 fpga_qpn[0x18]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_fpga_query_qp_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_fpga_qpc_bits fpga_qpc; +}; + +struct mlx5_ifc_fpga_query_qp_counters_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 clear[0x1]; + u8 reserved_at_41[0x7]; + u8 fpga_qpn[0x18]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_fpga_query_qp_counters_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + 
u8 rx_ack_packets[0x40]; + + u8 rx_send_packets[0x40]; + + u8 tx_ack_packets[0x40]; + + u8 tx_send_packets[0x40]; + + u8 rx_total_drop[0x40]; + + u8 reserved_at_1c0[0x1c0]; +}; + +struct mlx5_ifc_fpga_destroy_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x8]; + u8 fpga_qpn[0x18]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_fpga_destroy_qp_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + #endif /* MLX5_IFC_FPGA_H */ -- cgit v1.2.3 From c43051d72a8dc4a00d49db27292a76d26e8df7af Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Tue, 18 Apr 2017 12:54:27 +0300 Subject: net/mlx5: FPGA, Add SBU bypass and reset flows The Innova FPGA includes shell hardware and Sandbox-Unit (SBU) hardware. The shell hardware is handled by mlx5_core itself, while the SBU is handled by a client driver. Reset the SBU to a well-known initial state when initializing a new device, and set the FPGA to bypass mode when uninitializing a device. This allows the client driver to assume that its device has been reset when a new device is detected. During SBU reset, the FPGA is put into SBU-bypass mode. In this mode packets do not pass through the SBU, so it cannot affect the network data stream at all. A factory-image does not have an SBU, so skip these flows. 
Signed-off-by: Ilan Tayari Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc_fpga.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index 30d4b697fab6..0694077c9634 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -108,6 +108,15 @@ struct mlx5_ifc_fpga_cap_bits { u8 reserved_at_500[0x300]; }; +enum { + MLX5_FPGA_CTRL_OPERATION_LOAD = 0x1, + MLX5_FPGA_CTRL_OPERATION_RESET = 0x2, + MLX5_FPGA_CTRL_OPERATION_FLASH_SELECT = 0x3, + MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON = 0x4, + MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF = 0x5, + MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX = 0x6, +}; + struct mlx5_ifc_fpga_ctrl_bits { u8 reserved_at_0[0x8]; u8 operation[0x8]; -- cgit v1.2.3 From a9956d35d199beb406727a4496bc5d7f09c82976 Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Tue, 18 Apr 2017 13:10:41 +0300 Subject: net/mlx5: FPGA, Add SBU infrastructure Add interface to initialize and interact with Innova FPGA SBU connections. A client driver may use these functions to set up a high-speed DMA connection with its SBU hardware logic, and send/receive messages over this connection. A later patch in this patchset will make use of these functions for Innova IPSec offload in mlx5 Ethernet driver. Add commands to retrieve Innova FPGA SBU capabilities, and to read/write Innova FPGA configuration space registers and memory, over internal I2C. At high level, the FPGA configuration space is divided such: 0x00000000 - 0x007fffff is reserved for the SBU 0x00800000 - 0xffffffff is reserved for the Shell 0x400000000 - ... is DDR memory A later patchset will add support for accessing FPGA CrSpace and memory over a high-speed connection. This is the reason for the ACCESS_TYPE enumeration, which currently only supports I2C. 
Signed-off-by: Ilan Tayari Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 3 +++ include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 1 + include/linux/mlx5/mlx5_ifc_fpga.h | 13 +++++++++++++ 4 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 556e1c31b5d0..f31a0b5377e1 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1103,6 +1103,9 @@ enum mlx5_mcam_feature_groups { #define MLX5_CAP_FPGA(mdev, cap) \ MLX5_GET(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap) +#define MLX5_CAP64_FPGA(mdev, cap) \ + MLX5_GET64(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 32b0835d4491..2ab4ae3e3a1a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -111,6 +111,7 @@ enum { MLX5_REG_DCBX_APP = 0x4021, MLX5_REG_FPGA_CAP = 0x4022, MLX5_REG_FPGA_CTRL = 0x4023, + MLX5_REG_FPGA_ACCESS_REG = 0x4024, MLX5_REG_PCAP = 0x5001, MLX5_REG_PMTU = 0x5003, MLX5_REG_PTYS = 0x5004, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a8b3fcaa33ff..c72f9735119d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -8309,6 +8309,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_sltp_reg_bits sltp_reg; struct mlx5_ifc_mtpps_reg_bits mtpps_reg; struct mlx5_ifc_mtppse_reg_bits mtppse_reg; + struct mlx5_ifc_fpga_access_reg_bits fpga_access_reg; struct mlx5_ifc_fpga_ctrl_bits fpga_ctrl_bits; struct mlx5_ifc_fpga_cap_bits fpga_cap_bits; struct mlx5_ifc_mcqi_reg_bits mcqi_reg; diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index 0694077c9634..a3576654179e 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -150,6 +150,19 @@ struct 
mlx5_ifc_fpga_error_event_bits { u8 reserved_at_60[0x80]; }; +#define MLX5_FPGA_ACCESS_REG_SIZE_MAX 64 + +struct mlx5_ifc_fpga_access_reg_bits { + u8 reserved_at_0[0x20]; + + u8 reserved_at_20[0x10]; + u8 size[0x10]; + + u8 address[0x40]; + + u8 data[0][0x8]; +}; + enum mlx5_ifc_fpga_qp_state { MLX5_FPGA_QPC_STATE_INIT = 0x0, MLX5_FPGA_QPC_STATE_ACTIVE = 0x1, -- cgit v1.2.3 From bebb23e6cb02d2fc752905e39d09ff6152852c6c Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Tue, 25 Apr 2017 22:42:31 +0300 Subject: net/mlx5: Accel, Add IPSec acceleration interface Add routines for manipulating the hardware IPSec SA database (SADB). In Innova IPSec, a Security Association (SA) is added or deleted via a command message over the SBU connection. The HW then sends a response message over the same connection. Add implementation for Innova IPSec (FPGA-based) hardware. These routines will be used by the IPSec offload support in a later patch However they may also be used by others such as RDMA and RoCE IPSec. mlx5/accel is a middle acceleration layer to allow mlx5e and other ULPs to work directly with mlx5_core rather than Innova FPGA or other mlx5 acceleration providers. In this patchset we add Innova IPSec support and mlx5/accel delegates IPSec offloads to Innova routines. In the future, when IPSec/TLS or any other acceleration gets integrated into ConnectX chip, mlx5/accel layer will provide the integrated acceleration, rather than the Innova one. 
Signed-off-by: Ilan Tayari Signed-off-by: Boris Pismenny Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc_fpga.h | 67 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index a3576654179e..255a88d08078 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -32,6 +32,14 @@ #ifndef MLX5_IFC_FPGA_H #define MLX5_IFC_FPGA_H +enum { + MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX = 0x2c9, +}; + +enum { + MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC = 0x2, +}; + struct mlx5_ifc_fpga_shell_caps_bits { u8 max_num_qps[0x10]; u8 reserved_at_10[0x8]; @@ -362,4 +370,63 @@ struct mlx5_ifc_fpga_destroy_qp_out_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_ipsec_extended_cap_bits { + u8 encapsulation[0x20]; + + u8 reserved_0[0x15]; + u8 ipv4_fragment[0x1]; + u8 ipv6[0x1]; + u8 esn[0x1]; + u8 lso[0x1]; + u8 transport_and_tunnel_mode[0x1]; + u8 tunnel_mode[0x1]; + u8 transport_mode[0x1]; + u8 ah_esp[0x1]; + u8 esp[0x1]; + u8 ah[0x1]; + u8 ipv4_options[0x1]; + + u8 auth_alg[0x20]; + + u8 enc_alg[0x20]; + + u8 sa_cap[0x20]; + + u8 reserved_1[0x10]; + u8 number_of_ipsec_counters[0x10]; + + u8 ipsec_counters_addr_low[0x20]; + u8 ipsec_counters_addr_high[0x20]; +}; + +struct mlx5_ifc_ipsec_counters_bits { + u8 dec_in_packets[0x40]; + + u8 dec_out_packets[0x40]; + + u8 dec_bypass_packets[0x40]; + + u8 enc_in_packets[0x40]; + + u8 enc_out_packets[0x40]; + + u8 enc_bypass_packets[0x40]; + + u8 drop_dec_packets[0x40]; + + u8 failed_auth_dec_packets[0x40]; + + u8 drop_enc_packets[0x40]; + + u8 success_add_sa[0x40]; + + u8 fail_add_sa[0x40]; + + u8 success_delete_sa[0x40]; + + u8 fail_delete_sa[0x40]; + + u8 dropped_cmd[0x40]; +}; + #endif /* MLX5_IFC_FPGA_H */ -- cgit v1.2.3 From 547eede070eb981f1442e494f08f4567dcf1d1c7 Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Tue, 18 Apr 2017 16:04:28 +0300 Subject: net/mlx5e: 
IPSec, Innova IPSec offload infrastructure Add Innova IPSec ESP crypto offload configuration paths. Detect Innova IPSec device and set the NETIF_F_HW_ESP flag. Configure Security Associations using the API introduced in a previous patch. Add Software-parser hardware descriptor layout Software-Parser (swp) is a hardware feature in ConnectX which allows the host software to specify protocol header offsets in the TX path, thus overriding the hardware parser. This is useful for protocols that the ASIC may not be able to parse on its own. Note that due to inline metadata, XDP is not supported in Innova IPSec. Signed-off-by: Ilan Tayari Signed-off-by: Yossi Kuperman Signed-off-by: Yevgeny Kliteynik Signed-off-by: Boris Pismenny Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 8 ++++++-- include/linux/mlx5/qp.h | 14 ++++++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c72f9735119d..87869c04849a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -605,7 +605,10 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 tunnel_statless_gre[0x1]; u8 tunnel_stateless_vxlan[0x1]; - u8 reserved_at_20[0x20]; + u8 swp[0x1]; + u8 swp_csum[0x1]; + u8 swp_lso[0x1]; + u8 reserved_at_23[0x1d]; u8 reserved_at_40[0x10]; u8 lro_min_mss_size[0x10]; @@ -2438,7 +2441,8 @@ struct mlx5_ifc_sqc_bits { u8 min_wqe_inline_mode[0x3]; u8 state[0x4]; u8 reg_umr[0x1]; - u8 reserved_at_d[0x13]; + u8 allow_swp[0x1]; + u8 reserved_at_e[0x12]; u8 reserved_at_20[0x8]; u8 user_index[0x18]; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 1f637f4d1265..6f41270d80c0 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -225,10 +225,20 @@ enum { MLX5_ETH_WQE_INSERT_VLAN = 1 << 15, }; +enum { + MLX5_ETH_WQE_SWP_INNER_L3_IPV6 = 1 << 0, + MLX5_ETH_WQE_SWP_INNER_L4_UDP = 1 << 1, + 
MLX5_ETH_WQE_SWP_OUTER_L3_IPV6 = 1 << 4, + MLX5_ETH_WQE_SWP_OUTER_L4_UDP = 1 << 5, +}; + struct mlx5_wqe_eth_seg { - u8 rsvd0[4]; + u8 swp_outer_l4_offset; + u8 swp_outer_l3_offset; + u8 swp_inner_l4_offset; + u8 swp_inner_l3_offset; u8 cs_flags; - u8 rsvd1; + u8 swp_flags; __be16 mss; __be32 rsvd2; union { -- cgit v1.2.3 From 29d99b966d60029a11d08b9b004cd84b21ce0d67 Mon Sep 17 00:00:00 2001 From: Shawn Nematbakhsh Date: Tue, 14 Feb 2017 20:58:02 +0100 Subject: cros_ec: Don't signal wake event for non-wake host events The subset of wake-enabled host events is defined by the EC, but the EC may still send non-wake host events if we're in the process of suspending. Get the mask of wake-enabled host events from the EC and filter out non-wake events to prevent spurious aborted suspend attempts. Signed-off-by: Shawn Nematbakhsh Signed-off-by: Thierry Escande Acked-for-MFD-by: Lee Jones Signed-off-by: Benson Leung --- include/linux/mfd/cros_ec.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 3b16c9009749..4e887ba22635 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -149,6 +149,7 @@ struct cros_ec_device { struct ec_response_get_next_event event_data; int event_size; + u32 host_event_wake_mask; }; /** @@ -299,10 +300,12 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev); * cros_ec_get_next_event - Fetch next event from the ChromeOS EC * * @ec_dev: Device to fetch event from + * @wake_event: Pointer to a bool set to true upon return if the event might be + * treated as a wake event. Ignored if null. * * Returns: 0 on success, Linux error number on failure */ -int cros_ec_get_next_event(struct cros_ec_device *ec_dev); +int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event); /** * cros_ec_get_host_event - Return a mask of event set by the EC. 
-- cgit v1.2.3 From d914ba37d7145acb9fd3bb23075c2d56e5a44eb6 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 26 Jun 2017 19:01:55 -0700 Subject: tracing: Add support for recording tgid of tasks In order to support recording of tgid, the following changes are made: * Introduce a new API (tracing_record_taskinfo) to additionally record the tgid along with the task's comm at the same time. This has the benefit of not setting trace_cmdline_save before all the information for a task is saved. * Add a new API tracing_record_taskinfo_sched_switch to record task information for 2 tasks at a time (previous and next) and use it from sched_switch probe. * Preserve the old API (tracing_record_cmdline) and create it as a wrapper around the new one so that existing callers aren't affected. * Reuse the existing sched_switch and sched_wakeup probes to record tgid information and add a new option 'record-tgid' to enable recording of tgid When record-tgid option isn't enabled to begin with, we take care to make sure that there isn't memory or runtime overhead. 
Link: http://lkml.kernel.org/r/20170627020155.5139-1-joelaf@google.com Cc: kernel-team@android.com Cc: Ingo Molnar Tested-by: Michael Sartain Signed-off-by: Joel Fernandes Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index a556805eff8a..f73cedfa2e0b 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -151,7 +151,15 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer, int type, unsigned long len, unsigned long flags, int pc); -void tracing_record_cmdline(struct task_struct *tsk); +#define TRACE_RECORD_CMDLINE BIT(0) +#define TRACE_RECORD_TGID BIT(1) + +void tracing_record_taskinfo(struct task_struct *task, int flags); +void tracing_record_taskinfo_sched_switch(struct task_struct *prev, + struct task_struct *next, int flags); + +void tracing_record_cmdline(struct task_struct *task); +void tracing_record_tgid(struct task_struct *task); int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...); @@ -290,6 +298,7 @@ struct trace_subsystem_dir; enum { EVENT_FILE_FL_ENABLED_BIT, EVENT_FILE_FL_RECORDED_CMD_BIT, + EVENT_FILE_FL_RECORDED_TGID_BIT, EVENT_FILE_FL_FILTERED_BIT, EVENT_FILE_FL_NO_SET_FILTER_BIT, EVENT_FILE_FL_SOFT_MODE_BIT, @@ -303,6 +312,7 @@ enum { * Event file flags: * ENABLED - The event is enabled * RECORDED_CMD - The comms should be recorded at sched_switch + * RECORDED_TGID - The tgids should be recorded at sched_switch * FILTERED - The event has a filter attached * NO_SET_FILTER - Set when filter has error and is to be ignored * SOFT_MODE - The event is enabled/disabled by SOFT_DISABLED @@ -315,6 +325,7 @@ enum { enum { EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT), EVENT_FILE_FL_RECORDED_CMD = (1 << EVENT_FILE_FL_RECORDED_CMD_BIT), + EVENT_FILE_FL_RECORDED_TGID = (1 << 
EVENT_FILE_FL_RECORDED_TGID_BIT), EVENT_FILE_FL_FILTERED = (1 << EVENT_FILE_FL_FILTERED_BIT), EVENT_FILE_FL_NO_SET_FILTER = (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT), EVENT_FILE_FL_SOFT_MODE = (1 << EVENT_FILE_FL_SOFT_MODE_BIT), -- cgit v1.2.3 From c75b1d9421f80f4143e389d2d50ddfc8a28c8c35 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 Jun 2017 11:47:04 -0600 Subject: fs: add fcntl() interface for setting/getting write life time hints Define a set of write life time hints: RWH_WRITE_LIFE_NOT_SET No hint information set RWH_WRITE_LIFE_NONE No hints about write life time RWH_WRITE_LIFE_SHORT Data written has a short life time RWH_WRITE_LIFE_MEDIUM Data written has a medium life time RWH_WRITE_LIFE_LONG Data written has a long life time RWH_WRITE_LIFE_EXTREME Data written has an extremely long life time The intent is for these values to be relative to each other, no absolute meaning should be attached to these flag names. Add an fcntl interface for querying these flags, and also for setting them as well: F_GET_RW_HINT Returns the read/write hint set on the underlying inode. F_SET_RW_HINT Set one of the above write hints on the underlying inode. F_GET_FILE_RW_HINT Returns the read/write hint set on the file descriptor. F_SET_FILE_RW_HINT Set one of the above write hints on the file descriptor. The user passes in a 64-bit pointer to get/set these values, and the interface returns 0/-1 on success/error. Sample program testing/implementing basic setting/getting of write hints is below. Add support for storing the write life time hint in the inode flags and in struct file as well, and pass them to the kiocb flags. If both a file and its corresponding inode has a write hint, then we use the one in the file, if available. The file hint can be used for sync/direct IO, for buffered writeback only the inode hint is available. This is in preparation for utilizing these hints in the block layer, to guide on-media data placement. 
/* * writehint.c: get or set an inode write hint */ #include #include #include #include #include #include #ifndef F_GET_RW_HINT #define F_LINUX_SPECIFIC_BASE 1024 #define F_GET_RW_HINT (F_LINUX_SPECIFIC_BASE + 11) #define F_SET_RW_HINT (F_LINUX_SPECIFIC_BASE + 12) #endif static char *str[] = { "RWF_WRITE_LIFE_NOT_SET", "RWH_WRITE_LIFE_NONE", "RWH_WRITE_LIFE_SHORT", "RWH_WRITE_LIFE_MEDIUM", "RWH_WRITE_LIFE_LONG", "RWH_WRITE_LIFE_EXTREME" }; int main(int argc, char *argv[]) { uint64_t hint; int fd, ret; if (argc < 2) { fprintf(stderr, "%s: file \n", argv[0]); return 1; } fd = open(argv[1], O_RDONLY); if (fd < 0) { perror("open"); return 2; } if (argc > 2) { hint = atoi(argv[2]); ret = fcntl(fd, F_SET_RW_HINT, &hint); if (ret < 0) { perror("fcntl: F_SET_RW_HINT"); return 4; } } ret = fcntl(fd, F_GET_RW_HINT, &hint); if (ret < 0) { perror("fcntl: F_GET_RW_HINT"); return 3; } printf("%s: hint %s\n", argv[1], str[hint]); close(fd); return 0; } Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/fs.h | 47 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4574121f4746..65adbddb3163 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -265,6 +266,18 @@ struct page; struct address_space; struct writeback_control; +/* + * Write life time hint values. 
+ */ +enum rw_hint { + WRITE_LIFE_NOT_SET = 0, + WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE, + WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT, + WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, + WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, + WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, +}; + #define IOCB_EVENTFD (1 << 0) #define IOCB_APPEND (1 << 1) #define IOCB_DIRECT (1 << 2) @@ -280,6 +293,7 @@ struct kiocb { void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); void *private; int ki_flags; + enum rw_hint ki_hint; }; static inline bool is_sync_kiocb(struct kiocb *kiocb) @@ -287,16 +301,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb) return kiocb->ki_complete == NULL; } -static inline int iocb_flags(struct file *file); - -static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) -{ - *kiocb = (struct kiocb) { - .ki_filp = filp, - .ki_flags = iocb_flags(filp), - }; -} - /* * "descriptor" for what we're up to with a read. * This allows us to use the same read code yet @@ -597,6 +601,7 @@ struct inode { spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; unsigned int i_blkbits; + enum rw_hint i_write_hint; blkcnt_t i_blocks; #ifdef __NEED_I_SIZE_ORDERED @@ -851,6 +856,7 @@ struct file { * Must not be taken from IRQ context. 
*/ spinlock_t f_lock; + enum rw_hint f_write_hint; atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; @@ -1026,8 +1032,6 @@ struct file_lock_context { #define OFFT_OFFSET_MAX INT_LIMIT(off_t) #endif -#include - extern void send_sigio(struct fown_struct *fown, int fd, int band); /* @@ -1878,6 +1882,25 @@ static inline bool HAS_UNMAPPED_ID(struct inode *inode) return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid); } +static inline enum rw_hint file_write_hint(struct file *file) +{ + if (file->f_write_hint != WRITE_LIFE_NOT_SET) + return file->f_write_hint; + + return file_inode(file)->i_write_hint; +} + +static inline int iocb_flags(struct file *file); + +static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) +{ + *kiocb = (struct kiocb) { + .ki_filp = filp, + .ki_flags = iocb_flags(filp), + .ki_hint = file_write_hint(filp), + }; +} + /* * Inode state bits. Protected by inode->i_lock * -- cgit v1.2.3 From cb6934f8ea1a595902ca37e250e0917d4dd7b2a7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 Jun 2017 09:22:02 -0600 Subject: block: add support for write hints in a bio No functional changes in this patch, we just use up some holes in the bio and request structures to define a write hint that we psas down the stack. Ensure that we don't merge requests that have different life time hints assigned to them, and that we inherit the write hint when cloning a bio. Reviewed-by: Martin K. 
Petersen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 + include/linux/blkdev.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index e210da6d14b8..d2eb87c84d82 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -56,6 +56,7 @@ struct bio { */ unsigned short bi_flags; /* status, etc and bvec pool number */ unsigned short bi_ioprio; + unsigned short bi_write_hint; struct bvec_iter bi_iter; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bf2157141d53..0eebd3bcfd85 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -225,6 +225,8 @@ struct request { unsigned int extra_len; /* length of alignment and padding */ + unsigned short write_hint; + unsigned long deadline; struct list_head timeout_list; -- cgit v1.2.3 From f793dfd3f39a3dc50468b06498606b3a906f42f1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 26 Jun 2017 08:15:27 -0600 Subject: blk-mq: expose write hints through debugfs Useful to verify that things are working the way they should. Reading the file will return number of kb written with each write hint. Writing the file will reset the statistics. No care is taken to ensure that we don't race on updates. Drivers will write to q->write_hints[] if they handle a given write hint. Reviewed-by: Andreas Dilger Reviewed-by: Martin K. 
Petersen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0eebd3bcfd85..e1e289ab66b9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -596,6 +596,9 @@ struct request_queue { void *rq_alloc_data; struct work_struct release_work; + +#define BLK_MAX_WRITE_HINTS 5 + u64 write_hints[BLK_MAX_WRITE_HINTS]; }; #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ -- cgit v1.2.3 From f5d118406247acfc4fc481e441e01ea4d6318fdc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 Jun 2017 12:03:06 -0600 Subject: nvme: add support for streams and directives This adds support for Directives in NVMe, in particular for the Streams directive. Support for Directives is a new feature in NVMe 1.3. It allows a user to pass in information about where to store the data, so that the device can do so most efficiently. If an application is managing and writing data with different life times, mixing differently retentioned data onto the same locations on flash can cause write amplification to grow. This, in turn, will reduce performance and life time of the device. Reviewed-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- include/linux/nvme.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 291587a0743f..f516a975bb21 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -253,6 +253,7 @@ enum { NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3, NVME_CTRL_VWC_PRESENT = 1 << 0, NVME_CTRL_OACS_SEC_SUPP = 1 << 0, + NVME_CTRL_OACS_DIRECTIVES = 1 << 5, NVME_CTRL_OACS_DBBUF_SUPP = 1 << 7, }; @@ -303,6 +304,19 @@ enum { NVME_ID_CNS_CTRL_LIST = 0x13, }; +enum { + NVME_DIR_IDENTIFY = 0x00, + NVME_DIR_STREAMS = 0x01, + NVME_DIR_SND_ID_OP_ENABLE = 0x01, + NVME_DIR_SND_ST_OP_REL_ID = 0x01, + NVME_DIR_SND_ST_OP_REL_RSC = 0x02, + NVME_DIR_RCV_ID_OP_PARAM = 0x01, + NVME_DIR_RCV_ST_OP_PARAM = 0x01, + NVME_DIR_RCV_ST_OP_STATUS = 0x02, + NVME_DIR_RCV_ST_OP_RESOURCE = 0x03, + NVME_DIR_ENDIR = 0x01, +}; + enum { NVME_NS_FEAT_THIN = 1 << 0, NVME_NS_FLBAS_LBA_MASK = 0xf, @@ -560,6 +574,7 @@ enum { NVME_RW_PRINFO_PRCHK_APP = 1 << 11, NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, NVME_RW_PRINFO_PRACT = 1 << 13, + NVME_RW_DTYPE_STREAMS = 1 << 4, }; struct nvme_dsm_cmd { @@ -634,6 +649,8 @@ enum nvme_admin_opcode { nvme_admin_download_fw = 0x11, nvme_admin_ns_attach = 0x15, nvme_admin_keep_alive = 0x18, + nvme_admin_directive_send = 0x19, + nvme_admin_directive_recv = 0x1a, nvme_admin_dbbuf = 0x7C, nvme_admin_format_nvm = 0x80, nvme_admin_security_send = 0x81, @@ -797,6 +814,24 @@ struct nvme_get_log_page_command { __u32 rsvd14[2]; }; +struct nvme_directive_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + union nvme_data_ptr dptr; + __le32 numd; + __u8 doper; + __u8 dtype; + __le16 dspec; + __u8 endir; + __u8 tdtype; + __u16 rsvd15; + + __u32 rsvd16[3]; +}; + /* * Fabrics subcommands. 
*/ @@ -927,6 +962,18 @@ struct nvme_dbbuf { __u32 rsvd12[6]; }; +struct streams_directive_params { + __u16 msl; + __u16 nssa; + __u16 nsso; + __u8 rsvd[10]; + __u32 sws; + __u16 sgs; + __u16 nsa; + __u16 nso; + __u8 rsvd2[6]; +}; + struct nvme_command { union { struct nvme_common_command common; @@ -947,6 +994,7 @@ struct nvme_command { struct nvmf_property_set_command prop_set; struct nvmf_property_get_command prop_get; struct nvme_dbbuf dbbuf; + struct nvme_directive_cmd directive; }; }; -- cgit v1.2.3 From 3bce016a4c5975e4279bfb3cbd6d0332b856cc72 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Jun 2017 09:26:21 +0200 Subject: block: move bounce declarations to block/blk.h Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e1e289ab66b9..e7eef48c97c9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -884,19 +884,6 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn; #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) #define BLK_MIN_SG_TIMEOUT (7 * HZ) -#ifdef CONFIG_BOUNCE -extern int init_emergency_isa_pool(void); -extern void blk_queue_bounce(struct request_queue *q, struct bio **bio); -#else -static inline int init_emergency_isa_pool(void) -{ - return 0; -} -static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) -{ -} -#endif /* CONFIG_MMU */ - struct rq_map_data { struct page **pages; int page_order; -- cgit v1.2.3 From 1c4bc3ab9a064d98cdf6de6b44f89d5c3757fa32 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Jun 2017 09:26:22 +0200 Subject: block: remove the queue_bounce_pfn helper Only used inside the bounce code, and opencoding it makes it more obvious what is going on. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e7eef48c97c9..25f6a0cb27d3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1385,11 +1385,6 @@ enum blk_default_limits { #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) -static inline unsigned long queue_bounce_pfn(struct request_queue *q) -{ - return q->limits.bounce_pfn; -} - static inline unsigned long queue_segment_boundary(struct request_queue *q) { return q->limits.seg_boundary_mask; -- cgit v1.2.3 From f2b612578e163b49661ece2fe01dfafb0e78f545 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 23:00:34 -0700 Subject: x86, libnvdimm, pmem: move arch_invalidate_pmem() to libnvdimm Kill this globally defined wrapper and move to libnvdimm so that we can ultimately remove include/linux/pmem.h and asm/pmem.h. Cc: Cc: Jeff Moyer Cc: Ingo Molnar Cc: Christoph Hellwig Cc: "H. 
Peter Anvin" Cc: Thomas Gleixner Cc: Matthew Wilcox Cc: Ross Zwisler Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/pmem.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 33ae761f010a..559c00848583 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -30,11 +30,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) { BUG(); } - -static inline void arch_invalidate_pmem(void *addr, size_t size) -{ - BUG(); -} #endif static inline bool arch_has_pmem_api(void) @@ -61,18 +56,4 @@ static inline void memcpy_to_pmem(void *dst, const void *src, size_t n) else memcpy(dst, src, n); } - -/** - * invalidate_pmem - flush a pmem range from the cache hierarchy - * @addr: virtual start address - * @size: bytes to invalidate (internally aligned to cache line size) - * - * For platforms that support clearing poison this flushes any poisoned - * ranges out of the cache - */ -static inline void invalidate_pmem(void *addr, size_t size) -{ - if (arch_has_pmem_api()) - arch_invalidate_pmem(addr, size); -} #endif /* __PMEM_H__ */ -- cgit v1.2.3 From ca6a4657e5420dec727256717e905ebc3c751352 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 13 Jan 2017 20:36:58 -0800 Subject: x86, libnvdimm, pmem: remove global pmem api Now that all callers of the pmem api have been converted to dax helpers that call back to the pmem driver, we can remove include/linux/pmem.h and asm/pmem.h. 
Cc: Cc: Jeff Moyer Cc: Ingo Molnar Cc: Christoph Hellwig Cc: Toshi Kani Cc: Oliver O'Halloran Cc: Ross Zwisler Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 1 + include/linux/pmem.h | 59 ----------------------------------------------- 2 files changed, 1 insertion(+), 59 deletions(-) delete mode 100644 include/linux/pmem.h (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 6c807017128d..b2f659bd661d 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -159,6 +159,7 @@ void *nd_region_provider_data(struct nd_region *nd_region); void *nd_blk_region_provider_data(struct nd_blk_region *ndbr); void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data); struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr); +unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr); unsigned int nd_region_acquire_lane(struct nd_region *nd_region); void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); u64 nd_fletcher64(void *addr, size_t len, bool le); diff --git a/include/linux/pmem.h b/include/linux/pmem.h deleted file mode 100644 index 559c00848583..000000000000 --- a/include/linux/pmem.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright(c) 2015 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- */ -#ifndef __PMEM_H__ -#define __PMEM_H__ - -#include -#include - -#ifdef CONFIG_ARCH_HAS_PMEM_API -#define ARCH_MEMREMAP_PMEM MEMREMAP_WB -#include -#else -#define ARCH_MEMREMAP_PMEM MEMREMAP_WT -/* - * These are simply here to enable compilation, all call sites gate - * calling these symbols with arch_has_pmem_api() and redirect to the - * implementation in asm/pmem.h. - */ -static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) -{ - BUG(); -} -#endif - -static inline bool arch_has_pmem_api(void) -{ - return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); -} - -/** - * memcpy_to_pmem - copy data to persistent memory - * @dst: destination buffer for the copy - * @src: source buffer for the copy - * @n: length of the copy in bytes - * - * Perform a memory copy that results in the destination of the copy - * being effectively evicted from, or never written to, the processor - * cache hierarchy after the copy completes. After memcpy_to_pmem() - * data may still reside in cpu or platform buffers, so this operation - * must be followed by a blkdev_issue_flush() on the pmem block device. - */ -static inline void memcpy_to_pmem(void *dst, const void *src, size_t n) -{ - if (arch_has_pmem_api()) - arch_memcpy_to_pmem(dst, src, n); - else - memcpy(dst, src, n); -} -#endif /* __PMEM_H__ */ -- cgit v1.2.3 From 5d61e43b3975c0582003329d9de9d5e85abf5d33 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 27 Jun 2017 13:06:22 -0700 Subject: dax: remove default copy_from_iter fallback Require all dax-drivers to register a ->copy_from_iter() operation so that it is clear which dax_operations are optional and which must be implemented for filesystem-dax to operate. 
Cc: Gerald Schaefer Suggested-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/dax.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 1f6b6072af64..73fca1bebaf3 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -16,7 +16,7 @@ struct dax_operations { */ long (*direct_access)(struct dax_device *, pgoff_t, long, void **, pfn_t *); - /* copy_from_iter: dax-driver override for default copy_from_iter */ + /* copy_from_iter: required operation for fs-dax direct-i/o */ size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, struct iov_iter *); /* flush: optional driver-specific cache management after writes */ -- cgit v1.2.3 From 8370c2dc4c7b91be7e1231130f0ae08b5aebecf4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 24 Jun 2017 01:56:13 +0200 Subject: PCI / PM: Drop pme_interrupt flag from struct pci_dev The pme_interrupt flag in struct pci_dev is set when PMEs generated by the device are going to be signaled via root port PME interrupts. Ironically enough, that information is only used by the code setting up device wakeup through ACPI which returns as soon as it sees the pme_interrupt flag set while setting up "remote runtime wakeup". That is questionable, however, because in theory there may be PCIe devices using out-of-band PME signaling under root ports handled by the native PME code or devices requiring wakeup power setup to be carried out by AML. For such devices, ACPI wakeup should be invoked regardless of whether or not native PME signaling is used in general. For this reason, drop the pme_interrupt flag and rework the code using it which then allows the ACPI-based device wakeup handling in PCI to be consolidated to use one code path for both "runtime remote wakeup" and system wakeup (from sleep states). Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Mika Westerberg Acked-by: Bjorn Helgaas --- include/linux/pci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 8039f9f0ca05..d3d5bca82b43 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -307,7 +307,6 @@ struct pci_dev { u8 pm_cap; /* PM capability offset */ unsigned int pme_support:5; /* Bitmask of states from which PME# can be generated */ - unsigned int pme_interrupt:1; unsigned int pme_poll:1; /* Poll device's PME status bit */ unsigned int d1_support:1; /* Low power state D1 is supported */ unsigned int d2_support:1; /* Low power state D2 is supported */ -- cgit v1.2.3 From 0847684cfc5f0e9f009919bfdcb041d60e19b856 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 24 Jun 2017 01:57:35 +0200 Subject: PCI / PM: Simplify device wakeup settings code After previous changes it is not necessary to distinguish between device wakeup for run time and device wakeup from system sleep states any more, so rework the PCI device wakeup settings code accordingly. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Mika Westerberg Acked-by: Bjorn Helgaas --- include/linux/pci.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index d3d5bca82b43..ff200c84240b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1097,8 +1097,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state); pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); bool pci_pme_capable(struct pci_dev *dev, pci_power_t state); void pci_pme_active(struct pci_dev *dev, bool enable); -int __pci_enable_wake(struct pci_dev *dev, pci_power_t state, - bool runtime, bool enable); +int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable); int pci_wake_from_d3(struct pci_dev *dev, bool enable); int pci_prepare_to_sleep(struct pci_dev *dev); int pci_back_from_sleep(struct pci_dev *dev); @@ -1108,12 +1107,6 @@ void pci_pme_wakeup_bus(struct pci_bus *bus); void pci_d3cold_enable(struct pci_dev *dev); void pci_d3cold_disable(struct pci_dev *dev); -static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, - bool enable) -{ - return __pci_enable_wake(dev, state, false, enable); -} - /* PCI Virtual Channel */ int pci_save_vc_state(struct pci_dev *dev); void pci_restore_vc_state(struct pci_dev *dev); -- cgit v1.2.3 From de3ef1eb1cd0cc3a75f7a3661e10ed827f370ab8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 24 Jun 2017 01:58:53 +0200 Subject: PM / core: Drop run_wake flag from struct dev_pm_info The run_wake flag in struct dev_pm_info is used to indicate whether or not the device is capable of generating remote wakeup signals at run time (or in the system working state), but the distinction between runtime remote wakeup and system wakeup signaling has always been rather artificial. 
The only practical reason for it to exist at the core level was that ACPI and PCI treated those two cases differently, but that's not the case any more after recent changes. For this reason, get rid of the run_wake flag and, when applicable, use device_set_wakeup_capable() and device_can_wakeup() instead of device_set_run_wake() and device_run_wake(), respectively. Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg Acked-by: Bjorn Helgaas --- include/linux/pm.h | 1 - include/linux/pm_runtime.h | 12 ------------ 2 files changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index a0894bc52bb4..b8b4df09fd8f 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -584,7 +584,6 @@ struct dev_pm_info { unsigned int idle_notification:1; unsigned int request_pending:1; unsigned int deferred_resume:1; - unsigned int run_wake:1; unsigned int runtime_auto:1; bool ignore_children:1; unsigned int no_callbacks:1; diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index ca4823e675e2..2efb08a60e63 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -76,16 +76,6 @@ static inline void pm_runtime_put_noidle(struct device *dev) atomic_add_unless(&dev->power.usage_count, -1, 0); } -static inline bool device_run_wake(struct device *dev) -{ - return dev->power.run_wake; -} - -static inline void device_set_run_wake(struct device *dev, bool enable) -{ - dev->power.run_wake = enable; -} - static inline bool pm_runtime_suspended(struct device *dev) { return dev->power.runtime_status == RPM_SUSPENDED @@ -163,8 +153,6 @@ static inline void pm_runtime_forbid(struct device *dev) {} static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {} static inline void pm_runtime_get_noresume(struct device *dev) {} static inline void pm_runtime_put_noidle(struct device *dev) {} -static inline bool device_run_wake(struct device *dev) { return false; } -static inline void 
device_set_run_wake(struct device *dev, bool enable) {} static inline bool pm_runtime_suspended(struct device *dev) { return false; } static inline bool pm_runtime_active(struct device *dev) { return true; } static inline bool pm_runtime_status_suspended(struct device *dev) { return false; } -- cgit v1.2.3 From fd851a3cdc196bfc1d229b5f22369069af532bf8 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 29 May 2017 12:22:23 +1000 Subject: spin loop primitives for busy waiting Current busy-wait loops are implemented by repeatedly calling cpu_relax() to give an arch option for a low-latency option to improve power and/or SMT resource contention. This poses some difficulties for powerpc, which has SMT priority setting instructions (priorities determine how ifetch cycles are apportioned). powerpc's cpu_relax() is implemented by setting a low priority then setting normal priority. This has several problems: - Changing thread priority can have some execution cost and potential impact to other threads in the core. It's inefficient to execute them every time around a busy-wait loop. - Depending on implementation details, a `low ; medium` sequence may not have much if any affect. Some software with similar pattern actually inserts a lot of nops between, in order to cause a few fetch cycles with the low priority. - The busy-wait loop runs with regular priority. This might only be a few fetch cycles, but if there are several threads running such loops, they could cause a noticable impact on a non-idle thread. Implement spin_begin, spin_end primitives that can be used around busy wait loops, which default to no-ops. And spin_cpu_relax which defaults to cpu_relax. This will allow architectures to hook the entry and exit of busy-wait loops, and will allow powerpc to set low SMT priority at entry, and normal priority at exit. 
Suggested-by: Linus Torvalds Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- include/linux/processor.h | 70 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 include/linux/processor.h (limited to 'include/linux') diff --git a/include/linux/processor.h b/include/linux/processor.h new file mode 100644 index 000000000000..da0c5e56ca02 --- /dev/null +++ b/include/linux/processor.h @@ -0,0 +1,70 @@ +/* Misc low level processor primitives */ +#ifndef _LINUX_PROCESSOR_H +#define _LINUX_PROCESSOR_H + +#include + +/* + * spin_begin is used before beginning a busy-wait loop, and must be paired + * with spin_end when the loop is exited. spin_cpu_relax must be called + * within the loop. + * + * The loop body should be as small and fast as possible, on the order of + * tens of instructions/cycles as a guide. It should and avoid calling + * cpu_relax, or any "spin" or sleep type of primitive including nested uses + * of these primitives. It should not lock or take any other resource. + * Violations of these guidelies will not cause a bug, but may cause sub + * optimal performance. + * + * These loops are optimized to be used where wait times are expected to be + * less than the cost of a context switch (and associated overhead). + * + * Detection of resource owner and decision to spin or sleep or guest-yield + * (e.g., spin lock holder vcpu preempted, or mutex owner not on CPU) can be + * tested within the loop body. + */ +#ifndef spin_begin +#define spin_begin() +#endif + +#ifndef spin_cpu_relax +#define spin_cpu_relax() cpu_relax() +#endif + +/* + * spin_cpu_yield may be called to yield (undirected) to the hypervisor if + * necessary. This should be used if the wait is expected to take longer + * than context switch overhead, but we can't sleep or do a directed yield. 
+ */ +#ifndef spin_cpu_yield +#define spin_cpu_yield() cpu_relax_yield() +#endif + +#ifndef spin_end +#define spin_end() +#endif + +/* + * spin_until_cond can be used to wait for a condition to become true. It + * may be expected that the first iteration will true in the common case + * (no spinning), so that callers should not require a first "likely" test + * for the uncontended case before using this primitive. + * + * Usage and implementation guidelines are the same as for the spin_begin + * primitives, above. + */ +#ifndef spin_until_cond +#define spin_until_cond(cond) \ +do { \ + if (unlikely(!(cond))) { \ + spin_begin(); \ + do { \ + spin_cpu_relax(); \ + } while (!(cond)); \ + spin_end(); \ + } \ +} while (0) + +#endif + +#endif /* _LINUX_PROCESSOR_H */ -- cgit v1.2.3 From f51f288e237cbcfd3dbd1d4fa2d3dec00d7253e2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 22 May 2017 10:58:49 +0200 Subject: dma-mapping: remove DMA_ERROR_CODE And update the documentation - dma_mapping_error has been supported everywhere for a long time. 
Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 4f3eecedca2d..a57875309bfd 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -546,12 +546,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) if (get_dma_ops(dev)->mapping_error) return get_dma_ops(dev)->mapping_error(dev, dma_addr); - -#ifdef DMA_ERROR_CODE - return dma_addr == DMA_ERROR_CODE; -#else return 0; -#endif } #ifndef HAVE_ARCH_DMA_SUPPORTED -- cgit v1.2.3 From 447d899b18169b2ee5e42b2fa8b32dbb40a30a24 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 22 May 2017 11:40:56 +0200 Subject: dma-mapping: remove HAVE_ARCH_DMA_SUPPORTED Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index a57875309bfd..3e5908656226 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -549,7 +549,6 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) return 0; } -#ifndef HAVE_ARCH_DMA_SUPPORTED static inline int dma_supported(struct device *dev, u64 mask) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -560,7 +559,6 @@ static inline int dma_supported(struct device *dev, u64 mask) return 1; return ops->dma_supported(dev, mask); } -#endif #ifndef HAVE_ARCH_DMA_SET_MASK static inline int dma_set_mask(struct device *dev, u64 mask) -- cgit v1.2.3 From 8cc9c26029d8ac3c627ecf8545b617fb78def5d4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Jun 2017 17:05:25 +0200 Subject: dma-mapping: remove the set_dma_mask method Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git 
a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 3e5908656226..527f2ed8c645 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -127,7 +127,6 @@ struct dma_map_ops { enum dma_data_direction dir); int (*mapping_error)(struct device *dev, dma_addr_t dma_addr); int (*dma_supported)(struct device *dev, u64 mask); - int (*set_dma_mask)(struct device *dev, u64 mask); #ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK u64 (*get_required_mask)(struct device *dev); #endif @@ -563,11 +562,6 @@ static inline int dma_supported(struct device *dev, u64 mask) #ifndef HAVE_ARCH_DMA_SET_MASK static inline int dma_set_mask(struct device *dev, u64 mask) { - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (ops->set_dma_mask) - return ops->set_dma_mask(dev, mask); - if (!dev->dma_mask || !dma_supported(dev, mask)) return -EIO; *dev->dma_mask = mask; -- cgit v1.2.3 From 03b643866d889d6edc87cdcee2b3880b7879a441 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 Jun 2017 19:05:09 +0200 Subject: dma-mapping: remove dmam_free_noncoherent This function was never used since it was added. 
Signed-off-by: Christoph Hellwig Acked-by: Tejun Heo --- include/linux/dma-mapping.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 527f2ed8c645..4038dd34afa3 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -736,8 +736,6 @@ extern void dmam_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle); extern void *dmam_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp); -extern void dmam_free_noncoherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle); #ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT extern int dmam_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, -- cgit v1.2.3 From 63d36c95500400642f656ba1970980746cf437f3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 Jun 2017 19:15:04 +0200 Subject: dma-mapping: replace dmam_alloc_noncoherent with dmam_alloc_attrs dmam_alloc_noncoherent is a trivial wrapper around dmam_alloc_attrs, that hardcodes one particular flag. Make the devres code more flexible by allowing the callers to pass arbitrary flags. 
Signed-off-by: Christoph Hellwig Acked-by: Tejun Heo --- include/linux/dma-mapping.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 4038dd34afa3..843ab866e0f4 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -734,8 +734,9 @@ extern void *dmam_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp); extern void dmam_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle); -extern void *dmam_alloc_noncoherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp); +extern void *dmam_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + unsigned long attrs); #ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT extern int dmam_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, -- cgit v1.2.3 From 7aa1f42752f0d31a5bb6d0d5bac92fc8c2044ce2 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 18 Jun 2017 16:15:59 +0300 Subject: nvme: use a single NVME_AQ_DEPTH and relax it to 32 No need to differentiate fabrics from pci/loop, also lower it to 32 as we don't really need 256 inflight admin commands. Signed-off-by: Sagi Grimberg Reviewed-by: Martin K. 
Petersen Reviewed-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index f516a975bb21..6b8ee9e628e1 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -87,7 +87,7 @@ enum { NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */ }; -#define NVMF_AQ_DEPTH 32 +#define NVME_AQ_DEPTH 32 enum { NVME_REG_CAP = 0x0000, /* Controller Capabilities */ -- cgit v1.2.3 From fd25d19f6b8da315332bb75936605fb45d3ea981 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 21 Jun 2017 13:00:26 -0700 Subject: locking/refcount: Create unchecked atomic_t implementation Many subsystems will not use refcount_t unless there is a way to build the kernel so that there is no regression in speed compared to atomic_t. This adds CONFIG_REFCOUNT_FULL to enable the full refcount_t implementation which has the validation but is slightly slower. When not enabled, refcount_t uses the basic unchecked atomic_t routines, which results in no code changes compared to just using atomic_t directly. Signed-off-by: Kees Cook Acked-by: Greg Kroah-Hartman Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Arnd Bergmann Cc: Christoph Hellwig Cc: David S. Miller Cc: David Windsor Cc: Davidlohr Bueso Cc: Elena Reshetova Cc: Eric Biggers Cc: Eric W. Biederman Cc: Hans Liljestrand Cc: James Bottomley Cc: Jann Horn Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Manfred Spraul Cc: Peter Zijlstra Cc: Rik van Riel Cc: Serge E. 
Hallyn Cc: Thomas Gleixner Cc: arozansk@redhat.com Cc: axboe@kernel.dk Cc: linux-arch Link: http://lkml.kernel.org/r/20170621200026.GA115679@beast Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index b34aa649d204..bb71f2871dac 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -41,6 +41,7 @@ static inline unsigned int refcount_read(const refcount_t *r) return atomic_read(&r->refs); } +#ifdef CONFIG_REFCOUNT_FULL extern __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r); extern void refcount_add(unsigned int i, refcount_t *r); @@ -52,6 +53,47 @@ extern void refcount_sub(unsigned int i, refcount_t *r); extern __must_check bool refcount_dec_and_test(refcount_t *r); extern void refcount_dec(refcount_t *r); +#else +static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r) +{ + return atomic_add_unless(&r->refs, i, 0); +} + +static inline void refcount_add(unsigned int i, refcount_t *r) +{ + atomic_add(i, &r->refs); +} + +static inline __must_check bool refcount_inc_not_zero(refcount_t *r) +{ + return atomic_add_unless(&r->refs, 1, 0); +} + +static inline void refcount_inc(refcount_t *r) +{ + atomic_inc(&r->refs); +} + +static inline __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r) +{ + return atomic_sub_and_test(i, &r->refs); +} + +static inline void refcount_sub(unsigned int i, refcount_t *r) +{ + atomic_sub(i, &r->refs); +} + +static inline __must_check bool refcount_dec_and_test(refcount_t *r) +{ + return atomic_dec_and_test(&r->refs); +} + +static inline void refcount_dec(refcount_t *r) +{ + atomic_dec(&r->refs); +} +#endif /* CONFIG_REFCOUNT_FULL */ extern __must_check bool refcount_dec_if_one(refcount_t *r); extern __must_check bool refcount_dec_not_one(refcount_t *r); -- cgit v1.2.3 From 
a80a32341fbabd4276165a9ce4fa4c80168c0bef Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Jun 2017 17:17:35 -0400 Subject: svcrdma: Remove svc_rdma_marshal.c svc_rdma_marshal.c has one remaining exported function -- svc_rdma_xdr_decode_req -- and it has a single call site. Take the same approach as the sendto path, and move this function into the source file where it is called. This is a refactoring change only. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index f3787d800ba4..3ca991657889 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -185,9 +185,6 @@ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, struct xdr_buf *rcvbuf); -/* svc_rdma_marshal.c */ -extern int svc_rdma_xdr_decode_req(struct xdr_buf *); - /* svc_rdma_recvfrom.c */ extern int svc_rdma_recvfrom(struct svc_rqst *); extern int rdma_read_chunk_lcl(struct svcxprt_rdma *, struct svc_rqst *, -- cgit v1.2.3 From 5136f6365ce3eace5a926e10f16ed2a233db5ba9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 27 Jun 2017 14:30:28 -0400 Subject: cgroup: implement "nsdelegate" mount option Currently, cgroup only supports delegation to !root users and cgroup namespaces don't get any special treatments. This limits the usefulness of cgroup namespaces as they by themselves can't be safe delegation boundaries. A process inside a cgroup can change the resource control knobs of the parent in the namespace root and may move processes in and out of the namespace if cgroups outside its namespace are visible somehow. This patch adds a new mount option "nsdelegate" which makes cgroup namespaces delegation boundaries. 
If set, cgroup behaves as if write permission based delegation took place at namespace boundaries - writes to the resource control knobs from the namespace root are denied and migration crossing the namespace boundary aren't allowed from inside the namespace. This allows cgroup namespace to function as a delegation boundary by itself. v2: Silently ignore nsdelegate specified on !init mounts. Signed-off-by: Tejun Heo Cc: Aravind Anbudurai Cc: Serge Hallyn Cc: Eric Biederman --- include/linux/cgroup-defs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 3bc4196bf217..09f4c7df1478 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -67,12 +67,21 @@ enum { enum { CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ + + /* + * Consider namespaces as delegation boundaries. If this flag is + * set, controller specific interface files in a namespace root + * aren't writeable from inside the namespace. + */ + CGRP_ROOT_NS_DELEGATE = (1 << 3), }; /* cftype->flags */ enum { CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ + CFTYPE_NS_DELEGATABLE = (1 << 2), /* writeable beyond delegation boundaries */ + CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */ -- cgit v1.2.3 From a71411dbf6c82ba2eb2519717c04ffb19bc4dda5 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Fri, 23 Jun 2017 14:35:09 +0200 Subject: regmap: irq: add chip option mask_writeonly Some irq controllers have writeonly/multipurpose register layouts. In those cases we read invalid data back. Here we add the option mask_writeonly as masking option. 
Signed-off-by: Michael Grzeschik Signed-off-by: Mark Brown --- include/linux/regmap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index e88649225a60..400172b59f24 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -884,6 +884,7 @@ struct regmap_irq { * * @status_base: Base status register address. * @mask_base: Base mask register address. + * @mask_writeonly: Base mask register is write only. * @unmask_base: Base unmask register address. for chips who have * separate mask and unmask registers * @ack_base: Base ack address. If zero then the chip is clear on read. @@ -927,6 +928,7 @@ struct regmap_irq_chip { unsigned int wake_base; unsigned int type_base; unsigned int irq_reg_stride; + bool mask_writeonly:1; bool init_ack_masked:1; bool mask_invert:1; bool use_ack:1; -- cgit v1.2.3 From 5d6dee80a1e94cc284d03e06d930e60e8d3ecf7d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 28 Jun 2017 13:50:05 -0600 Subject: vfio: New external user group/file match At the point where the kvm-vfio pseudo device wants to release its vfio group reference, we can't always acquire a new reference to make that happen. The group can be in a state where we wouldn't allow a new reference to be added. This new helper function allows a caller to match a file to a group to facilitate this. Given a file and group, report if they match. Thus the caller needs to already have a group reference to match to the file. This allows the deletion of a group without acquiring a new reference. 
Signed-off-by: Alex Williamson Reviewed-by: Eric Auger Reviewed-by: Paolo Bonzini Tested-by: Eric Auger Cc: stable@vger.kernel.org --- include/linux/vfio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index edf9b2cad277..9b34d0af5d27 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -97,6 +97,8 @@ extern void vfio_unregister_iommu_driver( */ extern struct vfio_group *vfio_group_get_external_user(struct file *filep); extern void vfio_group_put_external_user(struct vfio_group *group); +extern bool vfio_external_group_match_file(struct vfio_group *group, + struct file *filep); extern int vfio_external_user_iommu_id(struct vfio_group *group); extern long vfio_external_check_extension(struct vfio_group *group, unsigned long arg); -- cgit v1.2.3 From dff79b91b8f3279cbe60727368adff1f3a5ab16e Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 28 Jun 2017 15:13:52 -0500 Subject: PCI: Add pci_free_host_bridge() interface Commit a52d1443bba1 ("PCI: Export host bridge registration interface") exported the pci_alloc_host_bridge() interface so that PCI host controllers drivers can make use of it. Introduce pci_alloc_host_bridge() kernel counterpart to free the host bridge data structures, pci_free_host_bridge(), export it and update kernel functions releasing host bridge objects allocated memory to make use of it. 
Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Cc: Arnd Bergmann --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 33c2b0b77429..9095b38c2fa3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -458,6 +458,7 @@ static inline struct pci_host_bridge *pci_host_bridge_from_priv(void *priv) } struct pci_host_bridge *pci_alloc_host_bridge(size_t priv); +void pci_free_host_bridge(struct pci_host_bridge *bridge); int pci_register_host_bridge(struct pci_host_bridge *bridge); struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus); -- cgit v1.2.3 From 5c3f18cce08364ef68163228c0b42725d64cd353 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 28 Jun 2017 15:13:53 -0500 Subject: PCI: Add devm_pci_alloc_host_bridge() interface Struct pci_host_bridge can be allocated by PCI host bridge drivers which usually allocate and map memory through devm managed interfaces. Add a devm version for the pci_alloc_host_bridge() interface to simplify PCI host controller driver porting and simplify the driver failure paths. 
Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Cc: Arnd Bergmann --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 9095b38c2fa3..d39a66dc67aa 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -458,6 +458,8 @@ static inline struct pci_host_bridge *pci_host_bridge_from_priv(void *priv) } struct pci_host_bridge *pci_alloc_host_bridge(size_t priv); +struct pci_host_bridge *devm_pci_alloc_host_bridge(struct device *dev, + size_t priv); void pci_free_host_bridge(struct pci_host_bridge *bridge); int pci_register_host_bridge(struct pci_host_bridge *bridge); struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus); -- cgit v1.2.3 From 1228c4b6c19a76a2691cfb1403ad1eebf5852b76 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 28 Jun 2017 15:13:55 -0500 Subject: PCI: Add pci_scan_root_bus_bridge() interface The current pci_scan_root_bus() interface is made up of two main code paths: - pci_create_root_bus() - pci_scan_child_bus() pci_create_root_bus() is a wrapper function that allows creating a struct pci_host_bridge structure, initialize it with the passed parameters and register it with the kernel. As the struct pci_host_bridge requires additional struct members, pci_create_root_bus() parameter list has grown in time, making it unwieldy to add further parameters to it in case the struct pci_host_bridge gains more member fields to augment its functionality. Since PCI core code provides functions to allocate struct pci_host_bridge, instead of forcing the pci_create_root_bus() interface to add new parameters to cater for new struct pci_host_bridge functionality, it is more suitable to add an interface in PCI core code to scan a PCI bus straight from a struct pci_host_bridge created and customized by each specific PCI host controller driver.
Add a pci_scan_root_bus_bridge() function to allow PCI host controller drivers to create and initialize struct pci_host_bridge and scan the resulting bus. Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Cc: Arnd Bergmann --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index d39a66dc67aa..fe1eafd637c3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -857,6 +857,7 @@ struct pci_bus *pci_scan_root_bus_msi(struct device *parent, int bus, struct pci_bus *pci_scan_root_bus(struct device *parent, int bus, struct pci_ops *ops, void *sysdata, struct list_head *resources); +int pci_scan_root_bus_bridge(struct pci_host_bridge *bridge); struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr); void pcie_update_link_speed(struct pci_bus *bus, u16 link_status); -- cgit v1.2.3 From cea9bc0be624fb0dc488cb10df40be1323b6b758 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 28 Jun 2017 15:13:55 -0500 Subject: PCI: Make pci_register_host_bridge() PCI core internal With the introduction of pci_scan_root_bus_bridge() there is no need to export pci_register_host_bridge() to other kernel subsystems other than the PCI compilation unit that needs it. Make pci_register_host_bridge() static to its compilation unit and convert the existing drivers usage over to pci_scan_root_bus_bridge(). 
Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Cc: Arnd Bergmann --- include/linux/pci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index fe1eafd637c3..b56dc13f47c2 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -461,7 +461,6 @@ struct pci_host_bridge *pci_alloc_host_bridge(size_t priv); struct pci_host_bridge *devm_pci_alloc_host_bridge(struct device *dev, size_t priv); void pci_free_host_bridge(struct pci_host_bridge *bridge); -int pci_register_host_bridge(struct pci_host_bridge *bridge); struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus); void pci_set_host_bridge_release(struct pci_host_bridge *bridge, -- cgit v1.2.3 From 4b855ad37194f7bdbb200ce7a1c7051fecb56a08 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 26 Jun 2017 12:20:57 +0200 Subject: blk-mq: Create hctx for each present CPU Currently we only create hctx for online CPUs, which can lead to a lot of churn due to frequent soft offline / online operations. Instead allocate one for each present CPU to avoid this and dramatically simplify the code. 
Signed-off-by: Christoph Hellwig Reviewed-by: Jens Axboe Cc: Keith Busch Cc: linux-block@vger.kernel.org Cc: linux-nvme@lists.infradead.org Link: http://lkml.kernel.org/r/20170626102058.10200-3-hch@lst.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index c15f22c54535..7f815d915977 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -58,7 +58,6 @@ enum cpuhp_state { CPUHP_XEN_EVTCHN_PREPARE, CPUHP_ARM_SHMOBILE_SCU_PREPARE, CPUHP_SH_SH3X_PREPARE, - CPUHP_BLK_MQ_PREPARE, CPUHP_NET_FLOW_PREPARE, CPUHP_TOPOLOGY_PREPARE, CPUHP_NET_IUCV_PREPARE, -- cgit v1.2.3 From 0607512d0a8d7fac86667466b884095e04b10a59 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 27 Jun 2017 17:35:41 +0200 Subject: ras: mark stub functions as 'inline' With CONFIG_RAS disabled, we get two harmless warnings about unused functions: include/linux/ras.h:37:13: error: 'log_arm_hw_error' defined but not used [-Werror=unused-function] static void log_arm_hw_error(struct cper_sec_proc_arm *err) { return; } include/linux/ras.h:33:13: error: 'log_non_standard_event' defined but not used [-Werror=unused-function] static void log_non_standard_event(const guid_t *sec_type, Clearly these are meant to be 'inline', like the other stubs in the same header. 
Fixes: 297b64c74385 ("ras: acpi / apei: generate trace event for unrecognized CPER section") Fixes: e9279e83ad1f ("trace, ras: add ARM processor error trace event") Acked-by: Borislav Petkov Signed-off-by: Arnd Bergmann Signed-off-by: Will Deacon --- include/linux/ras.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ras.h b/include/linux/ras.h index 7d61863ff265..be5338a35d57 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -30,11 +30,13 @@ void log_non_standard_event(const guid_t *sec_type, const u8 sev, const u8 *err, const u32 len); void log_arm_hw_error(struct cper_sec_proc_arm *err); #else -static void log_non_standard_event(const guid_t *sec_type, - const guid_t *fru_id, const char *fru_text, - const u8 sev, const u8 *err, - const u32 len) { return; } -static void log_arm_hw_error(struct cper_sec_proc_arm *err) { return; } +static inline void +log_non_standard_event(const guid_t *sec_type, + const guid_t *fru_id, const char *fru_text, + const u8 sev, const u8 *err, const u32 len) +{ return; } +static inline void +log_arm_hw_error(struct cper_sec_proc_arm *err) { return; } #endif #endif /* __RAS_H__ */ -- cgit v1.2.3 From 425562429d4f3b134c7390249c23a3f647aad199 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Thu, 22 Jun 2017 12:00:58 +0200 Subject: pinctrl: generic: Add output-enable property Add output-enable generic pin configuration property. This property allows enabling/disabling pin's output capabilities without actually driving any value on the line.
Acked-by: Rob Herring [Added inline elaborations on buffer enabling/disabling] Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf-generic.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 7620eb127cff..231d3075815a 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -73,10 +73,16 @@ * operation, if several modes of operation are supported these can be * passed in the argument on a custom form, else just use argument 1 * to indicate low power mode, argument 0 turns low power mode off. - * @PIN_CONFIG_OUTPUT: this will configure the pin as an output. Use argument - * 1 to indicate high level, argument 0 to indicate low level. (Please - * see Documentation/pinctrl.txt, section "GPIO mode pitfalls" for a - * discussion around this parameter.) + * @PIN_CONFIG_OUTPUT_ENABLE: this will enable the pin's output mode + * without driving a value there. For most platforms this reduces to + * enable the output buffers and then let the pin controller current + * configuration (eg. the currently selected mux function) drive values on + * the line. Use argument 1 to enable output mode, argument 0 to disable + * it. + * @PIN_CONFIG_OUTPUT: this will configure the pin as an output and drive a + * value on the line. Use argument 1 to indicate high level, argument 0 to + * indicate low level. (Please see Documentation/pinctrl.txt, section + * "GPIO mode pitfalls" for a discussion around this parameter.) * @PIN_CONFIG_POWER_SOURCE: if the pin can select between different power * supplies, the argument to this parameter (on a custom format) tells * the driver which alternative power source to use. 
@@ -105,6 +111,7 @@ enum pin_config_param { PIN_CONFIG_INPUT_SCHMITT, PIN_CONFIG_INPUT_SCHMITT_ENABLE, PIN_CONFIG_LOW_POWER_MODE, + PIN_CONFIG_OUTPUT_ENABLE, PIN_CONFIG_OUTPUT, PIN_CONFIG_POWER_SOURCE, PIN_CONFIG_SLEW_RATE, -- cgit v1.2.3 From 6e0c90d691cd5d90569f5918ab03eb76c81f9c6e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 26 Jun 2017 21:28:41 -0700 Subject: libnvdimm, pmem, dax: export a cache control attribute The dax_flush() operation can be turned into a nop on platforms where firmware arranges for cpu caches to be flushed on a power-fail event. The ACPI 6.2 specification defines a mechanism for the platform to indicate this capability so the kernel can select the proper default. However, for other platforms, the administrator must toggle this setting manually. Given this flush setting is a dax-specific mechanism we advertise it through a 'dax' attribute group hanging off a host device. For example, a 'pmem0' block-device gets a 'dax' sysfs-subdirectory with a 'write_cache' attribute to control response to dax cache flush requests. This is similar to the 'queue/write_cache' attribute that appears under block devices. 
Cc: Jan Kara Cc: Jeff Moyer Cc: Matthew Wilcox Cc: Ross Zwisler Suggested-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/dax.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 73fca1bebaf3..8f39db7439c3 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -23,6 +23,8 @@ struct dax_operations { void (*flush)(struct dax_device *, pgoff_t, void *, size_t); }; +extern struct attribute_group dax_attribute_group; + #if IS_ENABLED(CONFIG_DAX) struct dax_device *dax_get_by_host(const char *host); void put_dax(struct dax_device *dax_dev); @@ -84,6 +86,7 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t size); +void dax_write_cache(struct dax_device *dax_dev, bool wc); /* * We use lowest available bit in exceptional entry for locking, one bit for -- cgit v1.2.3 From 0b277961f4484fb3f142caaa1dd1748cb0b2cbee Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 9 Jun 2017 09:46:50 -0700 Subject: libnvdimm, pmem: disable dax flushing when pmem is fronting a volatile region The pmem driver attaches to both persistent and volatile memory ranges advertised by the ACPI NFIT. When the region is volatile it is redundant to spend cycles flushing caches at fsync(). Check if the hosting region is volatile and do not set dax_write_cache() if it is. 
Cc: Jan Kara Cc: Jeff Moyer Cc: Christoph Hellwig Cc: Matthew Wilcox Cc: Ross Zwisler Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index b2f659bd661d..a8ee1d0afd70 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -165,4 +165,5 @@ void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); u64 nd_fletcher64(void *addr, size_t len, bool le); void nvdimm_flush(struct nd_region *nd_region); int nvdimm_has_flush(struct nd_region *nd_region); +int nvdimm_has_cache(struct nd_region *nd_region); #endif /* __LIBNVDIMM_H__ */ -- cgit v1.2.3 From 14dc6f04f49dc12614d7e90928b495b8d73cd471 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 27 Jun 2017 23:08:34 -0700 Subject: bpf: Add syscall lookup support for fd array and htab This patch allows userspace to do BPF_MAP_LOOKUP_ELEM on BPF_MAP_TYPE_PROG_ARRAY, BPF_MAP_TYPE_ARRAY_OF_MAPS and BPF_MAP_TYPE_HASH_OF_MAPS. The lookup returns a prog-id or map-id to the userspace. The userspace can then use the BPF_PROG_GET_FD_BY_ID or BPF_MAP_GET_FD_BY_ID to get a fd. Signed-off-by: Martin KaFai Lau Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index deca4e7f2845..5175729270d7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -36,6 +36,7 @@ struct bpf_map_ops { int fd); void (*map_fd_put_ptr)(void *ptr); u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); + u32 (*map_fd_sys_lookup_elem)(void *ptr); }; struct bpf_map { @@ -288,9 +289,11 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value); int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); +int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); void bpf_fd_array_map_clear(struct bpf_map *map); int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); +int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and * forced to use 'long' read/writes to try to atomically copy long counters. -- cgit v1.2.3 From 14e494542636b7a685c5bf27e695e3bb9ec3fe7d Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 28 Jun 2017 14:25:00 -0600 Subject: libnvdimm, btt: BTT updates for UEFI 2.7 format The UEFI 2.7 specification defines an updated BTT metadata format, bumping the revision to 2.0. Add support for the new format, while retaining compatibility for the old 1.1 format. 
Cc: Toshi Kani Cc: Linda Knippers Cc: Dan Williams Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- include/linux/nd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nd.h b/include/linux/nd.h index 96069c543890..5dc6b695437d 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -24,6 +24,7 @@ enum nvdimm_event { enum nvdimm_claim_class { NVDIMM_CCLASS_NONE, NVDIMM_CCLASS_BTT, + NVDIMM_CCLASS_BTT2, NVDIMM_CCLASS_PFN, NVDIMM_CCLASS_DAX, NVDIMM_CCLASS_UNKNOWN, -- cgit v1.2.3 From 18e9710ee59ce3bd2a2512ddcd3f7ceebe8b8d17 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 27 May 2017 11:16:51 +0300 Subject: fs: implement vfs_iter_read using do_iter_read De-duplicate some code and allow for passing the flags argument to vfs_iter_read. Additionally it properly updates atime now. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 803e5a9b2654..3f6a4f4efb32 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2789,7 +2789,8 @@ extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *); extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); -ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos); +ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, + int flags); ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos); /* fs/block_dev.c */ -- cgit v1.2.3 From abbb65899aecfc97bda64b6816d1e501754cfe1f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 27 May 2017 11:16:52 +0300 Subject: fs: implement vfs_iter_write using do_iter_write De-duplicate some code and allow for passing the flags argument to vfs_iter_write.
Additionally it now properly updates timestamps. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3f6a4f4efb32..c67f1f8ee789 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2791,7 +2791,8 @@ extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, int flags); -ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos); +ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, + int flags); /* fs/block_dev.c */ extern ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to); -- cgit v1.2.3 From 9c5f6908de03a4f52ba7364b11fcd6116225480c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Jun 2017 21:39:54 -0400 Subject: copy_{from,to}_user(): move kasan checks and might_fault() out-of-line Signed-off-by: Al Viro --- include/linux/uaccess.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 201418d5e15c..e57328896a16 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -109,8 +109,11 @@ static inline unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; - if (likely(access_ok(VERIFY_READ, from, n))) + might_fault(); + if (likely(access_ok(VERIFY_READ, from, n))) { + kasan_check_write(to, n); res = raw_copy_from_user(to, from, n); + } if (unlikely(res)) memset(to + (n - res), 0, res); return res; @@ -124,8 +127,11 @@ _copy_from_user(void *, const void __user *, unsigned long); static inline unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { - if (access_ok(VERIFY_WRITE, to, n)) + might_fault(); + if (access_ok(VERIFY_WRITE, 
to, n)) { + kasan_check_read(from, n); n = raw_copy_to_user(to, from, n); + } return n; } #else @@ -146,9 +152,6 @@ copy_from_user(void *to, const void __user *from, unsigned long n) { int sz = __compiletime_object_size(to); - might_fault(); - kasan_check_write(to, n); - if (likely(sz < 0 || sz >= n)) { check_object_size(to, n, false); n = _copy_from_user(to, from, n); @@ -165,9 +168,6 @@ copy_to_user(void __user *to, const void *from, unsigned long n) { int sz = __compiletime_object_size(from); - kasan_check_read(from, n); - might_fault(); - if (likely(sz < 0 || sz >= n)) { check_object_size(from, n, true); n = _copy_to_user(to, from, n); -- cgit v1.2.3 From b0377fedb6528087ed319b0d054d6ed82240372c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Jun 2017 21:42:43 -0400 Subject: copy_{to,from}_user(): consolidate object size checks ... and move them into thread_info.h, next to check_object_size() Signed-off-by: Al Viro --- include/linux/thread_info.h | 27 +++++++++++++++++++++++++++ include/linux/uaccess.h | 28 ++-------------------------- 2 files changed, 29 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index d7d3ea637dd0..250a27614328 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -113,6 +113,33 @@ static inline void check_object_size(const void *ptr, unsigned long n, { } #endif /* CONFIG_HARDENED_USERCOPY */ +extern void __compiletime_error("copy source size is too small") +__bad_copy_from(void); +extern void __compiletime_error("copy destination size is too small") +__bad_copy_to(void); + +static inline void copy_overflow(int size, unsigned long count) +{ + WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); +} + +static __always_inline bool +check_copy_size(const void *addr, size_t bytes, bool is_source) +{ + int sz = __compiletime_object_size(addr); + if (unlikely(sz >= 0 && sz < bytes)) { + if (!__builtin_constant_p(bytes)) + 
copy_overflow(sz, bytes); + else if (is_source) + __bad_copy_from(); + else + __bad_copy_to(); + return false; + } + check_object_size(addr, bytes, is_source); + return true; +} + #ifndef arch_setup_new_exec static inline void arch_setup_new_exec(void) { } #endif diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index e57328896a16..80b587085e79 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -139,43 +139,19 @@ extern unsigned long _copy_to_user(void __user *, const void *, unsigned long); #endif -extern void __compiletime_error("usercopy buffer size is too small") -__bad_copy_user(void); - -static inline void copy_user_overflow(int size, unsigned long count) -{ - WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); -} - static __always_inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { - int sz = __compiletime_object_size(to); - - if (likely(sz < 0 || sz >= n)) { - check_object_size(to, n, false); + if (likely(check_copy_size(to, n, false))) n = _copy_from_user(to, from, n); - } else if (!__builtin_constant_p(n)) - copy_user_overflow(sz, n); - else - __bad_copy_user(); - return n; } static __always_inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { - int sz = __compiletime_object_size(from); - - if (likely(sz < 0 || sz >= n)) { - check_object_size(from, n, true); + if (likely(check_copy_size(from, n, true))) n = _copy_to_user(to, from, n); - } else if (!__builtin_constant_p(n)) - copy_user_overflow(sz, n); - else - __bad_copy_user(); - return n; } #ifdef CONFIG_COMPAT -- cgit v1.2.3 From aa28de275a248879f9828cb9f7ee7e119c72ff96 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Jun 2017 21:45:10 -0400 Subject: iov_iter/hardening: move object size checks to inlined part There we actually have useful information about object sizes. Note: this patch has them done for all iov_iter flavours. 
Right now we do them twice in iovec case, but that'll change very shortly. Signed-off-by: Al Viro --- include/linux/uio.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index f2d36a3d3005..243e2362fe1a 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -10,6 +10,7 @@ #define __LINUX_UIO_H #include +#include #include struct page; @@ -91,11 +92,58 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); -size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i); -size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); -bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i); -size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i); -bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i); + +size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i); +size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); +bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i); +size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i); +bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i); + +static __always_inline __must_check +size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) +{ + if (unlikely(!check_copy_size(addr, bytes, true))) + return bytes; + else + return _copy_to_iter(addr, bytes, i); +} + +static __always_inline __must_check +size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) +{ + if (unlikely(!check_copy_size(addr, bytes, false))) + return bytes; + else + return _copy_from_iter(addr, bytes, i); +} + +static __always_inline __must_check +bool copy_from_iter_full(void *addr, 
size_t bytes, struct iov_iter *i) +{ + if (unlikely(!check_copy_size(addr, bytes, false))) + return false; + else + return _copy_from_iter_full(addr, bytes, i); +} + +static __always_inline __must_check +size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) +{ + if (unlikely(!check_copy_size(addr, bytes, false))) + return bytes; + else + return _copy_from_iter_nocache(addr, bytes, i); +} + +static __always_inline __must_check +bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) +{ + if (unlikely(!check_copy_size(addr, bytes, false))) + return false; + else + return _copy_from_iter_full_nocache(addr, bytes, i); +} + size_t iov_iter_zero(size_t bytes, struct iov_iter *); unsigned long iov_iter_alignment(const struct iov_iter *i); unsigned long iov_iter_gap_alignment(const struct iov_iter *i); -- cgit v1.2.3 From 3cb57df37bf3c87c7bbd2bd6f94d9d48c1c8e2ae Mon Sep 17 00:00:00 2001 From: Peter Oh Date: Tue, 27 Jun 2017 15:07:29 -0700 Subject: ieee80211: update public action codes Update Public Action field values as updated in IEEE Std 802.11-2016, so that modules/drivers can refer it. 
Signed-off-by: Peter Oh Reviewed-by: Johannes Berg Signed-off-by: Kalle Valo --- include/linux/ieee80211.h | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 34e1bcd2d7ff..55a604ad459f 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2113,10 +2113,43 @@ enum ieee80211_key_len { #define PMK_MAX_LEN 48 -/* Public action codes */ +/* Public action codes (IEEE Std 802.11-2016, 9.6.8.1, Table 9-307) */ enum ieee80211_pub_actioncode { + WLAN_PUB_ACTION_20_40_BSS_COEX = 0, + WLAN_PUB_ACTION_DSE_ENABLEMENT = 1, + WLAN_PUB_ACTION_DSE_DEENABLEMENT = 2, + WLAN_PUB_ACTION_DSE_REG_LOC_ANN = 3, WLAN_PUB_ACTION_EXT_CHANSW_ANN = 4, + WLAN_PUB_ACTION_DSE_MSMT_REQ = 5, + WLAN_PUB_ACTION_DSE_MSMT_RESP = 6, + WLAN_PUB_ACTION_MSMT_PILOT = 7, + WLAN_PUB_ACTION_DSE_PC = 8, + WLAN_PUB_ACTION_VENDOR_SPECIFIC = 9, + WLAN_PUB_ACTION_GAS_INITIAL_REQ = 10, + WLAN_PUB_ACTION_GAS_INITIAL_RESP = 11, + WLAN_PUB_ACTION_GAS_COMEBACK_REQ = 12, + WLAN_PUB_ACTION_GAS_COMEBACK_RESP = 13, WLAN_PUB_ACTION_TDLS_DISCOVER_RES = 14, + WLAN_PUB_ACTION_LOC_TRACK_NOTI = 15, + WLAN_PUB_ACTION_QAB_REQUEST_FRAME = 16, + WLAN_PUB_ACTION_QAB_RESPONSE_FRAME = 17, + WLAN_PUB_ACTION_QMF_POLICY = 18, + WLAN_PUB_ACTION_QMF_POLICY_CHANGE = 19, + WLAN_PUB_ACTION_QLOAD_REQUEST = 20, + WLAN_PUB_ACTION_QLOAD_REPORT = 21, + WLAN_PUB_ACTION_HCCA_TXOP_ADVERT = 22, + WLAN_PUB_ACTION_HCCA_TXOP_RESPONSE = 23, + WLAN_PUB_ACTION_PUBLIC_KEY = 24, + WLAN_PUB_ACTION_CHANNEL_AVAIL_QUERY = 25, + WLAN_PUB_ACTION_CHANNEL_SCHEDULE_MGMT = 26, + WLAN_PUB_ACTION_CONTACT_VERI_SIGNAL = 27, + WLAN_PUB_ACTION_GDD_ENABLEMENT_REQ = 28, + WLAN_PUB_ACTION_GDD_ENABLEMENT_RESP = 29, + WLAN_PUB_ACTION_NETWORK_CHANNEL_CONTROL = 30, + WLAN_PUB_ACTION_WHITE_SPACE_MAP_ANN = 31, + WLAN_PUB_ACTION_FTM_REQUEST = 32, + WLAN_PUB_ACTION_FTM = 33, + WLAN_PUB_ACTION_FILS_DISCOVERY = 34, }; /* TDLS action 
codes */ -- cgit v1.2.3 From c0edd7c9acd0eaee149ab6cb4441cc71a1af87f0 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sat, 24 Jun 2017 11:45:06 -0700 Subject: nanosleep: Use get_timespec64() and put_timespec64() Usage of these apis and their compat versions makes the syscalls: clock_nanosleep and nanosleep and their compat implementations simpler. This is a preparatory patch to isolate data conversions to struct timespec64 at userspace boundaries. This helps contain the changes needed to transition to new y2038 safe types. Signed-off-by: Deepa Dinamani Signed-off-by: Al Viro --- include/linux/hrtimer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 255edd5e7a74..012c37fdb688 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -453,7 +453,7 @@ static inline u64 hrtimer_forward_now(struct hrtimer *timer, /* Precise sleep: */ -extern int nanosleep_copyout(struct restart_block *, struct timespec *); +extern int nanosleep_copyout(struct restart_block *, struct timespec64 *); extern long hrtimer_nanosleep(const struct timespec64 *rqtp, const enum hrtimer_mode mode, const clockid_t clockid); -- cgit v1.2.3 From 3859a271a003aba01e45b85c9d8b355eb7bf25f9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 28 Oct 2016 01:22:25 -0700 Subject: randstruct: Mark various structs for randomization This marks many critical kernel structures for randomization. These are structures that have been targeted in the past in security exploits, or contain functions pointers, pointers to function pointer tables, lists, workqueues, ref-counters, credentials, permissions, or are otherwise sensitive. This initial list was extracted from Brad Spengler/PaX Team's code in the last public patch of grsecurity/PaX based on my understanding of the code. Changes or omissions from the original code are mine and don't reflect the original grsecurity/PaX code. 
Left out of this list is task_struct, which requires special handling and will be covered in a subsequent patch. Signed-off-by: Kees Cook --- include/linux/binfmts.h | 4 ++-- include/linux/cdev.h | 2 +- include/linux/cred.h | 4 ++-- include/linux/dcache.h | 2 +- include/linux/fs.h | 17 +++++++++-------- include/linux/fs_struct.h | 2 +- include/linux/ipc.h | 2 +- include/linux/ipc_namespace.h | 2 +- include/linux/key-type.h | 4 ++-- include/linux/kmod.h | 2 +- include/linux/kobject.h | 2 +- include/linux/lsm_hooks.h | 4 ++-- include/linux/mm_types.h | 4 ++-- include/linux/module.h | 4 ++-- include/linux/mount.h | 2 +- include/linux/msg.h | 2 +- include/linux/path.h | 2 +- include/linux/pid_namespace.h | 2 +- include/linux/proc_ns.h | 2 +- include/linux/sched.h | 2 +- include/linux/sched/signal.h | 2 +- include/linux/sem.h | 2 +- include/linux/shm.h | 2 +- include/linux/sysctl.h | 2 +- include/linux/tty.h | 2 +- include/linux/tty_driver.h | 4 ++-- include/linux/user_namespace.h | 2 +- include/linux/utsname.h | 2 +- 28 files changed, 43 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 05488da3aee9..3ae9013eeaaa 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -46,7 +46,7 @@ struct linux_binprm { unsigned interp_flags; unsigned interp_data; unsigned long loader, exec; -}; +} __randomize_layout; #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 #define BINPRM_FLAGS_ENFORCE_NONDUMP (1 << BINPRM_FLAGS_ENFORCE_NONDUMP_BIT) @@ -81,7 +81,7 @@ struct linux_binfmt { int (*load_shlib)(struct file *); int (*core_dump)(struct coredump_params *cprm); unsigned long min_coredump; /* minimal dump size */ -}; +} __randomize_layout; extern void __register_binfmt(struct linux_binfmt *fmt, int insert); diff --git a/include/linux/cdev.h b/include/linux/cdev.h index 408bc09ce497..cb28eb21e3ca 100644 --- a/include/linux/cdev.h +++ b/include/linux/cdev.h @@ -17,7 +17,7 @@ struct cdev { struct 
list_head list; dev_t dev; unsigned int count; -}; +} __randomize_layout; void cdev_init(struct cdev *, const struct file_operations *); diff --git a/include/linux/cred.h b/include/linux/cred.h index b03e7d049a64..82c8a9e1aabb 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -31,7 +31,7 @@ struct group_info { atomic_t usage; int ngroups; kgid_t gid[0]; -}; +} __randomize_layout; /** * get_group_info - Get a reference to a group info structure @@ -145,7 +145,7 @@ struct cred { struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */ struct group_info *group_info; /* supplementary groups for euid/fsgid */ struct rcu_head rcu; /* RCU deletion hook */ -}; +} __randomize_layout; extern void __put_cred(struct cred *); extern void exit_creds(struct task_struct *); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d2e38dc6172c..7eb262e13d3c 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -113,7 +113,7 @@ struct dentry { struct hlist_bl_node d_in_lookup_hash; /* only for in-lookup ones */ struct rcu_head d_rcu; } d_u; -}; +} __randomize_layout; /* * dentry->d_lock spinlock nesting subclasses: diff --git a/include/linux/fs.h b/include/linux/fs.h index 803e5a9b2654..8f28143486c4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -275,7 +275,7 @@ struct kiocb { void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); void *private; int ki_flags; -}; +} __randomize_layout; static inline bool is_sync_kiocb(struct kiocb *kiocb) { @@ -392,7 +392,7 @@ struct address_space { gfp_t gfp_mask; /* implicit gfp mask for allocations */ struct list_head private_list; /* ditto */ void *private_data; /* ditto */ -} __attribute__((aligned(sizeof(long)))); +} __attribute__((aligned(sizeof(long)))) __randomize_layout; /* * On most architectures that alignment is already the case; but * must be enforced here for CRIS, to let the least significant bit @@ -435,7 +435,7 @@ struct block_device { int 
bd_fsfreeze_count; /* Mutex for freeze */ struct mutex bd_fsfreeze_mutex; -}; +} __randomize_layout; /* * Radix-tree tags, for tagging dirty and writeback pages within the pagecache @@ -653,7 +653,7 @@ struct inode { #endif void *i_private; /* fs or device private pointer */ -}; +} __randomize_layout; static inline unsigned int i_blocksize(const struct inode *node) { @@ -868,7 +868,8 @@ struct file { struct list_head f_tfile_llink; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; -} __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ +} __randomize_layout + __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ struct file_handle { __u32 handle_bytes; @@ -1005,7 +1006,7 @@ struct file_lock { int state; /* state of grant or error if -ve */ } afs; } fl_u; -}; +} __randomize_layout; struct file_lock_context { spinlock_t flc_lock; @@ -1404,7 +1405,7 @@ struct super_block { spinlock_t s_inode_wblist_lock; struct list_head s_inodes_wb; /* writeback inodes */ -}; +} __randomize_layout; /* Helper functions so that in most cases filesystems will * not need to deal directly with kuid_t and kgid_t and can @@ -1690,7 +1691,7 @@ struct file_operations { u64); ssize_t (*dedupe_file_range)(struct file *, u64, u64, struct file *, u64); -}; +} __randomize_layout; struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index 0efc3e62843a..7a026240cbb1 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -12,7 +12,7 @@ struct fs_struct { int umask; int in_exec; struct path root, pwd; -}; +} __randomize_layout; extern struct kmem_cache *fs_cachep; diff --git a/include/linux/ipc.h b/include/linux/ipc.h index 71fd92d81b26..ea0eb0b5f98c 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -20,6 +20,6 @@ struct kern_ipc_perm { umode_t mode; unsigned long seq; void *security; -} 
____cacheline_aligned_in_smp; +} ____cacheline_aligned_in_smp __randomize_layout; #endif /* _LINUX_IPC_H */ diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 848e5796400e..65327ee0936b 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -61,7 +61,7 @@ struct ipc_namespace { struct ucounts *ucounts; struct ns_common ns; -}; +} __randomize_layout; extern struct ipc_namespace init_ipc_ns; extern spinlock_t mq_lock; diff --git a/include/linux/key-type.h b/include/linux/key-type.h index 8496cf64575c..9520fc3c3b9a 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -45,7 +45,7 @@ struct key_preparsed_payload { size_t datalen; /* Raw datalen */ size_t quotalen; /* Quota length for proposed payload */ time_t expiry; /* Expiry time of key */ -}; +} __randomize_layout; typedef int (*request_key_actor_t)(struct key_construction *key, const char *op, void *aux); @@ -158,7 +158,7 @@ struct key_type { /* internal fields */ struct list_head link; /* link in types list */ struct lock_class_key lock_class; /* key->sem lock class */ -}; +} __randomize_layout; extern struct key_type key_type_keyring; diff --git a/include/linux/kmod.h b/include/linux/kmod.h index c4e441e00db5..655082c88fd9 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -64,7 +64,7 @@ struct subprocess_info { int (*init)(struct subprocess_info *info, struct cred *new); void (*cleanup)(struct subprocess_info *info); void *data; -}; +} __randomize_layout; extern int call_usermodehelper(const char *path, char **argv, char **envp, int wait); diff --git a/include/linux/kobject.h b/include/linux/kobject.h index ca85cb80e99a..084513350317 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -172,7 +172,7 @@ struct kset { spinlock_t list_lock; struct kobject kobj; const struct kset_uevent_ops *uevent_ops; -}; +} __randomize_layout; extern void kset_init(struct kset *kset); extern int __must_check 
kset_register(struct kset *kset); diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 080f34e66017..565163fc9ad4 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1876,7 +1876,7 @@ struct security_hook_heads { struct list_head audit_rule_match; struct list_head audit_rule_free; #endif /* CONFIG_AUDIT */ -}; +} __randomize_layout; /* * Security module hook list structure. @@ -1887,7 +1887,7 @@ struct security_hook_list { struct list_head *head; union security_list_options hook; char *lsm; -}; +} __randomize_layout; /* * Initializing a security_hook_list structure takes diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 45cdb27791a3..ff151814a02d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -342,7 +342,7 @@ struct vm_area_struct { struct mempolicy *vm_policy; /* NUMA policy for the VMA */ #endif struct vm_userfaultfd_ctx vm_userfaultfd_ctx; -}; +} __randomize_layout; struct core_thread { struct task_struct *task; @@ -500,7 +500,7 @@ struct mm_struct { atomic_long_t hugetlb_usage; #endif struct work_struct async_put_work; -}; +} __randomize_layout; extern struct mm_struct init_mm; diff --git a/include/linux/module.h b/include/linux/module.h index 21f56393602f..d93111d7def6 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -45,7 +45,7 @@ struct module_kobject { struct kobject *drivers_dir; struct module_param_attrs *mp; struct completion *kobj_completion; -}; +} __randomize_layout; struct module_attribute { struct attribute attr; @@ -475,7 +475,7 @@ struct module { ctor_fn_t *ctors; unsigned int num_ctors; #endif -} ____cacheline_aligned; +} ____cacheline_aligned __randomize_layout; #ifndef MODULE_ARCH_INIT #define MODULE_ARCH_INIT {} #endif diff --git a/include/linux/mount.h b/include/linux/mount.h index 8e0352af06b7..1ce85e6fd95f 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -67,7 +67,7 @@ struct vfsmount { struct dentry *mnt_root; /* 
root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ int mnt_flags; -}; +} __randomize_layout; struct file; /* forward dec */ struct path; diff --git a/include/linux/msg.h b/include/linux/msg.h index f3f302f9c197..a001305f5a79 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -29,7 +29,7 @@ struct msg_queue { struct list_head q_messages; struct list_head q_receivers; struct list_head q_senders; -}; +} __randomize_layout; /* Helper routines for sys_msgsnd and sys_msgrcv */ extern long do_msgsnd(int msqid, long mtype, void __user *mtext, diff --git a/include/linux/path.h b/include/linux/path.h index d1372186f431..cde895cc4af4 100644 --- a/include/linux/path.h +++ b/include/linux/path.h @@ -7,7 +7,7 @@ struct vfsmount; struct path { struct vfsmount *mnt; struct dentry *dentry; -}; +} __randomize_layout; extern void path_get(const struct path *); extern void path_put(const struct path *); diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index c2a989dee876..b09136f88cf4 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -52,7 +52,7 @@ struct pid_namespace { int hide_pid; int reboot; /* group exit code if this pidns was rebooted */ struct ns_common ns; -}; +} __randomize_layout; extern struct pid_namespace init_pid_ns; diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 58ab28d81fc2..06844b54dfc1 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -21,7 +21,7 @@ struct proc_ns_operations { int (*install)(struct nsproxy *nsproxy, struct ns_common *ns); struct user_namespace *(*owner)(struct ns_common *ns); struct ns_common *(*get_parent)(struct ns_common *ns); -}; +} __randomize_layout; extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; diff --git a/include/linux/sched.h b/include/linux/sched.h index 2b69fc650201..f833254fce00 100644 --- a/include/linux/sched.h +++ 
b/include/linux/sched.h @@ -408,7 +408,7 @@ struct sched_rt_entity { /* rq "owned" by this entity/group: */ struct rt_rq *my_q; #endif -}; +} __randomize_layout; struct sched_dl_entity { struct rb_node rb_node; diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index c06d63b3a583..2a0dd40b15db 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -222,7 +222,7 @@ struct signal_struct { struct mutex cred_guard_mutex; /* guard against foreign influences on * credential calculations * (notably. ptrace) */ -}; +} __randomize_layout; /* * Bits in flags field of signal_struct. diff --git a/include/linux/sem.h b/include/linux/sem.h index 9edec926e9d9..23bcbdfad4a6 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -21,7 +21,7 @@ struct sem_array { int sem_nsems; /* no. of semaphores in array */ int complex_count; /* pending complex operations */ unsigned int use_global_lock;/* >0: global lock required */ -}; +} __randomize_layout; #ifdef CONFIG_SYSVIPC diff --git a/include/linux/shm.h b/include/linux/shm.h index 04e881829625..0fb7061ec54c 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -22,7 +22,7 @@ struct shmid_kernel /* private to the kernel */ /* The task created the shm object. NULL if the task is dead. 
*/ struct task_struct *shm_creator; struct list_head shm_clist; /* list by creator */ -}; +} __randomize_layout; /* shm_mode upper byte flags */ #define SHM_DEST 01000 /* segment will be destroyed on last detach */ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 80d07816def0..9ddeef2c03e2 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -117,7 +117,7 @@ struct ctl_table struct ctl_table_poll *poll; void *extra1; void *extra2; -}; +} __randomize_layout; struct ctl_node { struct rb_node node; diff --git a/include/linux/tty.h b/include/linux/tty.h index d07cd2105a6c..73f8d0977bb0 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -333,7 +333,7 @@ struct tty_struct { /* If the tty has a pending do_SAK, queue it here - akpm */ struct work_struct SAK_work; struct tty_port *port; -}; +} __randomize_layout; /* Each of a tty's open files has private_data pointing to tty_file_private */ struct tty_file_private { diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index b742b5e47cc2..00b2213f6a35 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -291,7 +291,7 @@ struct tty_operations { void (*poll_put_char)(struct tty_driver *driver, int line, char ch); #endif const struct file_operations *proc_fops; -}; +} __randomize_layout; struct tty_driver { int magic; /* magic number for this structure */ @@ -325,7 +325,7 @@ struct tty_driver { const struct tty_operations *ops; struct list_head tty_drivers; -}; +} __randomize_layout; extern struct list_head tty_drivers; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 32354b4b4b2b..b3575ce29148 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -66,7 +66,7 @@ struct user_namespace { #endif struct ucounts *ucounts; int ucount_max[UCOUNT_COUNTS]; -}; +} __randomize_layout; struct ucounts { struct hlist_node node; diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 
60f0bb83b313..da826ed059cf 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -26,7 +26,7 @@ struct uts_namespace { struct user_namespace *user_ns; struct ucounts *ucounts; struct ns_common ns; -}; +} __randomize_layout; extern struct uts_namespace init_uts_ns; #ifdef CONFIG_UTS_NS -- cgit v1.2.3 From 29e48ce87f1eaaa4b1fe3d9af90c586ac2d1fb74 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 5 Apr 2017 22:43:33 -0700 Subject: task_struct: Allow randomized layout This marks most of the layout of task_struct as randomizable, but leaves thread_info and scheduler state untouched at the start, and thread_struct untouched at the end. Other parts of the kernel use unnamed structures, but the 0-day builder using gcc-4.4 blows up on static initializers. Officially, it's documented as only working on gcc 4.6 and later, which further confuses me: https://gcc.gnu.org/wiki/C11Status The structure layout randomization already requires gcc 4.7, but instead of depending on the plugin being enabled, just check the gcc versions for wider build testing. At Linus's suggestion, the marking is hidden in a macro to reduce how ugly it looks. Additionally, indenting is left unchanged since it would make things harder to read. Randomization of task_struct is modified from Brad Spengler/PaX Team's code in the last public patch of grsecurity/PaX based on my understanding of the code. Changes or omissions from the original code are mine and don't reflect the original grsecurity/PaX code. 
Cc: Linus Torvalds Signed-off-by: Kees Cook --- include/linux/compiler-gcc.h | 13 ++++++++++++- include/linux/compiler.h | 5 +++++ include/linux/sched.h | 14 ++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 7deaae3dc87d..c4a66c036692 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -231,6 +231,7 @@ #endif /* GCC_VERSION >= 40500 */ #if GCC_VERSION >= 40600 + /* * When used with Link Time Optimization, gcc can optimize away C functions or * variables which are referenced only from assembly code. __visible tells the @@ -238,7 +239,17 @@ * this. */ #define __visible __attribute__((externally_visible)) -#endif + +/* + * RANDSTRUCT_PLUGIN wants to use an anonymous struct, but it is only + * possible since GCC 4.6. To provide as much build testing coverage + * as possible, this is used for all GCC 4.6+ builds, and not just on + * RANDSTRUCT_PLUGIN builds. + */ +#define randomized_struct_fields_start struct { +#define randomized_struct_fields_end } __randomize_layout; + +#endif /* GCC_VERSION >= 40600 */ #if GCC_VERSION >= 40900 && !defined(__CHECKER__) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 55ee9ee814f8..0b4ac3e8c63e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -456,6 +456,11 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s # define __no_randomize_layout #endif +#ifndef randomized_struct_fields_start +# define randomized_struct_fields_start +# define randomized_struct_fields_end +#endif + /* * Tell gcc if a function is cold. The compiler will assume any path * directly leading to the call is unlikely. 
diff --git a/include/linux/sched.h b/include/linux/sched.h index f833254fce00..e2ad3531e7fe 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -490,6 +490,13 @@ struct task_struct { #endif /* -1 unrunnable, 0 runnable, >0 stopped: */ volatile long state; + + /* + * This begins the randomizable portion of task_struct. Only + * scheduling-critical items should be added above here. + */ + randomized_struct_fields_start + void *stack; atomic_t usage; /* Per task flags (PF_*), defined further below: */ @@ -1051,6 +1058,13 @@ struct task_struct { /* Used by LSM modules for access restriction: */ void *security; #endif + + /* + * New fields for task_struct should be added above here, so that + * they are included in the randomized portion of task_struct. + */ + randomized_struct_fields_end + /* CPU-specific state of this task: */ struct thread_struct thread; -- cgit v1.2.3 From 53869cebce4bc53f71a080e7830600d4ae1ab712 Mon Sep 17 00:00:00 2001 From: "Reshetova, Elena" Date: Fri, 30 Jun 2017 13:07:57 +0300 Subject: net: convert nf_bridge_info.use from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a17e235639ae..005793e01bd2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -252,7 +252,7 @@ struct nf_conntrack { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct nf_bridge_info { - atomic_t use; + refcount_t use; enum { BRNF_PROTO_UNCHANGED, BRNF_PROTO_8021Q, @@ -3589,13 +3589,13 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct) #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge) { - if (nf_bridge && atomic_dec_and_test(&nf_bridge->use)) + if (nf_bridge && refcount_dec_and_test(&nf_bridge->use)) kfree(nf_bridge); } static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge) { if (nf_bridge) - atomic_inc(&nf_bridge->use); + refcount_inc(&nf_bridge->use); } #endif /* CONFIG_BRIDGE_NETFILTER */ static inline void nf_reset(struct sk_buff *skb) -- cgit v1.2.3 From 633547973ffc32fd2c815639d4675e1531f0896f Mon Sep 17 00:00:00 2001 From: "Reshetova, Elena" Date: Fri, 30 Jun 2017 13:07:58 +0300 Subject: net: convert sk_buff.users from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 005793e01bd2..90cbd86152da 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -761,7 +761,7 @@ struct sk_buff { unsigned char *head, *data; unsigned int truesize; - atomic_t users; + refcount_t users; }; #ifdef __KERNEL__ @@ -872,9 +872,9 @@ static inline bool skb_unref(struct sk_buff *skb) { if (unlikely(!skb)) return false; - if (likely(atomic_read(&skb->users) == 1)) + if (likely(refcount_read(&skb->users) == 1)) smp_rmb(); - else if (likely(!atomic_dec_and_test(&skb->users))) + else if (likely(!refcount_dec_and_test(&skb->users))) return false; return true; @@ -1283,7 +1283,7 @@ static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list, */ static inline struct sk_buff *skb_get(struct sk_buff *skb) { - atomic_inc(&skb->users); + refcount_inc(&skb->users); return skb; } @@ -1384,7 +1384,7 @@ static inline void __skb_header_release(struct sk_buff *skb) */ static inline int skb_shared(const struct sk_buff *skb) { - return atomic_read(&skb->users) != 1; + return refcount_read(&skb->users) != 1; } /** -- cgit v1.2.3 From 2638595afccf6554bfe55268ff9b2d3ac3dff2e6 Mon Sep 17 00:00:00 2001 From: "Reshetova, Elena" Date: Fri, 30 Jun 2017 13:07:59 +0300 Subject: net: convert sk_buff_fclones.fclone_ref from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 90cbd86152da..d0b9f3846eab 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -915,7 +915,7 @@ struct sk_buff_fclones { struct sk_buff skb2; - atomic_t fclone_ref; + refcount_t fclone_ref; }; /** @@ -935,7 +935,7 @@ static inline bool skb_fclone_busy(const struct sock *sk, fclones = container_of(skb, struct sk_buff_fclones, skb1); return skb->fclone == SKB_FCLONE_ORIG && - atomic_read(&fclones->fclone_ref) > 1 && + refcount_read(&fclones->fclone_ref) > 1 && fclones->skb2.sk == sk; } -- cgit v1.2.3 From 14afee4b6092fde451ee17604e5f5c89da33e71e Mon Sep 17 00:00:00 2001 From: "Reshetova, Elena" Date: Fri, 30 Jun 2017 13:08:00 +0300 Subject: net: convert sock.sk_wmem_alloc from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: David S. 
Miller --- include/linux/atmdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index c1da539f5e28..4d97a89da066 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -254,7 +254,7 @@ static inline void atm_return(struct atm_vcc *vcc,int truesize) static inline int atm_may_send(struct atm_vcc *vcc,unsigned int size) { - return (size + atomic_read(&sk_atm(vcc)->sk_wmem_alloc)) < + return (size + refcount_read(&sk_atm(vcc)->sk_wmem_alloc)) < sk_atm(vcc)->sk_sndbuf; } -- cgit v1.2.3 From 8851ab526791530d00bbbd0952512d68684a44b8 Mon Sep 17 00:00:00 2001 From: "Reshetova, Elena" Date: Fri, 30 Jun 2017 13:08:02 +0300 Subject: net: convert ip_mc_list.refcnt from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: David S. 
Miller --- include/linux/igmp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 12f6fba6d21a..97caf1821de8 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -18,6 +18,7 @@ #include #include #include +#include #include static inline struct igmphdr *igmp_hdr(const struct sk_buff *skb) @@ -84,7 +85,7 @@ struct ip_mc_list { struct ip_mc_list __rcu *next_hash; struct timer_list timer; int users; - atomic_t refcnt; + refcount_t refcnt; spinlock_t lock; char tm_running; char reporter; -- cgit v1.2.3 From 7658b36f1b3122c298213eed344f622e836b281b Mon Sep 17 00:00:00 2001 From: "Reshetova, Elena" Date: Fri, 30 Jun 2017 13:08:03 +0300 Subject: net: convert in_device.refcnt from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: David S. 
Miller --- include/linux/inetdevice.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index e7c04c4e4bcd..fb3f809e34e4 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -11,6 +11,7 @@ #include #include #include +#include struct ipv4_devconf { void *sysctl; @@ -22,7 +23,7 @@ struct ipv4_devconf { struct in_device { struct net_device *dev; - atomic_t refcnt; + refcount_t refcnt; int dead; struct in_ifaddr *ifa_list; /* IP ifaddr chain */ @@ -219,7 +220,7 @@ static inline struct in_device *in_dev_get(const struct net_device *dev) rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (in_dev) - atomic_inc(&in_dev->refcnt); + refcount_inc(&in_dev->refcnt); rcu_read_unlock(); return in_dev; } @@ -240,12 +241,12 @@ void in_dev_finish_destroy(struct in_device *idev); static inline void in_dev_put(struct in_device *idev) { - if (atomic_dec_and_test(&idev->refcnt)) + if (refcount_dec_and_test(&idev->refcnt)) in_dev_finish_destroy(idev); } -#define __in_dev_put(idev) atomic_dec(&(idev)->refcnt) -#define in_dev_hold(idev) atomic_inc(&(idev)->refcnt) +#define __in_dev_put(idev) refcount_dec(&(idev)->refcnt) +#define in_dev_hold(idev) refcount_inc(&(idev)->refcnt) #endif /* __KERNEL__ */ -- cgit v1.2.3 From 433cea4d9bbb83cc848b80c51bb849a2ceb49379 Mon Sep 17 00:00:00 2001 From: "Reshetova, Elena" Date: Fri, 30 Jun 2017 13:08:04 +0300 Subject: net: convert netpoll_info.refcnt from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: David S. 
Miller --- include/linux/netpoll.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 1828900c9411..27c0aaa22cb0 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -11,6 +11,7 @@ #include #include #include +#include union inet_addr { __u32 all[4]; @@ -34,7 +35,7 @@ struct netpoll { }; struct netpoll_info { - atomic_t refcnt; + refcount_t refcnt; struct semaphore dev_lock; -- cgit v1.2.3 From 7db5bb33add5afe6c64e00516b0c928bfc937466 Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Fri, 30 Jun 2017 20:53:24 -0700 Subject: libnvdimm, acpi, nfit: Add bus level dsm mask for pass thru. Add a bus level dsm_mask to nvdimm_bus_descriptor to allow the passthru calling mechanism to specify a different mask from the cmd_mask. Populate bus_dsm_mask and use it to filter dsm calls that user can make through the pass thru interface. Signed-off-by: Jerry Hoemann [djbw: use command number constants instead of a magic mask value] Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 4b9f178c82e6..6aee1a6e4e63 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -55,6 +55,7 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm_bus_descriptor { const struct attribute_group **attr_groups; + unsigned long bus_dsm_mask; unsigned long cmd_mask; struct module *module; char *provider_name; -- cgit v1.2.3 From ae146d9b76589d636d11c5e4382bbba2fe8bdb9b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 30 Jun 2017 11:52:12 +0800 Subject: sctp: remove the typedef sctp_sctphdr_t This patch is to remove the typedef sctp_sctphdr_t, and replace with struct sctphdr in the places where it's using this typedef. It is also to fix some indents and use sizeof(variable) instead of sizeof(type). 
Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 7a4804c4a593..85540ec4b561 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -57,12 +57,12 @@ #include /* Section 3.1. SCTP Common Header Format */ -typedef struct sctphdr { +struct sctphdr { __be16 source; __be16 dest; __be32 vtag; __le32 checksum; -} sctp_sctphdr_t; +}; static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb) { -- cgit v1.2.3 From 922dbc5be2186659d2c453a53f2ae569e55b6101 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 30 Jun 2017 11:52:13 +0800 Subject: sctp: remove the typedef sctp_chunkhdr_t This patch is to remove the typedef sctp_chunkhdr_t, and replace with struct sctp_chunkhdr in the places where it's using this typedef. It is also to fix some indents and use sizeof(variable) instead of sizeof(type), especially in sctp_new. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 85540ec4b561..9ad5b9e8df78 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -70,11 +70,11 @@ static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb) } /* Section 3.2. Chunk Field Descriptions. */ -typedef struct sctp_chunkhdr { +struct sctp_chunkhdr { __u8 type; __u8 flags; __be16 length; -} sctp_chunkhdr_t; +}; /* Section 3.2. Chunk Type Values. 
@@ -236,8 +236,8 @@ typedef struct sctp_datahdr { } sctp_datahdr_t; typedef struct sctp_data_chunk { - sctp_chunkhdr_t chunk_hdr; - sctp_datahdr_t data_hdr; + struct sctp_chunkhdr chunk_hdr; + sctp_datahdr_t data_hdr; } sctp_data_chunk_t; /* DATA Chuck Specific Flags */ @@ -267,7 +267,7 @@ typedef struct sctp_inithdr { } sctp_inithdr_t; typedef struct sctp_init_chunk { - sctp_chunkhdr_t chunk_hdr; + struct sctp_chunkhdr chunk_hdr; sctp_inithdr_t init_hdr; } sctp_init_chunk_t; @@ -386,7 +386,7 @@ typedef struct sctp_sackhdr { } sctp_sackhdr_t; typedef struct sctp_sack_chunk { - sctp_chunkhdr_t chunk_hdr; + struct sctp_chunkhdr chunk_hdr; sctp_sackhdr_t sack_hdr; } sctp_sack_chunk_t; @@ -403,7 +403,7 @@ typedef struct sctp_heartbeathdr { } sctp_heartbeathdr_t; typedef struct sctp_heartbeat_chunk { - sctp_chunkhdr_t chunk_hdr; + struct sctp_chunkhdr chunk_hdr; sctp_heartbeathdr_t hb_hdr; } sctp_heartbeat_chunk_t; @@ -413,7 +413,7 @@ typedef struct sctp_heartbeat_chunk { * chunk descriptor. */ typedef struct sctp_abort_chunk { - sctp_chunkhdr_t uh; + struct sctp_chunkhdr uh; } sctp_abort_chunk_t; @@ -425,8 +425,8 @@ typedef struct sctp_shutdownhdr { } sctp_shutdownhdr_t; struct sctp_shutdown_chunk_t { - sctp_chunkhdr_t chunk_hdr; - sctp_shutdownhdr_t shutdown_hdr; + struct sctp_chunkhdr chunk_hdr; + sctp_shutdownhdr_t shutdown_hdr; }; /* RFC 2960. 
Section 3.3.10 Operation Error (ERROR) (9) */ @@ -438,8 +438,8 @@ typedef struct sctp_errhdr { } sctp_errhdr_t; typedef struct sctp_operr_chunk { - sctp_chunkhdr_t chunk_hdr; - sctp_errhdr_t err_hdr; + struct sctp_chunkhdr chunk_hdr; + sctp_errhdr_t err_hdr; } sctp_operr_chunk_t; /* RFC 2960 3.3.10 - Operation Error @@ -528,7 +528,7 @@ typedef struct sctp_ecnehdr { } sctp_ecnehdr_t; typedef struct sctp_ecne_chunk { - sctp_chunkhdr_t chunk_hdr; + struct sctp_chunkhdr chunk_hdr; sctp_ecnehdr_t ence_hdr; } sctp_ecne_chunk_t; @@ -540,7 +540,7 @@ typedef struct sctp_cwrhdr { } sctp_cwrhdr_t; typedef struct sctp_cwr_chunk { - sctp_chunkhdr_t chunk_hdr; + struct sctp_chunkhdr chunk_hdr; sctp_cwrhdr_t cwr_hdr; } sctp_cwr_chunk_t; @@ -649,7 +649,7 @@ typedef struct sctp_addiphdr { } sctp_addiphdr_t; typedef struct sctp_addip_chunk { - sctp_chunkhdr_t chunk_hdr; + struct sctp_chunkhdr chunk_hdr; sctp_addiphdr_t addip_hdr; } sctp_addip_chunk_t; @@ -709,7 +709,7 @@ typedef struct sctp_authhdr { } sctp_authhdr_t; typedef struct sctp_auth_chunk { - sctp_chunkhdr_t chunk_hdr; + struct sctp_chunkhdr chunk_hdr; sctp_authhdr_t auth_hdr; } sctp_auth_chunk_t; @@ -719,7 +719,7 @@ struct sctp_infox { }; struct sctp_reconf_chunk { - sctp_chunkhdr_t chunk_hdr; + struct sctp_chunkhdr chunk_hdr; __u8 params[0]; }; -- cgit v1.2.3 From 6d85e68f4cde48f8c2fac6d9c00ca6988cf6e327 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 30 Jun 2017 11:52:14 +0800 Subject: sctp: remove the typedef sctp_cid_t This patch is to remove the typedef sctp_cid_t, and replace with struct sctp_cid in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 9ad5b9e8df78..6d7b8846c607 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -82,7 +82,7 @@ struct sctp_chunkhdr { * Value field. 
It takes a value from 0 to 254. The value of 255 is * reserved for future use as an extension field. */ -typedef enum { +enum sctp_cid { SCTP_CID_DATA = 0, SCTP_CID_INIT = 1, SCTP_CID_INIT_ACK = 2, @@ -109,7 +109,7 @@ typedef enum { SCTP_CID_ASCONF = 0xC1, SCTP_CID_ASCONF_ACK = 0x80, SCTP_CID_RECONF = 0x82, -} sctp_cid_t; /* enum */ +}; /* enum */ /* Section 3.2 -- cgit v1.2.3 From ec431c2cd55c4122e729b7dc45956653a038614b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 30 Jun 2017 11:52:15 +0800 Subject: sctp: remove the typedef sctp_cid_action_t Remove this typedef, there is even no places using it. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 6d7b8846c607..ffdccb4da7e5 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -117,12 +117,12 @@ enum sctp_cid { * the action that must be taken if the processing endpoint does not * recognize the Chunk Type. */ -typedef enum { +enum { SCTP_CID_ACTION_DISCARD = 0x00, SCTP_CID_ACTION_DISCARD_ERR = 0x40, SCTP_CID_ACTION_SKIP = 0x80, SCTP_CID_ACTION_SKIP_ERR = 0xc0, -} sctp_cid_action_t; +}; enum { SCTP_CID_ACTION_MASK = 0xc0, }; -- cgit v1.2.3 From 3c9187049214127d3401926b033d05eb75d69c39 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 30 Jun 2017 11:52:16 +0800 Subject: sctp: remove the typedef sctp_paramhdr_t This patch is to remove the typedef sctp_paramhdr_t, and replace with struct sctp_paramhdr in the places where it's using this typedef. It is also to fix some indents and use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- include/linux/sctp.h | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index ffdccb4da7e5..142bb6aa88eb 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -162,10 +162,10 @@ enum { SCTP_CHUNK_FLAG_T = 0x01 }; * Section 3.2.1 Optional/Variable-length Parmaeter Format. */ -typedef struct sctp_paramhdr { +struct sctp_paramhdr { __be16 type; __be16 length; -} sctp_paramhdr_t; +}; typedef enum { @@ -274,37 +274,37 @@ typedef struct sctp_init_chunk { /* Section 3.3.2.1. IPv4 Address Parameter (5) */ typedef struct sctp_ipv4addr_param { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; struct in_addr addr; } sctp_ipv4addr_param_t; /* Section 3.3.2.1. IPv6 Address Parameter (6) */ typedef struct sctp_ipv6addr_param { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; struct in6_addr addr; } sctp_ipv6addr_param_t; /* Section 3.3.2.1 Cookie Preservative (9) */ typedef struct sctp_cookie_preserve_param { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __be32 lifespan_increment; } sctp_cookie_preserve_param_t; /* Section 3.3.2.1 Host Name Address (11) */ typedef struct sctp_hostname_param { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; uint8_t hostname[0]; } sctp_hostname_param_t; /* Section 3.3.2.1 Supported Address Types (12) */ typedef struct sctp_supported_addrs_param { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __be16 types[0]; } sctp_supported_addrs_param_t; /* Appendix A. 
ECN Capable (32768) */ typedef struct sctp_ecn_capable_param { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; } sctp_ecn_capable_param_t; /* ADDIP Section 3.2.6 Adaptation Layer Indication */ @@ -321,19 +321,19 @@ typedef struct sctp_supported_ext_param { /* AUTH Section 3.1 Random */ typedef struct sctp_random_param { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __u8 random_val[0]; } sctp_random_param_t; /* AUTH Section 3.2 Chunk List */ typedef struct sctp_chunks_param { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __u8 chunks[0]; } sctp_chunks_param_t; /* AUTH Section 3.3 HMAC Algorithm */ typedef struct sctp_hmac_algo_param { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __be16 hmac_ids[0]; } sctp_hmac_algo_param_t; @@ -345,14 +345,14 @@ typedef sctp_init_chunk_t sctp_initack_chunk_t; /* Section 3.3.3.1 State Cookie (7) */ typedef struct sctp_cookie_param { - sctp_paramhdr_t p; + struct sctp_paramhdr p; __u8 body[0]; } sctp_cookie_param_t; /* Section 3.3.3.1 Unrecognized Parameters (8) */ typedef struct sctp_unrecognized_param { - sctp_paramhdr_t param_hdr; - sctp_paramhdr_t unrecognized; + struct sctp_paramhdr param_hdr; + struct sctp_paramhdr unrecognized; } sctp_unrecognized_param_t; @@ -399,7 +399,7 @@ typedef struct sctp_sack_chunk { */ typedef struct sctp_heartbeathdr { - sctp_paramhdr_t info; + struct sctp_paramhdr info; } sctp_heartbeathdr_t; typedef struct sctp_heartbeat_chunk { @@ -639,7 +639,7 @@ struct sctp_fwdtsn_chunk { * report status of ASCONF processing. 
*/ typedef struct sctp_addip_param { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __be32 crr_id; } sctp_addip_param_t; @@ -724,7 +724,7 @@ struct sctp_reconf_chunk { }; struct sctp_strreset_outreq { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __u32 request_seq; __u32 response_seq; __u32 send_reset_at_tsn; @@ -732,18 +732,18 @@ struct sctp_strreset_outreq { }; struct sctp_strreset_inreq { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __u32 request_seq; __u16 list_of_streams[0]; }; struct sctp_strreset_tsnreq { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __u32 request_seq; }; struct sctp_strreset_addstrm { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __u32 request_seq; __u16 number_of_streams; __u16 reserved; @@ -760,13 +760,13 @@ enum { }; struct sctp_strreset_resp { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __u32 response_seq; __u32 result; }; struct sctp_strreset_resptsn { - sctp_paramhdr_t param_hdr; + struct sctp_paramhdr param_hdr; __u32 response_seq; __u32 result; __u32 senders_next_tsn; -- cgit v1.2.3 From 34b4e29b383559e3848eea30af66e94aa72af88c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 30 Jun 2017 11:52:17 +0800 Subject: sctp: remove the typedef sctp_param_t This patch is to remove the typedef sctp_param_t, and replace with struct sctp_paramhdr in the places where it's using this typedef. It is also to remove the useless declaration sctp_addip_addr_config and fix the lack of params for some other functions' declaration. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 142bb6aa88eb..5eecc0f14650 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -167,7 +167,7 @@ struct sctp_paramhdr { __be16 length; }; -typedef enum { +enum sctp_param { /* RFC 2960 Section 3.3.5 */ SCTP_PARAM_HEARTBEAT_INFO = cpu_to_be16(1), @@ -207,7 +207,7 @@ typedef enum { SCTP_PARAM_RESET_RESPONSE = cpu_to_be16(0x0010), SCTP_PARAM_RESET_ADD_OUT_STREAMS = cpu_to_be16(0x0011), SCTP_PARAM_RESET_ADD_IN_STREAMS = cpu_to_be16(0x0012), -} sctp_param_t; /* enum */ +}; /* enum */ /* RFC 2960 Section 3.2.1 -- cgit v1.2.3 From 0664ed4378907c936fbee811efe95650d32baf34 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 30 Jun 2017 11:52:18 +0800 Subject: sctp: remove the typedef sctp_param_action_t Remove this typedef, there is even no places using it. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 5eecc0f14650..d5c0ddadb68b 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -216,12 +216,12 @@ enum sctp_param { * not recognize the Parameter Type. * */ -typedef enum { +enum { SCTP_PARAM_ACTION_DISCARD = cpu_to_be16(0x0000), SCTP_PARAM_ACTION_DISCARD_ERR = cpu_to_be16(0x4000), SCTP_PARAM_ACTION_SKIP = cpu_to_be16(0x8000), SCTP_PARAM_ACTION_SKIP_ERR = cpu_to_be16(0xc000), -} sctp_param_action_t; +}; enum { SCTP_PARAM_ACTION_MASK = cpu_to_be16(0xc000), }; -- cgit v1.2.3 From 3583df1a3d7328b42cf116db3fb56b0368fab12b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 30 Jun 2017 11:52:19 +0800 Subject: sctp: remove the typedef sctp_datahdr_t This patch is to remove the typedef sctp_datahdr_t, and replace with struct sctp_datahdr in the places where it's using this typedef. 
It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index d5c0ddadb68b..55d84c143122 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -227,17 +227,17 @@ enum { SCTP_PARAM_ACTION_MASK = cpu_to_be16(0xc000), }; /* RFC 2960 Section 3.3.1 Payload Data (DATA) (0) */ -typedef struct sctp_datahdr { +struct sctp_datahdr { __be32 tsn; __be16 stream; __be16 ssn; __be32 ppid; __u8 payload[0]; -} sctp_datahdr_t; +}; typedef struct sctp_data_chunk { struct sctp_chunkhdr chunk_hdr; - sctp_datahdr_t data_hdr; + struct sctp_datahdr data_hdr; } sctp_data_chunk_t; /* DATA Chuck Specific Flags */ -- cgit v1.2.3 From 9f8d31471548d9b74609335f9a3c75c7b664c8b4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 30 Jun 2017 11:52:20 +0800 Subject: sctp: remove the typedef sctp_data_chunk_t This patch is to remove the typedef sctp_data_chunk_t, and replace with struct sctp_data_chunk in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 55d84c143122..91c888f21b24 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -235,10 +235,10 @@ struct sctp_datahdr { __u8 payload[0]; }; -typedef struct sctp_data_chunk { +struct sctp_data_chunk { struct sctp_chunkhdr chunk_hdr; struct sctp_datahdr data_hdr; -} sctp_data_chunk_t; +}; /* DATA Chuck Specific Flags */ enum { -- cgit v1.2.3 From 4ae70c0845faba3096aa2be4b2ebfcc3ac590a67 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 30 Jun 2017 11:52:21 +0800 Subject: sctp: remove the typedef sctp_inithdr_t This patch is to remove the typedef sctp_inithdr_t, and replace with struct sctp_inithdr in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 91c888f21b24..56241953e57e 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -257,18 +257,18 @@ enum { SCTP_DATA_FRAG_MASK = 0x03, }; * This chunk is used to initiate a SCTP association between two * endpoints. */ -typedef struct sctp_inithdr { +struct sctp_inithdr { __be32 init_tag; __be32 a_rwnd; __be16 num_outbound_streams; __be16 num_inbound_streams; __be32 initial_tsn; __u8 params[0]; -} sctp_inithdr_t; +}; typedef struct sctp_init_chunk { struct sctp_chunkhdr chunk_hdr; - sctp_inithdr_t init_hdr; + struct sctp_inithdr init_hdr; } sctp_init_chunk_t; -- cgit v1.2.3 From 01a992bea523d9568cf56a02003c15c9dc40eb20 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 30 Jun 2017 11:52:22 +0800 Subject: sctp: remove the typedef sctp_init_chunk_t This patch is to remove the typedef sctp_init_chunk_t, and replace with struct sctp_init_chunk in the places where it's using this typedef. 
Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 56241953e57e..99e866487e2f 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -266,10 +266,10 @@ struct sctp_inithdr { __u8 params[0]; }; -typedef struct sctp_init_chunk { +struct sctp_init_chunk { struct sctp_chunkhdr chunk_hdr; struct sctp_inithdr init_hdr; -} sctp_init_chunk_t; +}; /* Section 3.3.2.1. IPv4 Address Parameter (5) */ @@ -341,7 +341,7 @@ typedef struct sctp_hmac_algo_param { * The INIT ACK chunk is used to acknowledge the initiation of an SCTP * association. */ -typedef sctp_init_chunk_t sctp_initack_chunk_t; +typedef struct sctp_init_chunk sctp_initack_chunk_t; /* Section 3.3.3.1 State Cookie (7) */ typedef struct sctp_cookie_param { -- cgit v1.2.3 From 40304b2a1567fecc321f640ee4239556dd0f3ee0 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 30 Jun 2017 20:02:40 -0700 Subject: bpf: BPF support for sock_ops Created a new BPF program type, BPF_PROG_TYPE_SOCK_OPS, and a corresponding struct that allows BPF programs of this type to access some of the socket's fields (such as IP addresses, ports, etc.). It uses the existing bpf cgroups infrastructure so the programs can be attached per cgroup with full inheritance support. The program will be called at appropriate times to set relevant connection parameters such as buffer sizes, SYN and SYN-ACK RTOs, etc., based on connection information such as IP addresses, port numbers, etc. Although there are already 3 mechanisms to set parameters (sysctls, route metrics and setsockopts), this new mechanism provides some distinct advantages. Unlike sysctls, it can set parameters per connection. In contrast to route metrics, it can also use port numbers and information provided by a user level program. 
In addition, it could set parameters probabilistically for evaluation purposes (i.e. do something different on 10% of the flows and compare results with the other 90% of the flows). Also, in cases where IPv6 addresses contain geographic information, the rules to make changes based on the distance (or RTT) between the hosts are much easier than route metric rules and can be global. Finally, unlike setsockopt, it does not require application changes and it can be updated easily at any time. Although the bpf cgroup framework already contains a sock related program type (BPF_PROG_TYPE_CGROUP_SOCK), I created the new type (BPF_PROG_TYPE_SOCK_OPS) because the existing type expects to be called only once during the connection's lifetime. In contrast, the new program type will be called multiple times from different places in the network stack code. For example, before sending SYN and SYN-ACKs to set an appropriate timeout, when the connection is established to set congestion control, etc. As a result it has an "op" field to specify the type of operation requested. The purpose of this new program type is to simplify setting connection parameters, such as buffer sizes, TCP's SYN RTO, etc. For example, it is easy to use facebook's internal IPv6 addresses to determine if both hosts of a connection are in the same datacenter. Therefore, it is easy to write a BPF program to choose a small SYN RTO value when both hosts are in the same datacenter. This patch only contains the framework to support the new BPF program type, following patches add the functionality to set various connection parameters. This patch defines a new BPF program type: BPF_PROG_TYPE_SOCK_OPS and a new bpf syscall command to load a new program of this type: BPF_PROG_LOAD_SOCKET_OPS. 
Two new corresponding structs (one for the kernel, one for the user/BPF program): /* kernel version */ struct bpf_sock_ops_kern { struct sock *sk; __u32 op; union { __u32 reply; __u32 replylong[4]; }; }; /* user version * Some fields are in network byte order reflecting the sock struct * Use the bpf_ntohl helper macro in samples/bpf/bpf_endian.h to * convert them to host byte order. */ struct bpf_sock_ops { __u32 op; union { __u32 reply; __u32 replylong[4]; }; __u32 family; __u32 remote_ip4; /* In network byte order */ __u32 local_ip4; /* In network byte order */ __u32 remote_ip6[4]; /* In network byte order */ __u32 local_ip6[4]; /* In network byte order */ __u32 remote_port; /* In network byte order */ __u32 local_port; /* In host byte order */ }; Currently there are two types of ops. The first type expects the BPF program to return a value which is then used by the caller (or a negative value to indicate the operation is not supported). The second type expects state changes to be done by the BPF program, for example through a setsockopt BPF helper function, and they ignore the return value. The reply fields of the bpf_sock_ops struct are there in case a bpf program needs to return a value larger than an integer. Signed-off-by: Lawrence Brakmo Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S.
Miller --- include/linux/bpf-cgroup.h | 18 ++++++++++++++++++ include/linux/bpf_types.h | 1 + include/linux/filter.h | 9 +++++++++ 3 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index c970a25d2a49..360c082e885c 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -7,6 +7,7 @@ struct sock; struct cgroup; struct sk_buff; +struct bpf_sock_ops_kern; #ifdef CONFIG_CGROUP_BPF @@ -42,6 +43,10 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, int __cgroup_bpf_run_filter_sk(struct sock *sk, enum bpf_attach_type type); +int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, + struct bpf_sock_ops_kern *sock_ops, + enum bpf_attach_type type); + /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ ({ \ @@ -75,6 +80,18 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, __ret; \ }) +#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled && (sock_ops)->sk) { \ + typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk); \ + if (sk_fullsock(__sk)) \ + __ret = __cgroup_bpf_run_filter_sock_ops(__sk, \ + sock_ops, \ + BPF_CGROUP_SOCK_OPS); \ + } \ + __ret; \ +}) #else struct cgroup_bpf {}; @@ -85,6 +102,7 @@ static inline void cgroup_bpf_inherit(struct cgroup *cgrp, #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #endif /* CONFIG_CGROUP_BPF */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 03bf223f18be..3d137c33d664 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -10,6 +10,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock_prog_ops) BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops) BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, 
lwt_inout_prog_ops) BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops_prog_ops) #endif #ifdef CONFIG_BPF_EVENTS BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops) diff --git a/include/linux/filter.h b/include/linux/filter.h index 1fa26dc562ce..738f8b14f025 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -898,4 +898,13 @@ static inline int bpf_tell_extensions(void) return SKF_AD_MAX; } +struct bpf_sock_ops_kern { + struct sock *sk; + u32 op; + union { + u32 reply; + u32 replylong[4]; + }; +}; + #endif /* __LINUX_FILTER_H__ */ -- cgit v1.2.3 From 5d6dec6fba38c3e2d408df108bb92ef4ac201f18 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 1 Jul 2017 11:01:29 -0700 Subject: locking/refcount: Remove the half-implemented refcount_sub() API CONFIG_REFCOUNT_FULL=y (correctly) does not provide a refcount_sub(), which should not be part of proper refcount design patterns. Remove the erroneous extern and the later !CONFIG_REFCOUNT_FULL accidental implementation. 
Signed-off-by: Kees Cook Cc: Elena Reshetova Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 29dee3c03abc ("locking/refcounts: Out-of-line everything") Link: http://lkml.kernel.org/r/20170701180129.GA17405@beast Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index bb71f2871dac..591792c8e5b0 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -49,7 +49,6 @@ extern __must_check bool refcount_inc_not_zero(refcount_t *r); extern void refcount_inc(refcount_t *r); extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r); -extern void refcount_sub(unsigned int i, refcount_t *r); extern __must_check bool refcount_dec_and_test(refcount_t *r); extern void refcount_dec(refcount_t *r); @@ -79,11 +78,6 @@ static inline __must_check bool refcount_sub_and_test(unsigned int i, refcount_t return atomic_sub_and_test(i, &r->refs); } -static inline void refcount_sub(unsigned int i, refcount_t *r) -{ - atomic_sub(i, &r->refs); -} - static inline __must_check bool refcount_dec_and_test(refcount_t *r) { return atomic_dec_and_test(&r->refs); -- cgit v1.2.3 From 9ee8a1c4a0e232e9b86e03f7c628ff0286444e00 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 28 Jun 2017 15:14:01 -0500 Subject: PCI: Remove pci_scan_root_bus_msi() The pci_scan_root_bus_bridge() function allows passing a parameterized struct pci_host_bridge and scanning the resulting PCI bus; since the struct msi_controller is part of the struct pci_host_bridge and the struct pci_host_bridge can now be passed to pci_scan_root_bus_bridge() explicitly, there is no need for a scan interface with a MSI controller parameter. 
With all PCI host controller drivers and platform code relying on pci_scan_root_bus_msi() converted over to pci_scan_root_bus_bridge() the pci_scan_root_bus_msi() becomes obsolete and can be removed. Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index b56dc13f47c2..9022b542556a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -849,10 +849,6 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus, int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int busmax); int pci_bus_update_busn_res_end(struct pci_bus *b, int busmax); void pci_bus_release_busn_res(struct pci_bus *b); -struct pci_bus *pci_scan_root_bus_msi(struct device *parent, int bus, - struct pci_ops *ops, void *sysdata, - struct list_head *resources, - struct msi_controller *msi); struct pci_bus *pci_scan_root_bus(struct device *parent, int bus, struct pci_ops *ops, void *sysdata, struct list_head *resources); -- cgit v1.2.3 From 3aa8a41e0bf5565946082d23ae589c1a8559494a Mon Sep 17 00:00:00 2001 From: Matthew Minter Date: Wed, 28 Jun 2017 15:14:02 -0500 Subject: PCI: Add IRQ mapping function pointers to pci_host_bridge struct In order to defer IRQ assignment arches must be able to register functions to map and swizzle interrupts. These registered functions are stored in the pci_host_bridge struct. 
Signed-off-by: Matthew Minter Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 9022b542556a..3c5d8b026d6e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -432,6 +432,8 @@ struct pci_host_bridge { void *sysdata; int busnr; struct list_head windows; /* resource_entry */ + u8 (*swizzle_irq)(struct pci_dev *, u8 *); /* platform IRQ swizzler */ + int (*map_irq)(const struct pci_dev *, u8, u8); void (*release_fn)(struct pci_host_bridge *); void *release_data; struct msi_controller *msi; -- cgit v1.2.3 From 47a650f2795b00297a5a3eab7aaa46bdb2bbe304 Mon Sep 17 00:00:00 2001 From: Matthew Minter Date: Wed, 28 Jun 2017 15:14:02 -0500 Subject: PCI: Add pci_assign_irq() function and have pci_fixup_irqs() use it Here we delete the static pdev_fixup_irq() function which is currently what pci_fixup_irqs() uses to actually assign the IRQs and replace it with the pci_assign_irq() function which changes the interface and uses the new function pointers stored in the host bridge structure. Eventually this will allow pci_fixup_irqs() to be removed entirely and the new deferred assignment code path will call pci_assign_irq() directly. However to ensure current users continue to work, a new implementation of pci_fixup_irqs() is introduced which simply wraps the functionality of pci_assign_irq(). 
Signed-off-by: Matthew Minter [lorenzo.pieralisi@arm.com: reworked comments/log] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 3c5d8b026d6e..5c1c0ae38dd3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1147,6 +1147,7 @@ void pdev_enable_device(struct pci_dev *); int pci_enable_resources(struct pci_dev *, int mask); void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *), int (*)(const struct pci_dev *, u8, u8)); +void pci_assign_irq(struct pci_dev *dev); struct resource *pci_find_resource(struct pci_dev *dev, struct resource *res); #define HAVE_PCI_REQ_REGIONS 2 int __must_check pci_request_regions(struct pci_dev *, const char *); -- cgit v1.2.3 From 334fd34d76f237c0ee58dfc400d2c4e34d660544 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 29 Jun 2017 11:43:20 -0700 Subject: vfs: Add page_cache_seek_hole_data helper Both ext4 and xfs implement seeking for the next hole or piece of data in unwritten extents by scanning the page cache, and both versions share the same bug when iterating the buffers of a page: the start offset into the page isn't taken into account, so when a page fits more than two filesystem blocks, things will go wrong. For example, on a filesystem with a block size of 1k, the following command will fail: xfs_io -f -c "falloc 0 4k" \ -c "pwrite 1k 1k" \ -c "pwrite 3k 1k" \ -c "seek -a -r 0" foo In this example, neither lseek(fd, 1024, SEEK_HOLE) nor lseek(fd, 2048, SEEK_DATA) will return the correct result. Introduce a generic vfs helper for seeking in the page cache that gets this right. The next commits will replace the filesystem specific implementations. Signed-off-by: Andreas Gruenbacher [hch: dropped the export] Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- include/linux/buffer_head.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index bd029e52ef5e..ad4e024ce17e 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -201,6 +201,8 @@ void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); int bh_uptodate_or_lock(struct buffer_head *bh); int bh_submit_read(struct buffer_head *bh); +loff_t page_cache_seek_hole_data(struct inode *inode, loff_t offset, + loff_t length, int whence); extern int buffer_heads_over_limit; -- cgit v1.2.3 From 0ed3b0d45fd39142e418220f518c8959c1a5f596 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 29 Jun 2017 11:43:21 -0700 Subject: vfs: Add iomap_seek_hole and iomap_seek_data helpers Filesystems can use this for implementing lseek SEEK_HOLE / SEEK_DATA support via iomap. Signed-off-by: Andreas Gruenbacher [hch: split functions, coding style cleanups] Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- include/linux/iomap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index f753e788da31..8a03f5dcd89b 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -83,6 +83,10 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops); int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, loff_t start, loff_t len, const struct iomap_ops *ops); +loff_t iomap_seek_hole(struct inode *inode, loff_t offset, + const struct iomap_ops *ops); +loff_t iomap_seek_data(struct inode *inode, loff_t offset, + const struct iomap_ops *ops); /* * Flags for direct I/O ->end_io: -- cgit v1.2.3 From 277036f05be242540b7bfe75f226107d04f51b06 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 2 Jun 2017 07:43:27 +0200 Subject: platform: Accept const properties Aligns us with device_add_properties, the function we call. 
Signed-off-by: Jan Kiszka Reviewed-by: Andy Shevchenko --- include/linux/platform_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 98c2a7c7108e..49f634d96118 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -172,7 +172,7 @@ extern int platform_device_add_resources(struct platform_device *pdev, extern int platform_device_add_data(struct platform_device *pdev, const void *data, size_t size); extern int platform_device_add_properties(struct platform_device *pdev, - struct property_entry *properties); + const struct property_entry *properties); extern int platform_device_add(struct platform_device *pdev); extern void platform_device_del(struct platform_device *pdev); extern void platform_device_put(struct platform_device *pdev); -- cgit v1.2.3 From c851a9dc4359c6b19722de568e9f543c1c23481c Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Sun, 2 Jul 2017 10:29:21 +0300 Subject: qed: Introduce iWARP personality iWARP personality introduced the need for differentiating in several places in the code whether we are RoCE, iWARP or either. This leads to introducing new macros for querying the personality. Signed-off-by: Michal Kalderon Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. 
Miller --- include/linux/qed/common_hsi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index a567cbf8c5b4..885ae1379b5a 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -778,7 +778,7 @@ enum protocol_type { PROTOCOLID_ROCE, PROTOCOLID_CORE, PROTOCOLID_ETH, - PROTOCOLID_RESERVED4, + PROTOCOLID_IWARP, PROTOCOLID_RESERVED5, PROTOCOLID_PREROCE, PROTOCOLID_COMMON, -- cgit v1.2.3 From 67b40dccc45ff5d488aad17114e80e00029fd854 Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Sun, 2 Jul 2017 10:29:22 +0300 Subject: qed: Implement iWARP initialization, teardown and qp operations This patch adds iWARP support for flows that have common code between RoCE and iWARP, such as initialization, teardown and qp setup verbs: create, destroy, modify, query. It introduces the iWARP specific files qed_iwarp.[ch] and iwarp_common.h Signed-off-by: Michal Kalderon Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/iwarp_common.h | 53 ++++++++++++++++++++++++++++++++++++++++ include/linux/qed/qed_rdma_if.h | 1 + 2 files changed, 54 insertions(+) create mode 100644 include/linux/qed/iwarp_common.h (limited to 'include/linux') diff --git a/include/linux/qed/iwarp_common.h b/include/linux/qed/iwarp_common.h new file mode 100644 index 000000000000..b8b3e1cfae90 --- /dev/null +++ b/include/linux/qed/iwarp_common.h @@ -0,0 +1,53 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015-2017 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __IWARP_COMMON__ +#define __IWARP_COMMON__ +#include +/************************/ +/* IWARP FW CONSTANTS */ +/************************/ + +#define IWARP_ACTIVE_MODE 0 +#define IWARP_PASSIVE_MODE 1 + +#define IWARP_SHARED_QUEUE_PAGE_SIZE (0x8000) +#define IWARP_SHARED_QUEUE_PAGE_RQ_PBL_OFFSET (0x4000) +#define IWARP_SHARED_QUEUE_PAGE_RQ_PBL_MAX_SIZE (0x1000) +#define IWARP_SHARED_QUEUE_PAGE_SQ_PBL_OFFSET (0x5000) +#define IWARP_SHARED_QUEUE_PAGE_SQ_PBL_MAX_SIZE (0x3000) + +#define IWARP_REQ_MAX_INLINE_DATA_SIZE (128) +#define IWARP_REQ_MAX_SINGLE_SQ_WQE_SIZE (176) + +#define IWARP_MAX_QPS (64 * 1024) + +#endif /* __IWARP_COMMON__ */ diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h index ff9be01b5f53..5b4bb09a3354 100644 --- a/include/linux/qed/qed_rdma_if.h +++ b/include/linux/qed/qed_rdma_if.h @@ -491,6 +491,7 @@ struct qed_roce_ll2_packet { enum qed_rdma_type { QED_RDMA_TYPE_ROCE, + QED_RDMA_TYPE_IWARP }; struct qed_dev_rdma_info { -- cgit v1.2.3 From 526d1d05e456c9cfc077694d18b5f521e2338f18 Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Sun, 2 Jul 2017 10:29:23 +0300 Subject: qed: Rename some ll2 related defines Make some names more generic as they will be used by iWARP too. Signed-off-by: Michal Kalderon Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. 
Miller --- include/linux/qed/qed_ll2_if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index 5958b45eb699..c9c56bc42a82 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -47,7 +47,7 @@ enum qed_ll2_conn_type { QED_LL2_TYPE_FCOE, QED_LL2_TYPE_ISCSI, QED_LL2_TYPE_TEST, - QED_LL2_TYPE_ISCSI_OOO, + QED_LL2_TYPE_OOO, QED_LL2_TYPE_RESERVED2, QED_LL2_TYPE_ROCE, QED_LL2_TYPE_RESERVED3, -- cgit v1.2.3 From cc4ad324e7e247bb4979791dd4f2ff11419d9742 Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Sun, 2 Jul 2017 10:29:24 +0300 Subject: qed: Add iWARP support in ll2 connections Add a new connection type for iWARP ll2 connections for setting correct ll2 filters and connection type to FW. Signed-off-by: Michal Kalderon Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/qed_ll2_if.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index c9c56bc42a82..dd7a3b86bb9e 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -50,6 +50,7 @@ enum qed_ll2_conn_type { QED_LL2_TYPE_OOO, QED_LL2_TYPE_RESERVED2, QED_LL2_TYPE_ROCE, + QED_LL2_TYPE_IWARP, QED_LL2_TYPE_RESERVED3, MAX_QED_LL2_RX_CONN_TYPE }; -- cgit v1.2.3 From 65a91a6cdb868a28b919ca133c0f9d9dfd9a635a Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Sun, 2 Jul 2017 10:29:26 +0300 Subject: qed: iWARP CM add listener functions and initial SYN processing This patch adds the ability to add and remove listeners and identify whether the SYN packet received is intended for iWARP or not. If a listener is not found the SYN packet is posted back to the chip. Signed-off-by: Michal Kalderon Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. 
Miller --- include/linux/qed/qed_rdma_if.h | 52 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h index 5b4bb09a3354..28df5688ad0c 100644 --- a/include/linux/qed/qed_rdma_if.h +++ b/include/linux/qed/qed_rdma_if.h @@ -470,6 +470,52 @@ struct qed_rdma_counters_out_params { #define QED_ROCE_TX_HEAD_FAILURE (1) #define QED_ROCE_TX_FRAG_FAILURE (2) +enum qed_iwarp_event_type { + QED_IWARP_EVENT_MPA_REQUEST, /* Passive side request received */ +}; + +enum qed_tcp_ip_version { + QED_TCP_IPV4, + QED_TCP_IPV6, +}; + +struct qed_iwarp_cm_info { + enum qed_tcp_ip_version ip_version; + u32 remote_ip[4]; + u32 local_ip[4]; + u16 remote_port; + u16 local_port; + u16 vlan; + u8 ord; + u8 ird; + u16 private_data_len; + const void *private_data; +}; + +struct qed_iwarp_cm_event_params { + enum qed_iwarp_event_type event; + const struct qed_iwarp_cm_info *cm_info; + void *ep_context; /* To be passed to accept call */ + int status; +}; + +typedef int (*iwarp_event_handler) (void *context, + struct qed_iwarp_cm_event_params *event); + +struct qed_iwarp_listen_in { + iwarp_event_handler event_cb; + void *cb_context; /* passed to event_cb */ + u32 max_backlog; + enum qed_tcp_ip_version ip_version; + u32 ip_addr[4]; + u16 port; + u16 vlan; +}; + +struct qed_iwarp_listen_out { + void *handle; +}; + struct qed_roce_ll2_header { void *vaddr; dma_addr_t baddr; @@ -576,6 +622,12 @@ struct qed_rdma_ops { int (*ll2_set_mac_filter)(struct qed_dev *cdev, u8 *old_mac_address, u8 *new_mac_address); + int (*iwarp_create_listen)(void *rdma_cxt, + struct qed_iwarp_listen_in *iparams, + struct qed_iwarp_listen_out *oparams); + + int (*iwarp_destroy_listen)(void *rdma_cxt, void *handle); + }; const struct qed_rdma_ops *qed_get_rdma_ops(void); -- cgit v1.2.3 From 456a584947d5b92d5e5a62cc68125ab5f150aa8c Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Sun, 2 Jul 
2017 10:29:27 +0300 Subject: qed: iWARP CM add passive side connect This patch implements the passive side connect. It addresses pre-allocating resources, creating a connection element upon valid SYN packet received. Calling upper layer and implementation of the accept/reject calls. Error handling is not part of this patch. Signed-off-by: Michal Kalderon Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 2 ++ include/linux/qed/qed_rdma_if.h | 26 +++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 885ae1379b5a..39e2a2ac2471 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -38,6 +38,8 @@ #include /* dma_addr_t manip */ +#define PTR_LO(x) ((u32)(((uintptr_t)(x)) & 0xffffffff)) +#define PTR_HI(x) ((u32)((((uintptr_t)(x)) >> 16) >> 16)) #define DMA_LO_LE(x) cpu_to_le32(lower_32_bits(x)) #define DMA_HI_LE(x) cpu_to_le32(upper_32_bits(x)) #define DMA_REGPAIR_LE(x, val) do { \ diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h index 28df5688ad0c..c4c241fe2579 100644 --- a/include/linux/qed/qed_rdma_if.h +++ b/include/linux/qed/qed_rdma_if.h @@ -471,7 +471,8 @@ struct qed_rdma_counters_out_params { #define QED_ROCE_TX_FRAG_FAILURE (2) enum qed_iwarp_event_type { - QED_IWARP_EVENT_MPA_REQUEST, /* Passive side request received */ + QED_IWARP_EVENT_MPA_REQUEST, /* Passive side request received */ + QED_IWARP_EVENT_PASSIVE_COMPLETE, /* ack on mpa response */ }; enum qed_tcp_ip_version { @@ -516,6 +517,23 @@ struct qed_iwarp_listen_out { void *handle; }; +struct qed_iwarp_accept_in { + void *ep_context; + void *cb_context; + struct qed_rdma_qp *qp; + const void *private_data; + u16 private_data_len; + u8 ord; + u8 ird; +}; + +struct qed_iwarp_reject_in { + void *ep_context; + void *cb_context; + const void 
*private_data; + u16 private_data_len; +}; + struct qed_roce_ll2_header { void *vaddr; dma_addr_t baddr; @@ -626,6 +644,12 @@ struct qed_rdma_ops { struct qed_iwarp_listen_in *iparams, struct qed_iwarp_listen_out *oparams); + int (*iwarp_accept)(void *rdma_cxt, + struct qed_iwarp_accept_in *iparams); + + int (*iwarp_reject)(void *rdma_cxt, + struct qed_iwarp_reject_in *iparams); + int (*iwarp_destroy_listen)(void *rdma_cxt, void *handle); }; -- cgit v1.2.3 From 4b0fdd7c8b757125ac7996617d914bbdb9e0348c Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Sun, 2 Jul 2017 10:29:28 +0300 Subject: qed: iWARP CM add active side connect This patch implements the active side connect. Offload a connection, process MPA reply and send RTR. In some of the common passive/active functions, the active side will work in blocking mode. Signed-off-by: Michal Kalderon Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/qed_rdma_if.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h index c4c241fe2579..e9514a69b03f 100644 --- a/include/linux/qed/qed_rdma_if.h +++ b/include/linux/qed/qed_rdma_if.h @@ -473,6 +473,8 @@ struct qed_rdma_counters_out_params { enum qed_iwarp_event_type { QED_IWARP_EVENT_MPA_REQUEST, /* Passive side request received */ QED_IWARP_EVENT_PASSIVE_COMPLETE, /* ack on mpa response */ + QED_IWARP_EVENT_ACTIVE_COMPLETE, /* Active side reply received */ + QED_IWARP_EVENT_ACTIVE_MPA_REPLY, }; enum qed_tcp_ip_version { @@ -503,6 +505,20 @@ struct qed_iwarp_cm_event_params { typedef int (*iwarp_event_handler) (void *context, struct qed_iwarp_cm_event_params *event); +struct qed_iwarp_connect_in { + iwarp_event_handler event_cb; + void *cb_context; + struct qed_rdma_qp *qp; + struct qed_iwarp_cm_info cm_info; + u16 mss; + u8 remote_mac_addr[ETH_ALEN]; + u8 local_mac_addr[ETH_ALEN]; +}; + 
+struct qed_iwarp_connect_out { + void *ep_context; +}; + struct qed_iwarp_listen_in { iwarp_event_handler event_cb; void *cb_context; /* passed to event_cb */ @@ -534,6 +550,10 @@ struct qed_iwarp_reject_in { u16 private_data_len; }; +struct qed_iwarp_send_rtr_in { + void *ep_context; +}; + struct qed_roce_ll2_header { void *vaddr; dma_addr_t baddr; @@ -640,6 +660,10 @@ struct qed_rdma_ops { int (*ll2_set_mac_filter)(struct qed_dev *cdev, u8 *old_mac_address, u8 *new_mac_address); + int (*iwarp_connect)(void *rdma_cxt, + struct qed_iwarp_connect_in *iparams, + struct qed_iwarp_connect_out *oparams); + int (*iwarp_create_listen)(void *rdma_cxt, struct qed_iwarp_listen_in *iparams, struct qed_iwarp_listen_out *oparams); @@ -652,6 +676,8 @@ struct qed_rdma_ops { int (*iwarp_destroy_listen)(void *rdma_cxt, void *handle); + int (*iwarp_send_rtr)(void *rdma_cxt, + struct qed_iwarp_send_rtr_in *iparams); }; const struct qed_rdma_ops *qed_get_rdma_ops(void); -- cgit v1.2.3 From fc4c6065e661224df3db50780219ac53fee56e2b Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Sun, 2 Jul 2017 10:29:29 +0300 Subject: qed: iWARP implement disconnect flows This patch takes care of active/passive disconnect flows. Disconnect flows can be initiated remotely, in which case a async event will arrive from peer and indicated to qedr driver. These are referred to as exceptions. When a QP is destroyed, it needs to check that it's associated ep has been closed. Signed-off-by: Michal Kalderon Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. 
Miller --- include/linux/qed/qed_rdma_if.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h index e9514a69b03f..01966c3a3e5d 100644 --- a/include/linux/qed/qed_rdma_if.h +++ b/include/linux/qed/qed_rdma_if.h @@ -474,6 +474,8 @@ enum qed_iwarp_event_type { QED_IWARP_EVENT_MPA_REQUEST, /* Passive side request received */ QED_IWARP_EVENT_PASSIVE_COMPLETE, /* ack on mpa response */ QED_IWARP_EVENT_ACTIVE_COMPLETE, /* Active side reply received */ + QED_IWARP_EVENT_DISCONNECT, + QED_IWARP_EVENT_CLOSE, QED_IWARP_EVENT_ACTIVE_MPA_REPLY, }; -- cgit v1.2.3 From 9816b614346925feac1198e33d2dc5201c4ef74e Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Sun, 2 Jul 2017 10:29:30 +0300 Subject: qed: iWARP CM add error handling This patch introduces error handling for errors that occurred during connection establishment. Signed-off-by: Michal Kalderon Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. 
Miller --- include/linux/qed/qed_rdma_if.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h index 01966c3a3e5d..4dd72ba210f5 100644 --- a/include/linux/qed/qed_rdma_if.h +++ b/include/linux/qed/qed_rdma_if.h @@ -476,7 +476,16 @@ enum qed_iwarp_event_type { QED_IWARP_EVENT_ACTIVE_COMPLETE, /* Active side reply received */ QED_IWARP_EVENT_DISCONNECT, QED_IWARP_EVENT_CLOSE, + QED_IWARP_EVENT_IRQ_FULL, + QED_IWARP_EVENT_RQ_EMPTY, + QED_IWARP_EVENT_LLP_TIMEOUT, + QED_IWARP_EVENT_REMOTE_PROTECTION_ERROR, + QED_IWARP_EVENT_CQ_OVERFLOW, + QED_IWARP_EVENT_QP_CATASTROPHIC, QED_IWARP_EVENT_ACTIVE_MPA_REPLY, + QED_IWARP_EVENT_LOCAL_ACCESS_ERROR, + QED_IWARP_EVENT_REMOTE_OPERATION_ERROR, + QED_IWARP_EVENT_TERMINATE_RECEIVED }; enum qed_tcp_ip_version { -- cgit v1.2.3 From e1069bbfcf3bcf4feb264397f3451184fd66b907 Mon Sep 17 00:00:00 2001 From: Jim Baxter Date: Wed, 28 Jun 2017 21:35:29 +0100 Subject: net: cdc_ncm: Reduce memory use when kernel memory low MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CDC-NCM driver can require large amounts of memory to create skb's and this can be a problem when the memory becomes fragmented. This especially affects embedded systems that have constrained resources but wish to maximise the throughput of CDC-NCM with 16KiB NTB's. The issue is after running for a while the kernel memory can become fragmented and it needs compacting. If the NTB allocation is needed before the memory has been compacted the atomic allocation can fail which can cause increased latency, large re-transmissions or disconnections depending upon the data being transmitted at the time. This situation occurs for less than a second until the kernel has compacted the memory but the failed devices can take a lot longer to recover from the failed TX packets. 
To ease this temporary situation I modified the CDC-NCM TX path to temporarily switch into a reduced memory mode which allocates an NTB that will fit into a USB_CDC_NCM_NTB_MIN_OUT_SIZE (default 2048 Bytes) sized memory block and only transmit NTB's with a single network frame until the memory situation is resolved. Each time this issue occurs we wait for an increasing number of reduced size allocations before requesting a full size one to not put additional pressure on a low memory system. Once the memory is compacted the CDC-NCM data can resume transmitting at the normal tx_max rate once again. Signed-off-by: Jim Baxter Reviewed-by: Bjørn Mork Signed-off-by: David S. Miller --- include/linux/usb/cdc_ncm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 00d232406f18..021f7a88f52c 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -117,6 +117,9 @@ struct cdc_ncm_ctx { u32 tx_curr_frame_num; u32 rx_max; u32 tx_max; + u32 tx_curr_size; + u32 tx_low_mem_max_cnt; + u32 tx_low_mem_val; u32 max_datagram_size; u16 tx_max_datagrams; u16 tx_remainder; -- cgit v1.2.3 From f0f9b4ed23381d97cde2ac64248198bc43608e6d Mon Sep 17 00:00:00 2001 From: Lin Yun Sheng Date: Fri, 30 Jun 2017 17:44:15 +0800 Subject: net: phy: Add phy loopback support in net phy framework This patch add set_loopback in phy_driver, which is used by MAC driver to enable or disable phy loopback. it also add a generic genphy_loopback function, which use BMCR loopback bit to enable or disable loopback. Signed-off-by: Lin Yun Sheng Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/linux/phy.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 1d8d70193782..2a9567bb8186 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -372,6 +372,7 @@ struct phy_c45_device_ids { * has_fixups: Set to true if this phy has fixups/quirks. * suspended: Set to true if this phy has been suspended successfully. * sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal. + * loopback_enabled: Set true if this phy has been loopbacked successfully. * state: state of the PHY for management purposes * dev_flags: Device-specific flags used by the PHY driver. * link_timeout: The number of timer firings to wait before the @@ -409,6 +410,7 @@ struct phy_device { bool has_fixups; bool suspended; bool sysfs_links; + bool loopback_enabled; enum phy_state state; @@ -648,6 +650,7 @@ struct phy_driver { int (*set_tunable)(struct phy_device *dev, struct ethtool_tunable *tuna, const void *data); + int (*set_loopback)(struct phy_device *dev, bool enable); }; #define to_phy_driver(d) container_of(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) @@ -793,6 +796,7 @@ void phy_device_remove(struct phy_device *phydev); int phy_init_hw(struct phy_device *phydev); int phy_suspend(struct phy_device *phydev); int phy_resume(struct phy_device *phydev); +int phy_loopback(struct phy_device *phydev, bool enable); struct phy_device *phy_attach(struct net_device *dev, const char *bus_id, phy_interface_t interface); struct phy_device *phy_find_first(struct mii_bus *bus); @@ -847,6 +851,7 @@ int genphy_update_link(struct phy_device *phydev); int genphy_read_status(struct phy_device *phydev); int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); +int genphy_loopback(struct phy_device *phydev, bool enable); int genphy_soft_reset(struct phy_device *phydev); static inline int genphy_no_soft_reset(struct phy_device *phydev) { -- cgit 
v1.2.3 From 0daf4349406074fc03e4889ba5e97e6fb5311bab Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 2 Jul 2017 02:13:25 +0200 Subject: bpf, net: add skb_mac_header_len helper Add a small skb_mac_header_len() helper similarly as the skb_network_header_len() we have and replace open coded places in BPF's bpf_skb_change_proto() helper. Will also be used in upcoming work. Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d0b9f3846eab..3d3ceaac13b1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2206,6 +2206,11 @@ static inline int skb_mac_offset(const struct sk_buff *skb) return skb_mac_header(skb) - skb->data; } +static inline u32 skb_mac_header_len(const struct sk_buff *skb) +{ + return skb->network_header - skb->mac_header; +} + static inline int skb_mac_header_was_set(const struct sk_buff *skb) { return skb->mac_header != (typeof(skb->mac_header))~0U; -- cgit v1.2.3 From f96da09473b52c09125cc9bf7d7d4576ae8229e0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 2 Jul 2017 02:13:27 +0200 Subject: bpf: simplify narrower ctx access This work tries to make the semantics and code around the narrower ctx access a bit easier to follow. Right now everything is done inside the .is_valid_access(). Offset matching is done differently for read/write types, meaning writes don't support narrower access and thus matching only on offsetof(struct foo, bar) is enough whereas for read case that supports narrower access we must check for offsetof(struct foo, bar) + offsetof(struct foo, bar) + sizeof() - 1 for each of the cases. 
For read cases of individual members that don't support narrower access (like packet pointers or skb->cb[] case which has its own narrow access logic), we check as usual only offsetof(struct foo, bar) like in write case. Then, for the case where narrower access is allowed, we also need to set the aux info for the access. Meaning, ctx_field_size and converted_op_size have to be set. First is the original field size e.g. sizeof() as in above example from the user facing ctx, and latter one is the target size after actual rewrite happened, thus for the kernel facing ctx. Also here we need the range match and we need to keep track changing convert_ctx_access() and converted_op_size from is_valid_access() as both are not at the same location. We can simplify the code a bit: check_ctx_access() becomes simpler in that we only store ctx_field_size as a meta data and later in convert_ctx_accesses() we fetch the target_size right from the location where we do convert. Should the verifier be misconfigured we do reject for BPF_WRITE cases or target_size that are not provided. For the subsystems, we always work on ranges in is_valid_access() and add small helpers for ranges and narrow access, convert_ctx_accesses() sets target_size for the relevant instruction. Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Cc: Yonghong Song Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 9 +++++++-- include/linux/filter.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5175729270d7..b69e7a5869ff 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -156,9 +156,14 @@ struct bpf_prog; struct bpf_insn_access_aux { enum bpf_reg_type reg_type; int ctx_field_size; - int converted_op_size; }; +static inline void +bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) +{ + aux->ctx_field_size = size; +} + struct bpf_verifier_ops { /* return eBPF function prototype for verification */ const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id); @@ -173,7 +178,7 @@ struct bpf_verifier_ops { u32 (*convert_ctx_access)(enum bpf_access_type type, const struct bpf_insn *src, struct bpf_insn *dst, - struct bpf_prog *prog); + struct bpf_prog *prog, u32 *target_size); int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); }; diff --git a/include/linux/filter.h b/include/linux/filter.h index 738f8b14f025..f1fc9baa3509 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -337,6 +337,22 @@ struct bpf_prog_aux; bpf_size; \ }) +#define bpf_size_to_bytes(bpf_size) \ +({ \ + int bytes = -EINVAL; \ + \ + if (bpf_size == BPF_B) \ + bytes = sizeof(u8); \ + else if (bpf_size == BPF_H) \ + bytes = sizeof(u16); \ + else if (bpf_size == BPF_W) \ + bytes = sizeof(u32); \ + else if (bpf_size == BPF_DW) \ + bytes = sizeof(u64); \ + \ + bytes; \ +}) + #define BPF_SIZEOF(type) \ ({ \ const int __size = bytes_to_bpf_size(sizeof(type)); \ @@ -351,6 +367,13 @@ struct bpf_prog_aux; __size; \ }) +#define BPF_LDST_BYTES(insn) \ + ({ \ + const int __size = bpf_size_to_bytes(BPF_SIZE(insn->code)); \ + WARN_ON(__size < 0); \ + __size; \ + }) + #define __BPF_MAP_0(m, v, ...) v #define __BPF_MAP_1(m, v, t, a, ...) 
m(t, a) #define __BPF_MAP_2(m, v, t, a, ...) m(t, a), __BPF_MAP_1(m, v, __VA_ARGS__) @@ -401,6 +424,18 @@ struct bpf_prog_aux; #define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__) #define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__) +#define bpf_ctx_range(TYPE, MEMBER) \ + offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 +#define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2) \ + offsetof(TYPE, MEMBER1) ... offsetofend(TYPE, MEMBER2) - 1 + +#define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE) \ + ({ \ + BUILD_BUG_ON(FIELD_SIZEOF(TYPE, MEMBER) != (SIZE)); \ + *(PTR_SIZE) = (SIZE); \ + offsetof(TYPE, MEMBER); \ + }) + #ifdef CONFIG_COMPAT /* A struct sock_filter is architecture independent. */ struct compat_sock_fprog { @@ -564,6 +599,18 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog) return prog->type == BPF_PROG_TYPE_UNSPEC; } +static inline bool +bpf_ctx_narrow_access_ok(u32 off, u32 size, const u32 size_default) +{ + bool off_ok; +#ifdef __LITTLE_ENDIAN + off_ok = (off & (size_default - 1)) == 0; +#else + off_ok = (off & (size_default - 1)) + size == size_default; +#endif + return off_ok && size <= size_default && (size & (size - 1)) == 0; +} + #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) #ifdef CONFIG_ARCH_HAS_SET_MEMORY -- cgit v1.2.3 From 775755ed3c65fb2d31f9268162495d76eaa2c281 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Jun 2017 13:10:38 +0200 Subject: PCI: Split ->reset_notify() method into ->reset_prepare() and ->reset_done() The pci_error_handlers->reset_notify() method had a flag to indicate whether to prepare for or clean up after a reset. The prepare and done cases have no shared functionality whatsoever, so split them into separate methods. 
[bhelgaas: changelog, update locking comments] Link: http://lkml.kernel.org/r/20170601111039.8913-3-hch@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index c7cfdff2529c..c5937ee7e774 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -698,7 +698,8 @@ struct pci_error_handlers { pci_ers_result_t (*slot_reset)(struct pci_dev *dev); /* PCI function reset prepare or completed */ - void (*reset_notify)(struct pci_dev *dev, bool prepare); + void (*reset_prepare)(struct pci_dev *dev); + void (*reset_done)(struct pci_dev *dev); /* Device driver may resume normal operations */ void (*resume)(struct pci_dev *dev); -- cgit v1.2.3 From 468138d78510688fb5476f98d23f11ac6a63229a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 2 May 2017 19:52:17 -0400 Subject: binfmt_flat: flat_{get,put}_addr_from_rp() should be able to fail on MMU targets EFAULT is possible here. Make both return 0 or error, passing what used to be the return value of flat_get_addr_from_rp() by reference. Signed-off-by: Al Viro --- include/linux/flat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/flat.h b/include/linux/flat.h index 2c1eb15c4ba4..7d542dfd0def 100644 --- a/include/linux/flat.h +++ b/include/linux/flat.h @@ -9,8 +9,8 @@ #ifndef _LINUX_FLAT_H #define _LINUX_FLAT_H -#include #include +#include /* * While it would be nice to keep this header clean, users of older -- cgit v1.2.3 From fbd08e7673f950854679e5d79a30bb25e77a9d08 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 29 Jun 2017 11:31:10 -0700 Subject: bio-integrity: fix interface for bio_integrity_trim bio_integrity_trim inherent it's interface from bio_trim and accept offset and size, but this API is error prone because data offset must always be insync with bio's data offset. 
That is why we have an integrity update hook in bio_advance(). So the only meaningful values are: offset == 0, sectors == bio_sectors(bio). Let's just remove them completely. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Dmitry Monakhov Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 664a27da276d..1d74f5120369 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -728,7 +728,7 @@ extern bool bio_integrity_enabled(struct bio *bio); extern int bio_integrity_prep(struct bio *); extern void bio_integrity_endio(struct bio *); extern void bio_integrity_advance(struct bio *, unsigned int); -extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); +extern void bio_integrity_trim(struct bio *); extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t); extern int bioset_integrity_create(struct bio_set *, int); extern void bioset_integrity_free(struct bio_set *); @@ -778,8 +778,7 @@ static inline void bio_integrity_advance(struct bio *bio, return; } -static inline void bio_integrity_trim(struct bio *bio, unsigned int offset, - unsigned int sectors) +static inline void bio_integrity_trim(struct bio *bio) { return; } -- cgit v1.2.3 From e23947bd76f00701f9407af23e671f4da96f5f25 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 29 Jun 2017 11:31:11 -0700 Subject: bio-integrity: fold bio_integrity_enabled to bio_integrity_prep Currently all integrity prep hooks are open-coded, and if prepare fails we ignore its code and fail bio with EIO. Let's return real error to upper layer, so later caller may react accordingly. In fact no one wants to use bio_integrity_prep() w/o bio_integrity_enabled, so it is reasonable to fold it into one function. Signed-off-by: Dmitry Monakhov Reviewed-by: Martin K.
Petersen [hch: merged with the latest block tree, return bool from bio_integrity_prep] Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 1d74f5120369..b3b5f5a89a9c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -724,8 +724,7 @@ struct biovec_slab { extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); extern void bio_integrity_free(struct bio *); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); -extern bool bio_integrity_enabled(struct bio *bio); -extern int bio_integrity_prep(struct bio *); +extern bool bio_integrity_prep(struct bio *); extern void bio_integrity_endio(struct bio *); extern void bio_integrity_advance(struct bio *, unsigned int); extern void bio_integrity_trim(struct bio *); @@ -741,11 +740,6 @@ static inline void *bio_integrity(struct bio *bio) return NULL; } -static inline bool bio_integrity_enabled(struct bio *bio) -{ - return false; -} - static inline int bioset_integrity_create(struct bio_set *bs, int pool_size) { return 0; @@ -756,9 +750,9 @@ static inline void bioset_integrity_free (struct bio_set *bs) return; } -static inline int bio_integrity_prep(struct bio *bio) +static inline bool bio_integrity_prep(struct bio *bio) { - return 0; + return true; } static inline void bio_integrity_free(struct bio *bio) -- cgit v1.2.3 From 128b6f9fdd9ace9e56cb3a263b4bc269658f9c40 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 29 Jun 2017 11:31:12 -0700 Subject: t10-pi: Move opencoded contants to common header Signed-off-by: Dmitry Monakhov Reviewed-by: Martin K. 
Petersen Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/t10-pi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index 9375d23a24e7..635a3c5706bd 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -33,6 +33,8 @@ struct t10_pi_tuple { __be32 ref_tag; /* Target LBA or indirect LBA */ }; +#define T10_PI_APP_ESCAPE cpu_to_be16(0xffff) +#define T10_PI_REF_ESCAPE cpu_to_be32(0xffffffff) extern const struct blk_integrity_profile t10_pi_type1_crc; extern const struct blk_integrity_profile t10_pi_type1_ip; -- cgit v1.2.3 From b1fb2c52b2d85f51f36f1661409f9aeef94265ff Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 29 Jun 2017 11:31:13 -0700 Subject: block: guard bvec iteration logic Currently if someone tries to advance a bvec beyond its size we simply dump WARN_ONCE and continue to iterate beyond bvec array boundaries. This simply means that we end up dereferencing/corrupting a random memory region. Sane reaction would be to propagate error back to calling context. But bvec_iter_advance's calling context is not always good for error handling. For safety reasons let's truncate the iterator size to zero, which will break the external iteration loop and prevent us from unpredictable memory range corruption. And even if the caller ignores an error, it will corrupt its own bvecs, not others. This patch does: - Return error back to caller with hope that it will react on this - Truncate iterator size Code was added long time ago here 4550dd6c, luckily no one hit it in real life :) Signed-off-by: Dmitry Monakhov Reviewed-by: Ming Lei Reviewed-by: Martin K.
Petersen [hch: switch to true/false returns instead of errno values] Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 +++- include/linux/bvec.h | 14 +++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index b3b5f5a89a9c..d5e8689f86b8 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -167,8 +167,10 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, if (bio_no_advance_iter(bio)) iter->bi_size -= bytes; - else + else { bvec_iter_advance(bio->bi_io_vec, iter, bytes); + /* TODO: It is reasonable to complete bio with error here. */ + } } #define __bio_for_each_segment(bvl, bio, iter, start) \ diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 89b65b82d98f..de317b4c13c1 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -22,6 +22,7 @@ #include #include +#include /* * was unsigned short, but we might as well be ready for > 64kB I/O pages @@ -66,12 +67,14 @@ struct bvec_iter { .bv_offset = bvec_iter_offset((bvec), (iter)), \ }) -static inline void bvec_iter_advance(const struct bio_vec *bv, - struct bvec_iter *iter, - unsigned bytes) +static inline bool bvec_iter_advance(const struct bio_vec *bv, + struct bvec_iter *iter, unsigned bytes) { - WARN_ONCE(bytes > iter->bi_size, - "Attempted to advance past end of bvec iter\n"); + if (WARN_ONCE(bytes > iter->bi_size, + "Attempted to advance past end of bvec iter\n")) { + iter->bi_size = 0; + return false; + } while (bytes) { unsigned iter_len = bvec_iter_len(bv, *iter); @@ -86,6 +89,7 @@ static inline void bvec_iter_advance(const struct bio_vec *bv, iter->bi_idx++; } } + return true; } #define for_each_bvec(bvl, bio_vec, iter, start) \ -- cgit v1.2.3 From f9df1cd99ebd82f05e8f5e0aa7e38cb8d3c791d7 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 29 Jun 2017 11:31:14 -0700 Subject: bio: add bvec_iter rewind API Some 
->bi_end_io handlers (for example: pi_verify or decrypt handlers) need to know the original data vector, but after the bio traverses the io-stack it may be advanced, split and relocated many times, so it is hard to guess the original iterator. Let's add a 'bi_done' counter which accounts the number of bytes the iterator was advanced during its evolution. Later an end_io handler may easily restore the original iterator by rewinding the iterator to iter->bi_done. Note: this change makes sizeof (struct bvec_iter) a multiple of 8 Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Dmitry Monakhov [hch: switched to true/false return] Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 19 +++++++++++++++++-- include/linux/bvec.h | 27 +++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index d5e8689f86b8..1eba19580185 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -165,14 +165,29 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, { iter->bi_sector += bytes >> 9; - if (bio_no_advance_iter(bio)) + if (bio_no_advance_iter(bio)) { iter->bi_size -= bytes; - else { + iter->bi_done += bytes; + } else { bvec_iter_advance(bio->bi_io_vec, iter, bytes); /* TODO: It is reasonable to complete bio with error here.
*/ } } +static inline bool bio_rewind_iter(struct bio *bio, struct bvec_iter *iter, + unsigned int bytes) +{ + iter->bi_sector -= bytes >> 9; + + if (bio_no_advance_iter(bio)) { + iter->bi_size += bytes; + iter->bi_done -= bytes; + return true; + } + + return bvec_iter_rewind(bio->bi_io_vec, iter, bytes); +} + #define __bio_for_each_segment(bvl, bio, iter, start) \ for (iter = (start); \ (iter).bi_size && \ diff --git a/include/linux/bvec.h b/include/linux/bvec.h index de317b4c13c1..ec8a4d7af6bd 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -40,6 +40,8 @@ struct bvec_iter { unsigned int bi_idx; /* current index into bvl_vec */ + unsigned int bi_done; /* number of bytes completed */ + unsigned int bi_bvec_done; /* number of bytes completed in current bvec */ }; @@ -83,6 +85,7 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv, bytes -= len; iter->bi_size -= len; iter->bi_bvec_done += len; + iter->bi_done += len; if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) { iter->bi_bvec_done = 0; @@ -92,6 +95,30 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv, return true; } +static inline bool bvec_iter_rewind(const struct bio_vec *bv, + struct bvec_iter *iter, + unsigned int bytes) +{ + while (bytes) { + unsigned len = min(bytes, iter->bi_bvec_done); + + if (iter->bi_bvec_done == 0) { + if (WARN_ONCE(iter->bi_idx == 0, + "Attempted to rewind iter beyond " + "bvec's boundaries\n")) { + return false; + } + iter->bi_idx--; + iter->bi_bvec_done = __bvec_iter_bvec(bv, *iter)->bv_len; + continue; + } + bytes -= len; + iter->bi_size += len; + iter->bi_bvec_done -= len; + } + return true; +} + #define for_each_bvec(bvl, bio_vec, iter, start) \ for (iter = (start); \ (iter).bi_size && \ -- cgit v1.2.3 From 7c20f11680a441df09de7235206f70115fbf6290 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Jul 2017 16:58:43 -0600 Subject: bio-integrity: stop abusing bi_end_io And instead call directly into the integrity 
code from bio_end_io. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 1eba19580185..7b1cf4ba0902 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -320,8 +320,6 @@ struct bio_integrity_payload { struct bvec_iter bip_iter; - bio_end_io_t *bip_end_io; /* saved I/O completion fn */ - unsigned short bip_slab; /* slab the bip came from */ unsigned short bip_vcnt; /* # of integrity bio_vecs */ unsigned short bip_max_vcnt; /* integrity bio_vec slots */ @@ -739,10 +737,8 @@ struct biovec_slab { bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); -extern void bio_integrity_free(struct bio *); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); extern bool bio_integrity_prep(struct bio *); -extern void bio_integrity_endio(struct bio *); extern void bio_integrity_advance(struct bio *, unsigned int); extern void bio_integrity_trim(struct bio *); extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t); @@ -772,11 +768,6 @@ static inline bool bio_integrity_prep(struct bio *bio) return true; } -static inline void bio_integrity_free(struct bio *bio) -{ - return; -} - static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask) { -- cgit v1.2.3 From 9114014cf4e6df0b22d764380ae1fc54f1a7a8b2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 29 Jun 2017 23:33:37 +0200 Subject: genirq: Add mutex to irq desc to serialize request/free_irq() The irq_request/release_resources() callbacks are currently invoked under desc->lock with interrupts disabled. This is a source of problems on RT and conceptually not required.
Add a separate mutex to struct irq_desc which allows serializing request/free_irq(), which can be used to move the resource functions out of the desc->lock held region. Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Cc: Heiko Stuebner Cc: Julia Cartwright Cc: Linus Walleij Cc: Brian Norris Cc: Doug Anderson Cc: linux-rockchip@lists.infradead.org Cc: John Keeping Cc: linux-gpio@vger.kernel.org Link: http://lkml.kernel.org/r/20170629214344.039220922@linutronix.de --- include/linux/irqdesc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index d425a3a09722..3e90a094798d 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -3,6 +3,7 @@ #include #include +#include /* * Core internal functions to deal with irq descriptors @@ -45,6 +46,7 @@ struct pt_regs; * IRQF_FORCE_RESUME set * @rcu: rcu head for delayed free * @kobj: kobject used to represent this struct in sysfs + * @request_mutex: mutex to protect request/free before locking desc->lock * @dir: /proc/irq/ procfs entry * @debugfs_file: dentry for the debugfs file * @name: flow handler name for /proc/interrupts output @@ -96,6 +98,7 @@ struct irq_desc { struct rcu_head rcu; struct kobject kobj; #endif + struct mutex request_mutex; int parent_irq; struct module *owner; const char *name; -- cgit v1.2.3 From 119d0312c766773ca3238b9d926077664eed22be Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 25 May 2017 16:28:49 -0400 Subject: kill __copy_in_user() no users left Signed-off-by: Al Viro --- include/linux/uaccess.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 201418d5e15c..97c93bc6f72a 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -180,12 +180,6 @@ copy_to_user(void __user *to, const void *from, unsigned long n) } #ifdef CONFIG_COMPAT static __always_inline unsigned long __must_check
-__copy_in_user(void __user *to, const void *from, unsigned long n) -{ - might_fault(); - return raw_copy_in_user(to, from, n); -} -static __always_inline unsigned long __must_check copy_in_user(void __user *to, const void *from, unsigned long n) { might_fault(); -- cgit v1.2.3 From ad0af7104dadccd55cd2b390271677fac142650f Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 21 Jun 2017 15:28:32 +0300 Subject: vfs: introduce inode 'inuse' lock Added an i_state flag I_INUSE and helpers to set/clear/test the bit. The 'inuse' lock is an 'advisory' inode lock, that can be used to extend exclusive create protection beyond parent->i_mutex lock among cooperating users. This is going to be used by overlayfs to get exclusive ownership on upper and work dirs among overlayfs mounts. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3e68cabb8457..75a5fafaf096 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1930,6 +1930,9 @@ static inline bool HAS_UNMAPPED_ID(struct inode *inode) * wb stat updates to grab mapping->tree_lock. See * inode_switch_wb_work_fn() for details. * + * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper + * and work dirs among overlayfs mounts. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? 
*/ #define I_DIRTY_SYNC (1 << 0) @@ -1950,6 +1953,7 @@ static inline bool HAS_UNMAPPED_ID(struct inode *inode) #define __I_DIRTY_TIME_EXPIRED 12 #define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED) #define I_WB_SWITCH (1 << 13) +#define I_OVL_INUSE (1 << 14) #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) #define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME) -- cgit v1.2.3 From 458bc30cec26c2716746ae215ed23773257e417d Mon Sep 17 00:00:00 2001 From: "Reshetova, Elena" Date: Tue, 4 Jul 2017 15:53:01 +0300 Subject: net, atm: convert atm_dev.refcnt from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: David S. Miller --- include/linux/atmdev.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 4d97a89da066..0ec9bdb1cc9f 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #ifdef CONFIG_PROC_FS @@ -158,7 +159,7 @@ struct atm_dev { struct k_atm_dev_stats stats; /* statistics */ char signal; /* signal status (ATM_PHY_SIG_*) */ int link_rate; /* link rate (default: OC3) */ - atomic_t refcnt; /* reference count */ + refcount_t refcnt; /* reference count */ spinlock_t lock; /* protect internal members */ #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_entry; /* proc entry */ @@ -261,13 +262,13 @@ static inline int atm_may_send(struct atm_vcc *vcc,unsigned int size) static inline void atm_dev_hold(struct atm_dev *dev) { - atomic_inc(&dev->refcnt); + refcount_inc(&dev->refcnt); } static inline void atm_dev_put(struct atm_dev *dev) { - if 
(atomic_dec_and_test(&dev->refcnt)) { + if (refcount_dec_and_test(&dev->refcnt)) { BUG_ON(!test_bit(ATM_DF_REMOVED, &dev->flags)); if (dev->ops->dev_close) dev->ops->dev_close(dev); -- cgit v1.2.3 From 0fa104726b6cc7b1ebb4c60d55cb6abda745f4b6 Mon Sep 17 00:00:00 2001 From: "Reshetova, Elena" Date: Tue, 4 Jul 2017 15:53:13 +0300 Subject: net, sunrpc: convert gss_cl_ctx.count from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: David S. Miller --- include/linux/sunrpc/auth_gss.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 36eebc451b41..cebdf8745901 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -13,6 +13,7 @@ #define _LINUX_SUNRPC_AUTH_GSS_H #ifdef __KERNEL__ +#include #include #include #include @@ -65,7 +66,7 @@ struct rpc_gss_init_res { * the wire when communicating with a server. */ struct gss_cl_ctx { - atomic_t count; + refcount_t count; enum rpc_gss_proc gc_proc; u32 gc_seq; spinlock_t gc_seq_lock; -- cgit v1.2.3 From cd0ae1d395a8bfc208437ce612413e58f5137499 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 21 Jun 2017 19:23:18 +0200 Subject: s390/crash: Remove unused KEXEC_NOTE_BYTES After commit 692f66f26a4c19 ("crash: move crashkernel parsing and vmcore related code under CONFIG_CRASH_CORE") the KEXEC_NOTE_BYTES macro is not used anymore and for s390 we create the ELF header in the new kernel anyway. Therefore remove the macro.
Reported-by: Xunlei Pang Reviewed-by: Mikhail Zaslonko Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- include/linux/crash_core.h | 5 +++++ include/linux/kexec.h | 9 --------- 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 541a197ba4a2..4090a42578a8 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -10,6 +10,11 @@ #define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4) #define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4) +/* + * The per-cpu notes area is a list of notes terminated by a "NULL" + * note header. For kdump, the code in vmcore.c runs in the context + * of the second kernel to combine them into one note. + */ #define CRASH_CORE_NOTE_BYTES ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ CRASH_CORE_NOTE_NAME_BYTES + \ CRASH_CORE_NOTE_DESC_BYTES) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index c9481ebcbc0c..65888418fb69 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -62,15 +62,6 @@ #define KEXEC_CORE_NOTE_NAME CRASH_CORE_NOTE_NAME -/* - * The per-cpu notes area is a list of notes terminated by a "NULL" - * note header. For kdump, the code in vmcore.c runs in the context - * of the second kernel to combine them into one note. - */ -#ifndef KEXEC_NOTE_BYTES -#define KEXEC_NOTE_BYTES CRASH_CORE_NOTE_BYTES -#endif - /* * This structure is used to hold the arguments that are used when loading * kernel binaries. -- cgit v1.2.3 From 1c3eda01a79b8e9237d91c52c5a75b20983f47c6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 29 Jun 2017 19:15:07 +0200 Subject: vtime, sched/cputime: Remove vtime_account_user() It's an unnecessary function between vtime_user_exit() and account_user_time(). 
Tested-by: Luiz Capitulino Signed-off-by: Frederic Weisbecker Reviewed-by: Thomas Gleixner Acked-by: Rik van Riel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1498756511-11714-2-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/vtime.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 0681fe25abeb..18b405e3cd93 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -67,19 +67,12 @@ static inline void vtime_account_system(struct task_struct *tsk) { } #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern void arch_vtime_task_switch(struct task_struct *tsk); -extern void vtime_account_user(struct task_struct *tsk); extern void vtime_user_enter(struct task_struct *tsk); - -static inline void vtime_user_exit(struct task_struct *tsk) -{ - vtime_account_user(tsk); -} - +extern void vtime_user_exit(struct task_struct *tsk); extern void vtime_guest_enter(struct task_struct *tsk); extern void vtime_guest_exit(struct task_struct *tsk); extern void vtime_init_idle(struct task_struct *tsk, int cpu); #else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */ -static inline void vtime_account_user(struct task_struct *tsk) { } static inline void vtime_user_enter(struct task_struct *tsk) { } static inline void vtime_user_exit(struct task_struct *tsk) { } static inline void vtime_guest_enter(struct task_struct *tsk) { } -- cgit v1.2.3 From 60a9ce57e7c5ac1df3a39fb941022bbfa40c0862 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 29 Jun 2017 19:15:09 +0200 Subject: sched/cputime: Rename vtime fields The current "snapshot" based naming on vtime fields suggests we record some past event but that's a low level picture of their actual purpose which comes out blurry. The real point of these fields is to run a basic state machine that tracks down cputime entry while switching between contexts. 
So lets reflect that with more meaningful names. Tested-by: Luiz Capitulino Signed-off-by: Frederic Weisbecker Reviewed-by: Thomas Gleixner Acked-by: Rik van Riel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1498756511-11714-4-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 4 ++-- include/linux/sched.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index e049526bc188..3d537331cd4e 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -171,8 +171,8 @@ extern struct cred init_cred; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN # define INIT_VTIME(tsk) \ .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \ - .vtime_snap = 0, \ - .vtime_snap_whence = VTIME_SYS, + .vtime_starttime = 0, \ + .vtime_state = VTIME_SYS, #else # define INIT_VTIME(tsk) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 9c4ca7433d9d..ff001646549e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -689,7 +689,7 @@ struct task_struct { struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN seqcount_t vtime_seqcount; - unsigned long long vtime_snap; + unsigned long long vtime_starttime; enum { /* Task is sleeping or running in a CPU with VTIME inactive: */ VTIME_INACTIVE = 0, @@ -697,7 +697,7 @@ struct task_struct { VTIME_USER, /* Task runs in kernelspace in a CPU with VTIME active: */ VTIME_SYS, - } vtime_snap_whence; + } vtime_state; #endif #ifdef CONFIG_NO_HZ_FULL -- cgit v1.2.3 From bac5b6b6b11560f323e71d0ebac4061cfe5f56c0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 29 Jun 2017 19:15:10 +0200 Subject: sched/cputime: Move the vtime task fields to their own struct We are about to add vtime accumulation fields to the task struct. Let's avoid more bloatification and gather vtime information to their own struct. 
Tested-by: Luiz Capitulino Signed-off-by: Frederic Weisbecker Reviewed-by: Thomas Gleixner Acked-by: Rik van Riel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1498756511-11714-5-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 6 +++--- include/linux/sched.h | 26 ++++++++++++++++---------- 2 files changed, 19 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 3d537331cd4e..a2f6707e9fc0 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -170,9 +170,9 @@ extern struct cred init_cred; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN # define INIT_VTIME(tsk) \ - .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \ - .vtime_starttime = 0, \ - .vtime_state = VTIME_SYS, + .vtime.seqcount = SEQCNT_ZERO(tsk.vtime.seqcount), \ + .vtime.starttime = 0, \ + .vtime.state = VTIME_SYS, #else # define INIT_VTIME(tsk) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index ff001646549e..eeff8a024f0c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -223,6 +223,21 @@ struct task_cputime { #define prof_exp stime #define sched_exp sum_exec_runtime +enum vtime_state { + /* Task is sleeping or running in a CPU with VTIME inactive: */ + VTIME_INACTIVE = 0, + /* Task runs in userspace in a CPU with VTIME active: */ + VTIME_USER, + /* Task runs in kernelspace in a CPU with VTIME active: */ + VTIME_SYS, +}; + +struct vtime { + seqcount_t seqcount; + unsigned long long starttime; + enum vtime_state state; +}; + struct sched_info { #ifdef CONFIG_SCHED_INFO /* Cumulative counters: */ @@ -688,16 +703,7 @@ struct task_struct { u64 gtime; struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN - seqcount_t vtime_seqcount; - unsigned long long vtime_starttime; - enum { - /* Task is sleeping or running in a CPU with VTIME inactive: */ - VTIME_INACTIVE = 0, - /* Task runs in 
userspace in a CPU with VTIME active: */ - VTIME_USER, - /* Task runs in kernelspace in a CPU with VTIME active: */ - VTIME_SYS, - } vtime_state; + struct vtime vtime; #endif #ifdef CONFIG_NO_HZ_FULL -- cgit v1.2.3 From 2a42eb9594a1480b4ead9e036e06ee1290e5fa6d Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 29 Jun 2017 19:15:11 +0200 Subject: sched/cputime: Accumulate vtime on top of nsec clocksource Currently the cputime source used by vtime is jiffies. When we cross a context boundary and jiffies have changed since the last snapshot, the pending cputime is accounted to the switching out context. This system works ok if the ticks are not aligned across CPUs. If they instead are aligned (ie: all fire at the same time) and the CPUs run in userspace, the jiffies change is only observed on tick exit and therefore the user cputime is accounted as system cputime. This is because the CPU that maintains timekeeping fires its tick at the same time as the others. It updates jiffies in the middle of the tick and the other CPUs see that update on IRQ exit: CPU 0 (timekeeper) CPU 1 ------------------- ------------- jiffies = N ... run in userspace for a jiffy tick entry tick entry (sees jiffies = N) set jiffies = N + 1 tick exit tick exit (sees jiffies = N + 1) account 1 jiffy as stime Fix this with using a nanosec clock source instead of jiffies. The cputime is then accumulated and flushed everytime the pending delta reaches a jiffy in order to mitigate the accounting overhead. [ fweisbec: changelog, rebase on struct vtime, field renames, add delta on cputime readers, keep idle vtime as-is (low overhead accounting), harmonize clock sources. 
] Suggested-by: Thomas Gleixner Reported-by: Luiz Capitulino Tested-by: Luiz Capitulino Signed-off-by: Wanpeng Li Signed-off-by: Frederic Weisbecker Reviewed-by: Thomas Gleixner Acked-by: Rik van Riel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1498756511-11714-6-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index eeff8a024f0c..4818126c5153 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -236,6 +236,9 @@ struct vtime { seqcount_t seqcount; unsigned long long starttime; enum vtime_state state; + u64 utime; + u64 stime; + u64 gtime; }; struct sched_info { -- cgit v1.2.3 From 86d35afb8e07d99f8bfba4eadf93d918b4741f66 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 5 Jul 2017 10:14:38 +0200 Subject: MAINTAINERS: Add Frederic Weisbecker as nohz/dynticks maintainer Frederic has been improving and maintaining the nohz/dynticks kernel features for years, so make his de facto maintainership official. 
Acked-by: Thomas Gleixner Acked-by: Frederic Weisbecker Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched/nohz.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h index 7d3f75db23e5..028d17b918a7 100644 --- a/include/linux/sched/nohz.h +++ b/include/linux/sched/nohz.h @@ -2,7 +2,7 @@ #define _LINUX_SCHED_NOHZ_H /* - * This is the interface between the scheduler and nohz/dyntics: + * This is the interface between the scheduler and nohz/dynticks: */ #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -- cgit v1.2.3 From 2b69c8280c8b29cdeb78b8e92e20ed35f730d319 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 5 Jul 2017 15:26:48 -0400 Subject: mm: drop "wait" parameter from write_one_page() The callers all set it to 1. Also, make it clear that this function will not set any sort of AS_* error, and that the caller must do so if necessary. No existing caller uses this on normal files, so none of them need it. Also, add __must_check here since, in general, the callers need to handle an error here in some fashion. 
Link: http://lkml.kernel.org/r/20170525103303.6524-1-jlayton@redhat.com Signed-off-by: Jeff Layton Reviewed-by: Ross Zwisler Reviewed-by: Jan Kara Reviewed-by: Matthew Wilcox Reviewed-by: Christoph Hellwig Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7cb17c6b97de..ca9c8b27cecb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2199,7 +2199,7 @@ extern void filemap_map_pages(struct vm_fault *vmf, extern int filemap_page_mkwrite(struct vm_fault *vmf); /* mm/page-writeback.c */ -int write_one_page(struct page *page, int wait); +int __must_check write_one_page(struct page *page); void task_dirty_inc(struct task_struct *tsk); /* readahead.c */ -- cgit v1.2.3 From 0f41074a65757b46acbdd4293f0de8a70b147406 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 5 Jul 2017 15:26:50 -0400 Subject: fs: remove call_fsync helper function Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Carlos Maiolino Signed-off-by: Jeff Layton --- include/linux/fs.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 803e5a9b2654..2d9e71e2a308 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1739,12 +1739,6 @@ static inline int call_mmap(struct file *file, struct vm_area_struct *vma) return file->f_op->mmap(file, vma); } -static inline int call_fsync(struct file *file, loff_t start, loff_t end, - int datasync) -{ - return file->f_op->fsync(file, start, end, datasync); -} - ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, -- cgit v1.2.3 From af65207c76ce8e6263a3b097ea35365dde9913d0 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Thu, 6 Jul 2017 00:01:59 -0400 Subject: ext4: fix __ext4_new_inode() journal credits calculation 
ea_inode feature allows creating extended attributes that are up to 64k in size. Update __ext4_new_inode() to pick increased credit limits. To avoid overallocating too many journal credits, update __ext4_xattr_set_credits() to make a distinction between xattr create vs update. This helps __ext4_new_inode() because all attributes are known to be new, so we can save credits that are normally needed to delete old values. Also, have fscrypt specify its maximum context size so that we don't end up allocating credits for 64k size. Signed-off-by: Tahsin Erdogan Signed-off-by: Theodore Ts'o --- include/linux/fscrypt_common.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h index 0a30c106c1e5..82beaf70e7e2 100644 --- a/include/linux/fscrypt_common.h +++ b/include/linux/fscrypt_common.h @@ -83,6 +83,9 @@ struct fscrypt_operations { unsigned (*max_namelen)(struct inode *); }; +/* Maximum value for the third parameter of fscrypt_operations.set_context(). */ +#define FSCRYPT_SET_CONTEXT_MAX_SIZE 28 + static inline bool fscrypt_dummy_context_enabled(struct inode *inode) { if (inode->i_sb->s_cop->dummy_context && -- cgit v1.2.3 From f35157417215ec138c920320c746fdb3e04ef1d5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 4 Jul 2017 17:25:02 +0100 Subject: Provide a function to create a NUL-terminated string from unterminated data Provide a function, kmemdup_nul(), that will create a NUL-terminated string from an unterminated character array where the length is known in advance. This is better than kstrndup() in situations where we already know the string length as the strnlen() in kstrndup() is superfluous. 
Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/string.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index 537918f8a98e..3dd944cfe171 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -131,6 +131,7 @@ extern char *kstrdup(const char *s, gfp_t gfp) __malloc; extern const char *kstrdup_const(const char *s, gfp_t gfp); extern char *kstrndup(const char *s, size_t len, gfp_t gfp); extern void *kmemdup(const void *src, size_t len, gfp_t gfp); +extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp); extern char **argv_split(gfp_t gfp, const char *str, int *argcp); extern void argv_free(char **argv); -- cgit v1.2.3 From ee416bcdba9975065de571e09de1f7ebfde2156a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 4 Jul 2017 17:25:16 +0100 Subject: VFS: Make get_filesystem() return the affected filesystem Make get_filesystem() return a pointer to the filesystem on which it just got a ref. 
Suggested-by: Rasmus Villemoes Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 803e5a9b2654..bc0c054894b9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2956,7 +2956,7 @@ extern int generic_block_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len, get_block_t *get_block); -extern void get_filesystem(struct file_system_type *fs); +extern struct file_system_type *get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern struct super_block *get_super(struct block_device *); -- cgit v1.2.3 From cdf01226b26e98c79c13b335fbe0cbbbe850cf44 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 4 Jul 2017 17:25:22 +0100 Subject: VFS: Provide empty name qstr Provide an empty name (ie. "") qstr for general use. Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/dcache.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d2e38dc6172c..3f65a4fa72ed 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -55,6 +55,11 @@ struct qstr { #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } +extern const char empty_string[]; +extern const struct qstr empty_name; +extern const char slash_string[]; +extern const struct qstr slash_name; + struct dentry_stat_t { long nr_dentry; long nr_unused; -- cgit v1.2.3 From 87d284443d071dc70344dda4b2fb43723686acdb Mon Sep 17 00:00:00 2001 From: Steven Feng Date: Tue, 23 May 2017 15:13:24 +0800 Subject: mfd: rtsx: Do retry when DMA transfer error The request should be resent when DMA transfer error occurred. For rts5227, the clock rate needs to be reduced when error occurred. 
Signed-off-by: Steven Feng Signed-off-by: Lee Jones --- include/linux/mfd/rtsx_pci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index 7eb7cbac0a9a..116816fb9110 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -850,6 +850,9 @@ #define rtsx_pci_init_cmd(pcr) ((pcr)->ci = 0) +#define RTS5227_DEVICE_ID 0x5227 +#define RTS_MAX_TIMES_FREQ_REDUCTION 8 + struct rtsx_pcr; struct pcr_handle { @@ -957,6 +960,8 @@ struct rtsx_pcr { int num_slots; struct rtsx_slot *slots; + + u8 dma_error_count; }; #define CHK_PCI_PID(pcr, pid) ((pcr)->pci->device == (pid)) -- cgit v1.2.3 From 1e3496000c11ec1ec56cf664b6a01d66de423507 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Tue, 13 Jun 2017 10:28:40 +0530 Subject: mfd: Add LP87565 PMIC support The LP87565 chip is a power management IC for Portable Navigation Systems and Tablet Computing devices. It contains the following components: - Configurable Bucks(Single and multi-phase). - Configurable General Purpose Output Signals (GPO). The LP87565-Q1 variant device uses two 2-phase outputs configuration, Buck0 is master for Buck0/1 output and Buck2 is master for Buck2/3 output. Signed-off-by: Keerthy Acked-by: Rob Herring Signed-off-by: Lee Jones --- include/linux/mfd/lp87565.h | 270 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 include/linux/mfd/lp87565.h (limited to 'include/linux') diff --git a/include/linux/mfd/lp87565.h b/include/linux/mfd/lp87565.h new file mode 100644 index 000000000000..d0c91ba65525 --- /dev/null +++ b/include/linux/mfd/lp87565.h @@ -0,0 +1,270 @@ +/* + * Functions to access LP87565 power management chip. 
+ * + * Copyright (C) 2017 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + */ + +#ifndef __LINUX_MFD_LP87565_H +#define __LINUX_MFD_LP87565_H + +#include +#include +#include + +enum lp87565_device_type { + LP87565_DEVICE_TYPE_UNKNOWN = 0, + LP87565_DEVICE_TYPE_LP87565_Q1, +}; + +/* All register addresses */ +#define LP87565_REG_DEV_REV 0X00 +#define LP87565_REG_OTP_REV 0X01 +#define LP87565_REG_BUCK0_CTRL_1 0X02 +#define LP87565_REG_BUCK0_CTRL_2 0X03 + +#define LP87565_REG_BUCK1_CTRL_1 0X04 +#define LP87565_REG_BUCK1_CTRL_2 0X05 + +#define LP87565_REG_BUCK2_CTRL_1 0X06 +#define LP87565_REG_BUCK2_CTRL_2 0X07 + +#define LP87565_REG_BUCK3_CTRL_1 0X08 +#define LP87565_REG_BUCK3_CTRL_2 0X09 + +#define LP87565_REG_BUCK0_VOUT 0X0A +#define LP87565_REG_BUCK0_FLOOR_VOUT 0X0B + +#define LP87565_REG_BUCK1_VOUT 0X0C +#define LP87565_REG_BUCK1_FLOOR_VOUT 0X0D + +#define LP87565_REG_BUCK2_VOUT 0X0E +#define LP87565_REG_BUCK2_FLOOR_VOUT 0X0F + +#define LP87565_REG_BUCK3_VOUT 0X10 +#define LP87565_REG_BUCK3_FLOOR_VOUT 0X11 + +#define LP87565_REG_BUCK0_DELAY 0X12 +#define LP87565_REG_BUCK1_DELAY 0X13 + +#define LP87565_REG_BUCK2_DELAY 0X14 +#define LP87565_REG_BUCK3_DELAY 0X15 + +#define LP87565_REG_GPO2_DELAY 0X16 +#define LP87565_REG_GPO3_DELAY 0X17 +#define LP87565_REG_RESET 0X18 +#define LP87565_REG_CONFIG 0X19 + +#define LP87565_REG_INT_TOP_1 0X1A +#define LP87565_REG_INT_TOP_2 0X1B + +#define LP87565_REG_INT_BUCK_0_1 0X1C +#define LP87565_REG_INT_BUCK_2_3 0X1D +#define LP87565_REG_TOP_STAT 0X1E +#define LP87565_REG_BUCK_0_1_STAT 0X1F +#define LP87565_REG_BUCK_2_3_STAT 0x20 + +#define LP87565_REG_TOP_MASK_1 0x21 +#define LP87565_REG_TOP_MASK_2 0x22 + +#define LP87565_REG_BUCK_0_1_MASK 0x23 +#define LP87565_REG_BUCK_2_3_MASK 0x24 +#define LP87565_REG_SEL_I_LOAD 0x25 + +#define 
LP87565_REG_I_LOAD_2 0x26 +#define LP87565_REG_I_LOAD_1 0x27 + +#define LP87565_REG_PGOOD_CTRL1 0x28 +#define LP87565_REG_PGOOD_CTRL2 0x29 +#define LP87565_REG_PGOOD_FLT 0x2A +#define LP87565_REG_PLL_CTRL 0x2B +#define LP87565_REG_PIN_FUNCTION 0x2C +#define LP87565_REG_GPIO_CONFIG 0x2D +#define LP87565_REG_GPIO_IN 0x2E +#define LP87565_REG_GPIO_OUT 0x2F + +#define LP87565_REG_MAX LP87565_REG_GPIO_OUT + +/* Register field definitions */ +#define LP87565_DEV_REV_DEV_ID 0xC0 +#define LP87565_DEV_REV_ALL_LAYER 0x30 +#define LP87565_DEV_REV_METAL_LAYER 0x0F + +#define LP87565_OTP_REV_OTP_ID 0xFF + +#define LP87565_BUCK_CTRL_1_EN BIT(7) +#define LP87565_BUCK_CTRL_1_EN_PIN_CTRL BIT(6) +#define LP87565_BUCK_CTRL_1_PIN_SELECT_EN 0x30 + +#define LP87565_BUCK_CTRL_1_ROOF_FLOOR_EN BIT(3) +#define LP87565_BUCK_CTRL_1_RDIS_EN BIT(2) +#define LP87565_BUCK_CTRL_1_FPWM BIT(1) +/* Bit0 is reserved for BUCK1 and BUCK3 and valid only for BUCK0 and BUCK2 */ +#define LP87565_BUCK_CTRL_1_FPWM_MP_0_2 BIT(0) + +#define LP87565_BUCK_CTRL_2_ILIM 0x38 +#define LP87565_BUCK_CTRL_2_SLEW_RATE 0x07 + +#define LP87565_BUCK_VSET 0xFF +#define LP87565_BUCK_FLOOR_VSET 0xFF + +#define LP87565_BUCK_SHUTDOWN_DELAY 0xF0 +#define LP87565_BUCK_STARTUP_DELAY 0x0F + +#define LP87565_GPIO_SHUTDOWN_DELAY 0xF0 +#define LP87565_GPIO_STARTUP_DELAY 0x0F + +#define LP87565_RESET_SW_RESET BIT(0) + +#define LP87565_CONFIG_DOUBLE_DELAY BIT(7) +#define LP87565_CONFIG_CLKIN_PD BIT(6) +#define LP87565_CONFIG_EN4_PD BIT(5) +#define LP87565_CONFIG_EN3_PD BIT(4) +#define LP87565_CONFIG_TDIE_WARN_LEVEL BIT(3) +#define LP87565_CONFIG_EN2_PD BIT(2) +#define LP87565_CONFIG_EN1_PD BIT(1) + +#define LP87565_INT_GPIO BIT(7) +#define LP87565_INT_BUCK23 BIT(6) +#define LP87565_INT_BUCK01 BIT(5) +#define LP87565_NO_SYNC_CLK BIT(4) +#define LP87565_TDIE_SD BIT(3) +#define LP87565_TDIE_WARN BIT(2) +#define LP87565_INT_OVP BIT(1) +#define LP87565_I_LOAD_READY BIT(0) + +#define LP87565_INT_TOP2_RESET_REG BIT(0) + +#define 
LP87565_BUCK1_PG_INT BIT(6) +#define LP87565_BUCK1_SC_INT BIT(5) +#define LP87565_BUCK1_ILIM_INT BIT(4) +#define LP87565_BUCK0_PG_INT BIT(2) +#define LP87565_BUCK0_SC_INT BIT(1) +#define LP87565_BUCK0_ILIM_INT BIT(0) + +#define LP87565_BUCK3_PG_INT BIT(6) +#define LP87565_BUCK3_SC_INT BIT(5) +#define LP87565_BUCK3_ILIM_INT BIT(4) +#define LP87565_BUCK2_PG_INT BIT(2) +#define LP87565_BUCK2_SC_INT BIT(1) +#define LP87565_BUCK2_ILIM_INT BIT(0) + +#define LP87565_SYNC_CLK_STAT BIT(4) +#define LP87565_TDIE_SD_STAT BIT(3) +#define LP87565_TDIE_WARN_STAT BIT(2) +#define LP87565_OVP_STAT BIT(1) + +#define LP87565_BUCK1_STAT BIT(7) +#define LP87565_BUCK1_PG_STAT BIT(6) +#define LP87565_BUCK1_ILIM_STAT BIT(4) +#define LP87565_BUCK0_STAT BIT(3) +#define LP87565_BUCK0_PG_STAT BIT(2) +#define LP87565_BUCK0_ILIM_STAT BIT(0) + +#define LP87565_BUCK3_STAT BIT(7) +#define LP87565_BUCK3_PG_STAT BIT(6) +#define LP87565_BUCK3_ILIM_STAT BIT(4) +#define LP87565_BUCK2_STAT BIT(3) +#define LP87565_BUCK2_PG_STAT BIT(2) +#define LP87565_BUCK2_ILIM_STAT BIT(0) + +#define LPL87565_GPIO_MASK BIT(7) +#define LPL87565_SYNC_CLK_MASK BIT(4) +#define LPL87565_TDIE_WARN_MASK BIT(2) +#define LPL87565_I_LOAD_READY_MASK BIT(0) + +#define LPL87565_RESET_REG_MASK BIT(0) + +#define LPL87565_BUCK1_PG_MASK BIT(6) +#define LPL87565_BUCK1_ILIM_MASK BIT(4) +#define LPL87565_BUCK0_PG_MASK BIT(2) +#define LPL87565_BUCK0_ILIM_MASK BIT(0) + +#define LPL87565_BUCK3_PG_MASK BIT(6) +#define LPL87565_BUCK3_ILIM_MASK BIT(4) +#define LPL87565_BUCK2_PG_MASK BIT(2) +#define LPL87565_BUCK2_ILIM_MASK BIT(0) + +#define LP87565_LOAD_CURRENT_BUCK_SELECT 0x3 + +#define LP87565_I_LOAD2_BUCK_LOAD_CURRENT 0x3 +#define LP87565_I_LOAD1_BUCK_LOAD_CURRENT 0xFF + +#define LP87565_PG3_SEL 0xC0 +#define LP87565_PG2_SEL 0x30 +#define LP87565_PG1_SEL 0x0C +#define LP87565_PG0_SEL 0x03 + +#define LP87565_HALF_DAY BIT(7) +#define LP87565_EN_PG0_NINT BIT(6) +#define LP87565_PGOOD_SET_DELAY BIT(5) +#define LP87565_EN_PGFLT_STAT BIT(4) +#define 
LP87565_PGOOD_WINDOW BIT(2) +#define LP87565_PGOOD_OD BIT(1) +#define LP87565_PGOOD_POL BIT(0) + +#define LP87565_PG3_FLT BIT(3) +#define LP87565_PG2_FLT BIT(2) +#define LP87565_PG1_FLT BIT(1) +#define LP87565_PG0_FLT BIT(0) + +#define LP87565_PLL_MODE 0xC0 +#define LP87565_EXT_CLK_FREQ 0x1F + +#define LP87565_EN_SPREAD_SPEC BIT(7) +#define LP87565_EN_PIN_CTRL_GPIO3 BIT(6) +#define LP87565_EN_PIN_SELECT_GPIO3 BIT(5) +#define LP87565_EN_PIN_CTRL_GPIO2 BIT(4) +#define LP87565_EN_PIN_SELECT_GPIO2 BIT(3) +#define LP87565_GPIO3_SEL BIT(2) +#define LP87565_GPIO2_SEL BIT(1) +#define LP87565_GPIO1_SEL BIT(0) + +#define LP87565_GOIO3_OD BIT(6) +#define LP87565_GOIO2_OD BIT(5) +#define LP87565_GOIO1_OD BIT(4) +#define LP87565_GOIO3_DIR BIT(2) +#define LP87565_GOIO2_DIR BIT(1) +#define LP87565_GOIO1_DIR BIT(0) + +#define LP87565_GOIO3_IN BIT(2) +#define LP87565_GOIO2_IN BIT(1) +#define LP87565_GOIO1_IN BIT(0) + +#define LP87565_GOIO3_OUT BIT(2) +#define LP87565_GOIO2_OUT BIT(1) +#define LP87565_GOIO1_OUT BIT(0) + +/* Number of step-down converters available */ +#define LP87565_NUM_BUCK 6 + +enum LP87565_regulator_id { + /* BUCK's */ + LP87565_BUCK_0, + LP87565_BUCK_1, + LP87565_BUCK_2, + LP87565_BUCK_3, + LP87565_BUCK_10, + LP87565_BUCK_23, +}; + +/** + * struct LP87565 - state holder for the LP87565 driver + * @dev: struct device pointer for MFD device + * @rev: revision of the LP87565 + * @dev_type: The device type for example lp87565-q1 + * @lock: lock guarding the data structure + * @regmap: register map of the LP87565 PMIC + * + * Device data may be used to access the LP87565 chip + */ +struct lp87565 { + struct device *dev; + u8 rev; + u8 dev_type; + struct regmap *regmap; +}; +#endif /* __LINUX_MFD_LP87565_H */ -- cgit v1.2.3 From 4a25220d4e43bb2461823dbc7eb1502a34087958 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Jul 2017 16:24:18 +0100 Subject: hugetlbfs: Implement show_options Implement the show_options superblock op for hugetlbfs as part of a bid to 
get rid of s_options and generic_show_options() to make it easier to implement a context-based mount where the mount options can be passed individually over a file descriptor. Note that the uid and gid should possibly be displayed relative to the viewer's user namespace. Signed-off-by: David Howells cc: Nadia Yvette Chambers Signed-off-by: Al Viro --- include/linux/hugetlb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index b857fc8cc2ec..3b6eeaad2f77 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -262,6 +262,9 @@ struct hugetlbfs_sb_info { spinlock_t stat_lock; struct hstate *hstate; struct hugepage_subpool *spool; + kuid_t uid; + kgid_t gid; + umode_t mode; }; static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) -- cgit v1.2.3 From 8476d6cde2cd11a2cb87bd7392fc318eec25c8a0 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 22 May 2017 00:09:52 +0200 Subject: backlight: adp8860: Move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. Signed-off-by: Wolfram Sang Acked-by: Daniel Thompson Acked-by: Michael Hennerich Signed-off-by: Lee Jones --- include/linux/i2c/adp8860.h | 154 ---------------------------------- include/linux/platform_data/adp8860.h | 154 ++++++++++++++++++++++++++++++++++ 2 files changed, 154 insertions(+), 154 deletions(-) delete mode 100644 include/linux/i2c/adp8860.h create mode 100644 include/linux/platform_data/adp8860.h (limited to 'include/linux') diff --git a/include/linux/i2c/adp8860.h b/include/linux/i2c/adp8860.h deleted file mode 100644 index 0b4d39855c91..000000000000 --- a/include/linux/i2c/adp8860.h +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Definitions and platform data for Analog Devices - * Backlight drivers ADP8860 - * - * Copyright 2009-2010 Analog Devices Inc. - * - * Licensed under the GPL-2 or later. 
- */ - -#ifndef __LINUX_I2C_ADP8860_H -#define __LINUX_I2C_ADP8860_H - -#include -#include - -#define ID_ADP8860 8860 - -#define ADP8860_MAX_BRIGHTNESS 0x7F -#define FLAG_OFFT_SHIFT 8 - -/* - * LEDs subdevice platform data - */ - -#define ADP8860_LED_DIS_BLINK (0 << FLAG_OFFT_SHIFT) -#define ADP8860_LED_OFFT_600ms (1 << FLAG_OFFT_SHIFT) -#define ADP8860_LED_OFFT_1200ms (2 << FLAG_OFFT_SHIFT) -#define ADP8860_LED_OFFT_1800ms (3 << FLAG_OFFT_SHIFT) - -#define ADP8860_LED_ONT_200ms 0 -#define ADP8860_LED_ONT_600ms 1 -#define ADP8860_LED_ONT_800ms 2 -#define ADP8860_LED_ONT_1200ms 3 - -#define ADP8860_LED_D7 (7) -#define ADP8860_LED_D6 (6) -#define ADP8860_LED_D5 (5) -#define ADP8860_LED_D4 (4) -#define ADP8860_LED_D3 (3) -#define ADP8860_LED_D2 (2) -#define ADP8860_LED_D1 (1) - -/* - * Backlight subdevice platform data - */ - -#define ADP8860_BL_D7 (1 << 6) -#define ADP8860_BL_D6 (1 << 5) -#define ADP8860_BL_D5 (1 << 4) -#define ADP8860_BL_D4 (1 << 3) -#define ADP8860_BL_D3 (1 << 2) -#define ADP8860_BL_D2 (1 << 1) -#define ADP8860_BL_D1 (1 << 0) - -#define ADP8860_FADE_T_DIS 0 /* Fade Timer Disabled */ -#define ADP8860_FADE_T_300ms 1 /* 0.3 Sec */ -#define ADP8860_FADE_T_600ms 2 -#define ADP8860_FADE_T_900ms 3 -#define ADP8860_FADE_T_1200ms 4 -#define ADP8860_FADE_T_1500ms 5 -#define ADP8860_FADE_T_1800ms 6 -#define ADP8860_FADE_T_2100ms 7 -#define ADP8860_FADE_T_2400ms 8 -#define ADP8860_FADE_T_2700ms 9 -#define ADP8860_FADE_T_3000ms 10 -#define ADP8860_FADE_T_3500ms 11 -#define ADP8860_FADE_T_4000ms 12 -#define ADP8860_FADE_T_4500ms 13 -#define ADP8860_FADE_T_5000ms 14 -#define ADP8860_FADE_T_5500ms 15 /* 5.5 Sec */ - -#define ADP8860_FADE_LAW_LINEAR 0 -#define ADP8860_FADE_LAW_SQUARE 1 -#define ADP8860_FADE_LAW_CUBIC1 2 -#define ADP8860_FADE_LAW_CUBIC2 3 - -#define ADP8860_BL_AMBL_FILT_80ms 0 /* Light sensor filter time */ -#define ADP8860_BL_AMBL_FILT_160ms 1 -#define ADP8860_BL_AMBL_FILT_320ms 2 -#define ADP8860_BL_AMBL_FILT_640ms 3 -#define 
ADP8860_BL_AMBL_FILT_1280ms 4 -#define ADP8860_BL_AMBL_FILT_2560ms 5 -#define ADP8860_BL_AMBL_FILT_5120ms 6 -#define ADP8860_BL_AMBL_FILT_10240ms 7 /* 10.24 sec */ - -/* - * Blacklight current 0..30mA - */ -#define ADP8860_BL_CUR_mA(I) ((I * 127) / 30) - -/* - * L2 comparator current 0..1106uA - */ -#define ADP8860_L2_COMP_CURR_uA(I) ((I * 255) / 1106) - -/* - * L3 comparator current 0..138uA - */ -#define ADP8860_L3_COMP_CURR_uA(I) ((I * 255) / 138) - -struct adp8860_backlight_platform_data { - u8 bl_led_assign; /* 1 = Backlight 0 = Individual LED */ - - u8 bl_fade_in; /* Backlight Fade-In Timer */ - u8 bl_fade_out; /* Backlight Fade-Out Timer */ - u8 bl_fade_law; /* fade-on/fade-off transfer characteristic */ - - u8 en_ambl_sens; /* 1 = enable ambient light sensor */ - u8 abml_filt; /* Light sensor filter time */ - - u8 l1_daylight_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ - u8 l1_daylight_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ - u8 l2_office_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ - u8 l2_office_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ - u8 l3_dark_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ - u8 l3_dark_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ - - u8 l2_trip; /* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */ - u8 l2_hyst; /* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */ - u8 l3_trip; /* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */ - u8 l3_hyst; /* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */ - - /** - * Independent Current Sinks / LEDS - * Sinks not assigned to the Backlight can be exposed to - * user space using the LEDS CLASS interface - */ - - int num_leds; - struct led_info *leds; - u8 led_fade_in; /* LED Fade-In Timer */ - u8 led_fade_out; /* LED Fade-Out Timer */ - u8 led_fade_law; /* fade-on/fade-off transfer characteristic */ - u8 led_on_time; - - /** - * Gain down disable. Setting this option does not allow the - * charge pump to switch to lower gains. 
NOT AVAILABLE on ADP8860 - * 1 = the charge pump doesn't switch down in gain until all LEDs are 0. - * The charge pump switches up in gain as needed. This feature is - * useful if the ADP8863 charge pump is used to drive an external load. - * This feature must be used when utilizing small fly capacitors - * (0402 or smaller). - * 0 = the charge pump automatically switches up and down in gain. - * This provides optimal efficiency, but is not suitable for driving - * loads that are not connected through the ADP8863 diode drivers. - * Additionally, the charge pump fly capacitors should be low ESR - * and sized 0603 or greater. - */ - - u8 gdwn_dis; -}; - -#endif /* __LINUX_I2C_ADP8860_H */ diff --git a/include/linux/platform_data/adp8860.h b/include/linux/platform_data/adp8860.h new file mode 100644 index 000000000000..0b4d39855c91 --- /dev/null +++ b/include/linux/platform_data/adp8860.h @@ -0,0 +1,154 @@ +/* + * Definitions and platform data for Analog Devices + * Backlight drivers ADP8860 + * + * Copyright 2009-2010 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. 
+ */ + +#ifndef __LINUX_I2C_ADP8860_H +#define __LINUX_I2C_ADP8860_H + +#include +#include + +#define ID_ADP8860 8860 + +#define ADP8860_MAX_BRIGHTNESS 0x7F +#define FLAG_OFFT_SHIFT 8 + +/* + * LEDs subdevice platform data + */ + +#define ADP8860_LED_DIS_BLINK (0 << FLAG_OFFT_SHIFT) +#define ADP8860_LED_OFFT_600ms (1 << FLAG_OFFT_SHIFT) +#define ADP8860_LED_OFFT_1200ms (2 << FLAG_OFFT_SHIFT) +#define ADP8860_LED_OFFT_1800ms (3 << FLAG_OFFT_SHIFT) + +#define ADP8860_LED_ONT_200ms 0 +#define ADP8860_LED_ONT_600ms 1 +#define ADP8860_LED_ONT_800ms 2 +#define ADP8860_LED_ONT_1200ms 3 + +#define ADP8860_LED_D7 (7) +#define ADP8860_LED_D6 (6) +#define ADP8860_LED_D5 (5) +#define ADP8860_LED_D4 (4) +#define ADP8860_LED_D3 (3) +#define ADP8860_LED_D2 (2) +#define ADP8860_LED_D1 (1) + +/* + * Backlight subdevice platform data + */ + +#define ADP8860_BL_D7 (1 << 6) +#define ADP8860_BL_D6 (1 << 5) +#define ADP8860_BL_D5 (1 << 4) +#define ADP8860_BL_D4 (1 << 3) +#define ADP8860_BL_D3 (1 << 2) +#define ADP8860_BL_D2 (1 << 1) +#define ADP8860_BL_D1 (1 << 0) + +#define ADP8860_FADE_T_DIS 0 /* Fade Timer Disabled */ +#define ADP8860_FADE_T_300ms 1 /* 0.3 Sec */ +#define ADP8860_FADE_T_600ms 2 +#define ADP8860_FADE_T_900ms 3 +#define ADP8860_FADE_T_1200ms 4 +#define ADP8860_FADE_T_1500ms 5 +#define ADP8860_FADE_T_1800ms 6 +#define ADP8860_FADE_T_2100ms 7 +#define ADP8860_FADE_T_2400ms 8 +#define ADP8860_FADE_T_2700ms 9 +#define ADP8860_FADE_T_3000ms 10 +#define ADP8860_FADE_T_3500ms 11 +#define ADP8860_FADE_T_4000ms 12 +#define ADP8860_FADE_T_4500ms 13 +#define ADP8860_FADE_T_5000ms 14 +#define ADP8860_FADE_T_5500ms 15 /* 5.5 Sec */ + +#define ADP8860_FADE_LAW_LINEAR 0 +#define ADP8860_FADE_LAW_SQUARE 1 +#define ADP8860_FADE_LAW_CUBIC1 2 +#define ADP8860_FADE_LAW_CUBIC2 3 + +#define ADP8860_BL_AMBL_FILT_80ms 0 /* Light sensor filter time */ +#define ADP8860_BL_AMBL_FILT_160ms 1 +#define ADP8860_BL_AMBL_FILT_320ms 2 +#define ADP8860_BL_AMBL_FILT_640ms 3 +#define 
ADP8860_BL_AMBL_FILT_1280ms 4 +#define ADP8860_BL_AMBL_FILT_2560ms 5 +#define ADP8860_BL_AMBL_FILT_5120ms 6 +#define ADP8860_BL_AMBL_FILT_10240ms 7 /* 10.24 sec */ + +/* + * Blacklight current 0..30mA + */ +#define ADP8860_BL_CUR_mA(I) ((I * 127) / 30) + +/* + * L2 comparator current 0..1106uA + */ +#define ADP8860_L2_COMP_CURR_uA(I) ((I * 255) / 1106) + +/* + * L3 comparator current 0..138uA + */ +#define ADP8860_L3_COMP_CURR_uA(I) ((I * 255) / 138) + +struct adp8860_backlight_platform_data { + u8 bl_led_assign; /* 1 = Backlight 0 = Individual LED */ + + u8 bl_fade_in; /* Backlight Fade-In Timer */ + u8 bl_fade_out; /* Backlight Fade-Out Timer */ + u8 bl_fade_law; /* fade-on/fade-off transfer characteristic */ + + u8 en_ambl_sens; /* 1 = enable ambient light sensor */ + u8 abml_filt; /* Light sensor filter time */ + + u8 l1_daylight_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l1_daylight_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l2_office_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l2_office_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l3_dark_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l3_dark_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + + u8 l2_trip; /* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */ + u8 l2_hyst; /* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */ + u8 l3_trip; /* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */ + u8 l3_hyst; /* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */ + + /** + * Independent Current Sinks / LEDS + * Sinks not assigned to the Backlight can be exposed to + * user space using the LEDS CLASS interface + */ + + int num_leds; + struct led_info *leds; + u8 led_fade_in; /* LED Fade-In Timer */ + u8 led_fade_out; /* LED Fade-Out Timer */ + u8 led_fade_law; /* fade-on/fade-off transfer characteristic */ + u8 led_on_time; + + /** + * Gain down disable. Setting this option does not allow the + * charge pump to switch to lower gains. 
NOT AVAILABLE on ADP8860 + * 1 = the charge pump doesn't switch down in gain until all LEDs are 0. + * The charge pump switches up in gain as needed. This feature is + * useful if the ADP8863 charge pump is used to drive an external load. + * This feature must be used when utilizing small fly capacitors + * (0402 or smaller). + * 0 = the charge pump automatically switches up and down in gain. + * This provides optimal efficiency, but is not suitable for driving + * loads that are not connected through the ADP8863 diode drivers. + * Additionally, the charge pump fly capacitors should be low ESR + * and sized 0603 or greater. + */ + + u8 gdwn_dis; +}; + +#endif /* __LINUX_I2C_ADP8860_H */ -- cgit v1.2.3 From 056d6ff470a8e782648fd020940c04d0d4a0d761 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 22 May 2017 00:09:53 +0200 Subject: video: adp8870: move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. Signed-off-by: Wolfram Sang Acked-by: Daniel Thompson Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Lee Jones --- include/linux/i2c/adp8870.h | 153 ---------------------------------- include/linux/platform_data/adp8870.h | 153 ++++++++++++++++++++++++++++++++++ 2 files changed, 153 insertions(+), 153 deletions(-) delete mode 100644 include/linux/i2c/adp8870.h create mode 100644 include/linux/platform_data/adp8870.h (limited to 'include/linux') diff --git a/include/linux/i2c/adp8870.h b/include/linux/i2c/adp8870.h deleted file mode 100644 index 624dceccbd5b..000000000000 --- a/include/linux/i2c/adp8870.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Definitions and platform data for Analog Devices - * Backlight drivers ADP8870 - * - * Copyright 2009-2010 Analog Devices Inc. - * - * Licensed under the GPL-2 or later. 
- */ - -#ifndef __LINUX_I2C_ADP8870_H -#define __LINUX_I2C_ADP8870_H - -#define ID_ADP8870 8870 - -#define ADP8870_MAX_BRIGHTNESS 0x7F -#define FLAG_OFFT_SHIFT 8 - -/* - * LEDs subdevice platform data - */ - -#define ADP8870_LED_DIS_BLINK (0 << FLAG_OFFT_SHIFT) -#define ADP8870_LED_OFFT_600ms (1 << FLAG_OFFT_SHIFT) -#define ADP8870_LED_OFFT_1200ms (2 << FLAG_OFFT_SHIFT) -#define ADP8870_LED_OFFT_1800ms (3 << FLAG_OFFT_SHIFT) - -#define ADP8870_LED_ONT_200ms 0 -#define ADP8870_LED_ONT_600ms 1 -#define ADP8870_LED_ONT_800ms 2 -#define ADP8870_LED_ONT_1200ms 3 - -#define ADP8870_LED_D7 (7) -#define ADP8870_LED_D6 (6) -#define ADP8870_LED_D5 (5) -#define ADP8870_LED_D4 (4) -#define ADP8870_LED_D3 (3) -#define ADP8870_LED_D2 (2) -#define ADP8870_LED_D1 (1) - -/* - * Backlight subdevice platform data - */ - -#define ADP8870_BL_D7 (1 << 6) -#define ADP8870_BL_D6 (1 << 5) -#define ADP8870_BL_D5 (1 << 4) -#define ADP8870_BL_D4 (1 << 3) -#define ADP8870_BL_D3 (1 << 2) -#define ADP8870_BL_D2 (1 << 1) -#define ADP8870_BL_D1 (1 << 0) - -#define ADP8870_FADE_T_DIS 0 /* Fade Timer Disabled */ -#define ADP8870_FADE_T_300ms 1 /* 0.3 Sec */ -#define ADP8870_FADE_T_600ms 2 -#define ADP8870_FADE_T_900ms 3 -#define ADP8870_FADE_T_1200ms 4 -#define ADP8870_FADE_T_1500ms 5 -#define ADP8870_FADE_T_1800ms 6 -#define ADP8870_FADE_T_2100ms 7 -#define ADP8870_FADE_T_2400ms 8 -#define ADP8870_FADE_T_2700ms 9 -#define ADP8870_FADE_T_3000ms 10 -#define ADP8870_FADE_T_3500ms 11 -#define ADP8870_FADE_T_4000ms 12 -#define ADP8870_FADE_T_4500ms 13 -#define ADP8870_FADE_T_5000ms 14 -#define ADP8870_FADE_T_5500ms 15 /* 5.5 Sec */ - -#define ADP8870_FADE_LAW_LINEAR 0 -#define ADP8870_FADE_LAW_SQUARE 1 -#define ADP8870_FADE_LAW_CUBIC1 2 -#define ADP8870_FADE_LAW_CUBIC2 3 - -#define ADP8870_BL_AMBL_FILT_80ms 0 /* Light sensor filter time */ -#define ADP8870_BL_AMBL_FILT_160ms 1 -#define ADP8870_BL_AMBL_FILT_320ms 2 -#define ADP8870_BL_AMBL_FILT_640ms 3 -#define ADP8870_BL_AMBL_FILT_1280ms 4 -#define 
ADP8870_BL_AMBL_FILT_2560ms 5 -#define ADP8870_BL_AMBL_FILT_5120ms 6 -#define ADP8870_BL_AMBL_FILT_10240ms 7 /* 10.24 sec */ - -/* - * Blacklight current 0..30mA - */ -#define ADP8870_BL_CUR_mA(I) ((I * 127) / 30) - -/* - * L2 comparator current 0..1106uA - */ -#define ADP8870_L2_COMP_CURR_uA(I) ((I * 255) / 1106) - -/* - * L3 comparator current 0..551uA - */ -#define ADP8870_L3_COMP_CURR_uA(I) ((I * 255) / 551) - -/* - * L4 comparator current 0..275uA - */ -#define ADP8870_L4_COMP_CURR_uA(I) ((I * 255) / 275) - -/* - * L5 comparator current 0..138uA - */ -#define ADP8870_L5_COMP_CURR_uA(I) ((I * 255) / 138) - -struct adp8870_backlight_platform_data { - u8 bl_led_assign; /* 1 = Backlight 0 = Individual LED */ - u8 pwm_assign; /* 1 = Enables PWM mode */ - - u8 bl_fade_in; /* Backlight Fade-In Timer */ - u8 bl_fade_out; /* Backlight Fade-Out Timer */ - u8 bl_fade_law; /* fade-on/fade-off transfer characteristic */ - - u8 en_ambl_sens; /* 1 = enable ambient light sensor */ - u8 abml_filt; /* Light sensor filter time */ - - u8 l1_daylight_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ - u8 l1_daylight_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ - u8 l2_bright_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ - u8 l2_bright_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ - u8 l3_office_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ - u8 l3_office_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ - u8 l4_indoor_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ - u8 l4_indor_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ - u8 l5_dark_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ - u8 l5_dark_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ - - u8 l2_trip; /* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */ - u8 l2_hyst; /* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */ - u8 l3_trip; /* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */ - u8 l3_hyst; /* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */ - u8 l4_trip; /* use L4_COMP_CURR_uA(I) 0 <= I <= 275 uA */ - u8 l4_hyst; /* use 
L4_COMP_CURR_uA(I) 0 <= I <= 275 uA */ - u8 l5_trip; /* use L5_COMP_CURR_uA(I) 0 <= I <= 138 uA */ - u8 l5_hyst; /* use L6_COMP_CURR_uA(I) 0 <= I <= 138 uA */ - - /** - * Independent Current Sinks / LEDS - * Sinks not assigned to the Backlight can be exposed to - * user space using the LEDS CLASS interface - */ - - int num_leds; - struct led_info *leds; - u8 led_fade_in; /* LED Fade-In Timer */ - u8 led_fade_out; /* LED Fade-Out Timer */ - u8 led_fade_law; /* fade-on/fade-off transfer characteristic */ - u8 led_on_time; -}; - -#endif /* __LINUX_I2C_ADP8870_H */ diff --git a/include/linux/platform_data/adp8870.h b/include/linux/platform_data/adp8870.h new file mode 100644 index 000000000000..624dceccbd5b --- /dev/null +++ b/include/linux/platform_data/adp8870.h @@ -0,0 +1,153 @@ +/* + * Definitions and platform data for Analog Devices + * Backlight drivers ADP8870 + * + * Copyright 2009-2010 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. + */ + +#ifndef __LINUX_I2C_ADP8870_H +#define __LINUX_I2C_ADP8870_H + +#define ID_ADP8870 8870 + +#define ADP8870_MAX_BRIGHTNESS 0x7F +#define FLAG_OFFT_SHIFT 8 + +/* + * LEDs subdevice platform data + */ + +#define ADP8870_LED_DIS_BLINK (0 << FLAG_OFFT_SHIFT) +#define ADP8870_LED_OFFT_600ms (1 << FLAG_OFFT_SHIFT) +#define ADP8870_LED_OFFT_1200ms (2 << FLAG_OFFT_SHIFT) +#define ADP8870_LED_OFFT_1800ms (3 << FLAG_OFFT_SHIFT) + +#define ADP8870_LED_ONT_200ms 0 +#define ADP8870_LED_ONT_600ms 1 +#define ADP8870_LED_ONT_800ms 2 +#define ADP8870_LED_ONT_1200ms 3 + +#define ADP8870_LED_D7 (7) +#define ADP8870_LED_D6 (6) +#define ADP8870_LED_D5 (5) +#define ADP8870_LED_D4 (4) +#define ADP8870_LED_D3 (3) +#define ADP8870_LED_D2 (2) +#define ADP8870_LED_D1 (1) + +/* + * Backlight subdevice platform data + */ + +#define ADP8870_BL_D7 (1 << 6) +#define ADP8870_BL_D6 (1 << 5) +#define ADP8870_BL_D5 (1 << 4) +#define ADP8870_BL_D4 (1 << 3) +#define ADP8870_BL_D3 (1 << 2) +#define ADP8870_BL_D2 (1 << 1) +#define ADP8870_BL_D1 (1 
<< 0) + +#define ADP8870_FADE_T_DIS 0 /* Fade Timer Disabled */ +#define ADP8870_FADE_T_300ms 1 /* 0.3 Sec */ +#define ADP8870_FADE_T_600ms 2 +#define ADP8870_FADE_T_900ms 3 +#define ADP8870_FADE_T_1200ms 4 +#define ADP8870_FADE_T_1500ms 5 +#define ADP8870_FADE_T_1800ms 6 +#define ADP8870_FADE_T_2100ms 7 +#define ADP8870_FADE_T_2400ms 8 +#define ADP8870_FADE_T_2700ms 9 +#define ADP8870_FADE_T_3000ms 10 +#define ADP8870_FADE_T_3500ms 11 +#define ADP8870_FADE_T_4000ms 12 +#define ADP8870_FADE_T_4500ms 13 +#define ADP8870_FADE_T_5000ms 14 +#define ADP8870_FADE_T_5500ms 15 /* 5.5 Sec */ + +#define ADP8870_FADE_LAW_LINEAR 0 +#define ADP8870_FADE_LAW_SQUARE 1 +#define ADP8870_FADE_LAW_CUBIC1 2 +#define ADP8870_FADE_LAW_CUBIC2 3 + +#define ADP8870_BL_AMBL_FILT_80ms 0 /* Light sensor filter time */ +#define ADP8870_BL_AMBL_FILT_160ms 1 +#define ADP8870_BL_AMBL_FILT_320ms 2 +#define ADP8870_BL_AMBL_FILT_640ms 3 +#define ADP8870_BL_AMBL_FILT_1280ms 4 +#define ADP8870_BL_AMBL_FILT_2560ms 5 +#define ADP8870_BL_AMBL_FILT_5120ms 6 +#define ADP8870_BL_AMBL_FILT_10240ms 7 /* 10.24 sec */ + +/* + * Blacklight current 0..30mA + */ +#define ADP8870_BL_CUR_mA(I) ((I * 127) / 30) + +/* + * L2 comparator current 0..1106uA + */ +#define ADP8870_L2_COMP_CURR_uA(I) ((I * 255) / 1106) + +/* + * L3 comparator current 0..551uA + */ +#define ADP8870_L3_COMP_CURR_uA(I) ((I * 255) / 551) + +/* + * L4 comparator current 0..275uA + */ +#define ADP8870_L4_COMP_CURR_uA(I) ((I * 255) / 275) + +/* + * L5 comparator current 0..138uA + */ +#define ADP8870_L5_COMP_CURR_uA(I) ((I * 255) / 138) + +struct adp8870_backlight_platform_data { + u8 bl_led_assign; /* 1 = Backlight 0 = Individual LED */ + u8 pwm_assign; /* 1 = Enables PWM mode */ + + u8 bl_fade_in; /* Backlight Fade-In Timer */ + u8 bl_fade_out; /* Backlight Fade-Out Timer */ + u8 bl_fade_law; /* fade-on/fade-off transfer characteristic */ + + u8 en_ambl_sens; /* 1 = enable ambient light sensor */ + u8 abml_filt; /* Light sensor filter time */ + + 
u8 l1_daylight_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l1_daylight_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l2_bright_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l2_bright_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l3_office_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l3_office_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l4_indoor_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l4_indor_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l5_dark_max; /* use BL_CUR_mA(I) 0 <= I <= 30 mA */ + u8 l5_dark_dim; /* typ = 0, use BL_CUR_mA(I) 0 <= I <= 30 mA */ + + u8 l2_trip; /* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */ + u8 l2_hyst; /* use L2_COMP_CURR_uA(I) 0 <= I <= 1106 uA */ + u8 l3_trip; /* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */ + u8 l3_hyst; /* use L3_COMP_CURR_uA(I) 0 <= I <= 551 uA */ + u8 l4_trip; /* use L4_COMP_CURR_uA(I) 0 <= I <= 275 uA */ + u8 l4_hyst; /* use L4_COMP_CURR_uA(I) 0 <= I <= 275 uA */ + u8 l5_trip; /* use L5_COMP_CURR_uA(I) 0 <= I <= 138 uA */ + u8 l5_hyst; /* use L6_COMP_CURR_uA(I) 0 <= I <= 138 uA */ + + /** + * Independent Current Sinks / LEDS + * Sinks not assigned to the Backlight can be exposed to + * user space using the LEDS CLASS interface + */ + + int num_leds; + struct led_info *leds; + u8 led_fade_in; /* LED Fade-In Timer */ + u8 led_fade_out; /* LED Fade-Out Timer */ + u8 led_fade_law; /* fade-on/fade-off transfer characteristic */ + u8 led_on_time; +}; + +#endif /* __LINUX_I2C_ADP8870_H */ -- cgit v1.2.3 From 87354e5de04fe727227ff619af164202adcfa4d4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 6 Jul 2017 07:02:21 -0400 Subject: buffer: set errors in mapping at the time that the error occurs I noticed on xfs that I could still sometimes get back an error on fsync on a fd that was opened after the error condition had been cleared. 
The problem is that the buffer code sets the write_io_error flag and then later checks that flag to set the error in the mapping. That flag persists for quite a while however. If the file is later opened with O_TRUNC, the buffers will then be invalidated and the mapping's error set such that a subsequent fsync will return error. I think this is incorrect, as there was no writeback between the open and fsync. Add a new mark_buffer_write_io_error operation that sets the flag and the error in the mapping at the same time. Replace all calls to set_buffer_write_io_error with mark_buffer_write_io_error, and remove the places that check this flag in order to set the error in the mapping. This sets the error in the mapping earlier, at the time that it's first detected. Signed-off-by: Jeff Layton Reviewed-by: Jan Kara Reviewed-by: Carlos Maiolino --- include/linux/buffer_head.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index bd029e52ef5e..e0abeba3ced7 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -149,6 +149,7 @@ void buffer_check_dirty_writeback(struct page *page, */ void mark_buffer_dirty(struct buffer_head *bh); +void mark_buffer_write_io_error(struct buffer_head *bh); void init_buffer(struct buffer_head *, bh_end_io_t *, void *); void touch_buffer(struct buffer_head *bh); void set_bh_page(struct buffer_head *bh, -- cgit v1.2.3 From 76341cabbdad65c10a4162e9dfa82a6342afc02f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 6 Jul 2017 07:02:22 -0400 Subject: jbd2: don't clear and reset errors after waiting on writeback Resetting this flag is almost certainly racy, and will be problematic with some coming changes. Make filemap_fdatawait_keep_errors return int, but not clear the flag(s). Have jbd2 call it instead of filemap_fdatawait and don't attempt to re-set the error flag if it fails.
Reviewed-by: Jan Kara Reviewed-by: Carlos Maiolino Signed-off-by: Jeff Layton --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 803e5a9b2654..8ac8df1b3550 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2514,7 +2514,7 @@ extern int write_inode_now(struct inode *, int); extern int filemap_fdatawrite(struct address_space *); extern int filemap_flush(struct address_space *); extern int filemap_fdatawait(struct address_space *); -extern void filemap_fdatawait_keep_errors(struct address_space *); +extern int filemap_fdatawait_keep_errors(struct address_space *mapping); extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend); extern int filemap_write_and_wait(struct address_space *mapping); -- cgit v1.2.3 From 84cbadadc6eafc4798513773a2c8fce37dcd2fb8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 6 Jul 2017 07:02:24 -0400 Subject: lib: add errseq_t type and infrastructure for handling it An errseq_t is a way of recording errors in one place, and allowing any number of "subscribers" to tell whether an error has been set again since a previous time. It's implemented as an unsigned 32-bit value that is managed with atomic operations. The low order bits are designated to hold an error code (max size of MAX_ERRNO). The upper bits are used as a counter. The API works with consumers sampling an errseq_t value at a particular point in time. Later, that value can be used to tell whether new errors have been set since that time. Note that there is a 1 in 512k risk of collisions here if new errors are being recorded frequently, since we have so few bits to use as a counter. To mitigate this, one bit is used as a flag to tell whether the value has been sampled since a new value was recorded. That allows us to avoid bumping the counter if no one has sampled it since it was last bumped. 
Later patches will build on this infrastructure to change how writeback errors are tracked in the kernel. Signed-off-by: Jeff Layton Reviewed-by: NeilBrown Reviewed-by: Jan Kara --- include/linux/errseq.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/linux/errseq.h (limited to 'include/linux') diff --git a/include/linux/errseq.h b/include/linux/errseq.h new file mode 100644 index 000000000000..9e0d444ac88d --- /dev/null +++ b/include/linux/errseq.h @@ -0,0 +1,19 @@ +#ifndef _LINUX_ERRSEQ_H +#define _LINUX_ERRSEQ_H + +/* See lib/errseq.c for more info */ + +typedef u32 errseq_t; + +errseq_t __errseq_set(errseq_t *eseq, int err); +static inline void errseq_set(errseq_t *eseq, int err) +{ + /* Optimize for the common case of no error */ + if (unlikely(err)) + __errseq_set(eseq, err); +} + +errseq_t errseq_sample(errseq_t *eseq); +int errseq_check(errseq_t *eseq, errseq_t since); +int errseq_check_and_advance(errseq_t *eseq, errseq_t *since); +#endif -- cgit v1.2.3 From 5660e13d2fd6af1903d4b0b98020af95ca2d638a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 6 Jul 2017 07:02:25 -0400 Subject: fs: new infrastructure for writeback error handling and reporting Most filesystems currently use mapping_set_error and filemap_check_errors for setting and reporting/clearing writeback errors at the mapping level. filemap_check_errors is indirectly called from most of the filemap_fdatawait_* functions and from filemap_write_and_wait*. These functions are called from all sorts of contexts to wait on writeback to finish -- e.g. mostly in fsync, but also in truncate calls, getattr, etc. The non-fsync callers are problematic. We should be reporting writeback errors during fsync, but many places spread over the tree clear out errors before they can be properly reported, or report errors at nonsensical times. If I get -EIO on a stat() call, there is no reason for me to assume that it is because some previous writeback failed. 
The fact that it also clears out the error such that a subsequent fsync returns 0 is a bug, and a nasty one since that's potentially silent data corruption. This patch adds a small bit of new infrastructure for setting and reporting errors during address_space writeback. While the above was my original impetus for adding this, I think it's also the case that current fsync semantics are just problematic for userland. Most applications that call fsync do so to ensure that the data they wrote has hit the backing store. In the case where there are multiple writers to the file at the same time, this is really hard to determine. The first one to call fsync will see any stored error, and the rest get back 0. The processes with open fds may not be associated with one another in any way. They could even be in different containers, so ensuring coordination between all fsync callers is not really an option. One way to remedy this would be to track what file descriptor was used to dirty the file, but that's rather cumbersome and would likely be slow. However, there is a simpler way to improve the semantics here without incurring too much overhead. This set adds an errseq_t to struct address_space, and a corresponding one is added to struct file. Writeback errors are recorded in the mapping's errseq_t, and the one in struct file is used as the "since" value. This changes the semantics of the Linux fsync implementation such that applications can now use it to determine whether there were any writeback errors since fsync(fd) was last called (or since the file was opened in the case of fsync having never been called). Note that those writeback errors may have occurred when writing data that was dirtied via an entirely different fd, but that's the case now with the current mapping_set_error/filemap_check_error infrastructure. This will at least prevent you from getting a false report of success. 
The new behavior is still consistent with the POSIX spec, and is more reliable for application developers. This patch just adds some basic infrastructure for doing this, and ensures that the f_wb_err "cursor" is properly set when a file is opened. Later patches will change the existing code to use this new infrastructure for reporting errors at fsync time. Signed-off-by: Jeff Layton Reviewed-by: Jan Kara --- include/linux/fs.h | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 8ac8df1b3550..78b5c2901712 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -30,7 +30,7 @@ #include #include #include - +#include #include #include @@ -392,6 +392,7 @@ struct address_space { gfp_t gfp_mask; /* implicit gfp mask for allocations */ struct list_head private_list; /* ditto */ void *private_data; /* ditto */ + errseq_t wb_err; } __attribute__((aligned(sizeof(long)))); /* * On most architectures that alignment is already the case; but @@ -868,6 +869,7 @@ struct file { struct list_head f_tfile_llink; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; + errseq_t f_wb_err; } __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ struct file_handle { @@ -2526,6 +2528,62 @@ extern int filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end); extern int filemap_check_errors(struct address_space *mapping); +extern void __filemap_set_wb_err(struct address_space *mapping, int err); +extern int __must_check file_check_and_advance_wb_err(struct file *file); +extern int __must_check file_write_and_wait_range(struct file *file, + loff_t start, loff_t end); + +/** + * filemap_set_wb_err - set a writeback error on an address_space + * @mapping: mapping in which to set writeback error + * @err: error to be set in mapping + * + * When writeback fails in some way, we must record that 
error so that + * userspace can be informed when fsync and the like are called. We endeavor + * to report errors on any file that was open at the time of the error. Some + * internal callers also need to know when writeback errors have occurred. + * + * When a writeback error occurs, most filesystems will want to call + * filemap_set_wb_err to record the error in the mapping so that it will be + * automatically reported whenever fsync is called on the file. + * + * FIXME: mention FS_* flag here? + */ +static inline void filemap_set_wb_err(struct address_space *mapping, int err) +{ + /* Fastpath for common case of no error */ + if (unlikely(err)) + __filemap_set_wb_err(mapping, err); +} + +/** + * filemap_check_wb_err - has an error occurred since the mark was sampled? + * @mapping: mapping to check for writeback errors + * @since: previously-sampled errseq_t + * + * Grab the errseq_t value from the mapping, and see if it has changed "since" + * the given value was sampled. + * + * If it has then report the latest error set, otherwise return 0. + */ +static inline int filemap_check_wb_err(struct address_space *mapping, + errseq_t since) +{ + return errseq_check(&mapping->wb_err, since); +} + +/** + * filemap_sample_wb_err - sample the current errseq_t to test for later errors + * @mapping: mapping to be sampled + * + * Writeback errors are always reported relative to a particular sample point + * in the past. This function provides those sample points.
+ */ +static inline errseq_t filemap_sample_wb_err(struct address_space *mapping) +{ + return errseq_sample(&mapping->wb_err); +} + extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); -- cgit v1.2.3 From 8ed1e46aaf1bec6a12f4c89637f2c3ef4c70f18e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 6 Jul 2017 07:02:26 -0400 Subject: mm: set both AS_EIO/AS_ENOSPC and errseq_t in mapping_set_error When a writeback error occurs, we want later callers to be able to pick up that fact when they go to wait on that writeback to complete. Traditionally, we've used AS_EIO/AS_ENOSPC flags to track that, but that's problematic since only one "checker" will be informed when an error occurs. In later patches, we're going to want to convert many of these callers to check for errors since a well-defined point in time. For now, ensure that we can handle both sorts of checks by setting errors in both places when there is a writeback failure. Signed-off-by: Jeff Layton --- include/linux/pagemap.h | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 316a19f6b635..28acc94e0f81 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -28,14 +28,33 @@ enum mapping_flags { AS_NO_WRITEBACK_TAGS = 5, }; +/** + * mapping_set_error - record a writeback error in the address_space + * @mapping: the mapping in which an error should be set + * @error: the error to set in the mapping + * + * When writeback fails in some way, we must record that error so that + * userspace can be informed when fsync and the like are called. We endeavor + * to report errors on any file that was open at the time of the error. Some + * internal callers also need to know when writeback errors have occurred.
+ * + * When a writeback error occurs, most filesystems will want to call + * mapping_set_error to record the error in the mapping so that it can be + * reported when the application calls fsync(2). + */ static inline void mapping_set_error(struct address_space *mapping, int error) { - if (unlikely(error)) { - if (error == -ENOSPC) - set_bit(AS_ENOSPC, &mapping->flags); - else - set_bit(AS_EIO, &mapping->flags); - } + if (likely(!error)) + return; + + /* Record in wb_err for checkers using errseq_t based tracking */ + filemap_set_wb_err(mapping, error); + + /* Record it in flags for now, for legacy callers */ + if (error == -ENOSPC) + set_bit(AS_ENOSPC, &mapping->flags); + else + set_bit(AS_EIO, &mapping->flags); } static inline void mapping_set_unevictable(struct address_space *mapping) -- cgit v1.2.3 From 60934b200ddd62187b149a7f32cc0f160c08a7ed Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 14 Dec 2016 02:49:13 +0300 Subject: NTB: Make link-state API being declared first Since link operations are usually performed before memory window access operations, it's logically better to declare link-related API before any of MW/Doorbell/Scratchpad methods. Signed-off-by: Serge Semin Signed-off-by: Jon Mason --- include/linux/ntb.h | 137 ++++++++++++++++++++++++++-------------------------- 1 file changed, 69 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index de87ceac110e..d7ab3e1ec88f 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -179,13 +179,13 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) /** * struct ntb_ctx_ops - ntb device operations + * @link_is_up: See ntb_link_is_up(). + * @link_enable: See ntb_link_enable(). + * @link_disable: See ntb_link_disable(). * @mw_count: See ntb_mw_count(). * @mw_get_range: See ntb_mw_get_range(). * @mw_set_trans: See ntb_mw_set_trans(). * @mw_clear_trans: See ntb_mw_clear_trans(). - * @link_is_up: See ntb_link_is_up(). 
- * @link_enable: See ntb_link_enable(). - * @link_disable: See ntb_link_disable(). * @db_is_unsafe: See ntb_db_is_unsafe(). * @db_valid_mask: See ntb_db_valid_mask(). * @db_vector_count: See ntb_db_vector_count(). @@ -212,6 +212,12 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) * @peer_spad_write: See ntb_peer_spad_write(). */ struct ntb_dev_ops { + int (*link_is_up)(struct ntb_dev *ntb, + enum ntb_speed *speed, enum ntb_width *width); + int (*link_enable)(struct ntb_dev *ntb, + enum ntb_speed max_speed, enum ntb_width max_width); + int (*link_disable)(struct ntb_dev *ntb); + int (*mw_count)(struct ntb_dev *ntb); int (*mw_get_range)(struct ntb_dev *ntb, int idx, phys_addr_t *base, resource_size_t *size, @@ -220,12 +226,6 @@ struct ntb_dev_ops { dma_addr_t addr, resource_size_t size); int (*mw_clear_trans)(struct ntb_dev *ntb, int idx); - int (*link_is_up)(struct ntb_dev *ntb, - enum ntb_speed *speed, enum ntb_width *width); - int (*link_enable)(struct ntb_dev *ntb, - enum ntb_speed max_speed, enum ntb_width max_width); - int (*link_disable)(struct ntb_dev *ntb); - int (*db_is_unsafe)(struct ntb_dev *ntb); u64 (*db_valid_mask)(struct ntb_dev *ntb); int (*db_vector_count)(struct ntb_dev *ntb); @@ -265,13 +265,14 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) { /* commented callbacks are not required: */ return + ops->link_is_up && + ops->link_enable && + ops->link_disable && ops->mw_count && ops->mw_get_range && ops->mw_set_trans && /* ops->mw_clear_trans && */ - ops->link_is_up && - ops->link_enable && - ops->link_disable && + /* ops->db_is_unsafe && */ ops->db_valid_mask && @@ -440,6 +441,62 @@ void ntb_link_event(struct ntb_dev *ntb); */ void ntb_db_event(struct ntb_dev *ntb, int vector); +/** + * ntb_link_is_up() - get the current ntb link state + * @ntb: NTB device context. + * @speed: OUT - The link speed expressed as PCIe generation number. 
+ * @width: OUT - The link width expressed as the number of PCIe lanes. + * + * Get the current state of the ntb link. It is recommended to query the link + * state once after every link event. It is safe to query the link state in + * the context of the link event callback. + * + * Return: One if the link is up, zero if the link is down, otherwise a + * negative value indicating the error number. + */ +static inline int ntb_link_is_up(struct ntb_dev *ntb, + enum ntb_speed *speed, enum ntb_width *width) +{ + return ntb->ops->link_is_up(ntb, speed, width); +} + +/** + * ntb_link_enable() - enable the link on the secondary side of the ntb + * @ntb: NTB device context. + * @max_speed: The maximum link speed expressed as PCIe generation number. + * @max_width: The maximum link width expressed as the number of PCIe lanes. + * + * Enable the link on the secondary side of the ntb. This can only be done + * from the primary side of the ntb in primary or b2b topology. The ntb device + * should train the link to its maximum speed and width, or the requested speed + * and width, whichever is smaller, if supported. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_link_enable(struct ntb_dev *ntb, + enum ntb_speed max_speed, + enum ntb_width max_width) +{ + return ntb->ops->link_enable(ntb, max_speed, max_width); +} + +/** + * ntb_link_disable() - disable the link on the secondary side of the ntb + * @ntb: NTB device context. + * + * Disable the link on the secondary side of the ntb. This can only be + * done from the primary side of the ntb in primary or b2b topology. The ntb + * device should disable the link. Returning from this call must indicate that + * a barrier has passed, though with no more writes may pass in either + * direction across the link, except if this call returns an error number. + * + * Return: Zero on success, otherwise an error number. 
+ */ +static inline int ntb_link_disable(struct ntb_dev *ntb) +{ + return ntb->ops->link_disable(ntb); +} + /** * ntb_mw_count() - get the number of memory windows * @ntb: NTB device context. @@ -516,62 +573,6 @@ static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx) return ntb->ops->mw_clear_trans(ntb, idx); } -/** - * ntb_link_is_up() - get the current ntb link state - * @ntb: NTB device context. - * @speed: OUT - The link speed expressed as PCIe generation number. - * @width: OUT - The link width expressed as the number of PCIe lanes. - * - * Get the current state of the ntb link. It is recommended to query the link - * state once after every link event. It is safe to query the link state in - * the context of the link event callback. - * - * Return: One if the link is up, zero if the link is down, otherwise a - * negative value indicating the error number. - */ -static inline int ntb_link_is_up(struct ntb_dev *ntb, - enum ntb_speed *speed, enum ntb_width *width) -{ - return ntb->ops->link_is_up(ntb, speed, width); -} - -/** - * ntb_link_enable() - enable the link on the secondary side of the ntb - * @ntb: NTB device context. - * @max_speed: The maximum link speed expressed as PCIe generation number. - * @max_width: The maximum link width expressed as the number of PCIe lanes. - * - * Enable the link on the secondary side of the ntb. This can only be done - * from the primary side of the ntb in primary or b2b topology. The ntb device - * should train the link to its maximum speed and width, or the requested speed - * and width, whichever is smaller, if supported. - * - * Return: Zero on success, otherwise an error number. - */ -static inline int ntb_link_enable(struct ntb_dev *ntb, - enum ntb_speed max_speed, - enum ntb_width max_width) -{ - return ntb->ops->link_enable(ntb, max_speed, max_width); -} - -/** - * ntb_link_disable() - disable the link on the secondary side of the ntb - * @ntb: NTB device context. 
- * - * Disable the link on the secondary side of the ntb. This can only be - * done from the primary side of the ntb in primary or b2b topology. The ntb - * device should disable the link. Returning from this call must indicate that - * a barrier has passed, though with no more writes may pass in either - * direction across the link, except if this call returns an error number. - * - * Return: Zero on success, otherwise an error number. - */ -static inline int ntb_link_disable(struct ntb_dev *ntb) -{ - return ntb->ops->link_disable(ntb); -} - /** * ntb_db_is_unsafe() - check if it is safe to use hardware doorbell * @ntb: NTB device context. -- cgit v1.2.3 From 1e5301196a88961b02fe43c73a952f78b2c84712 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 14 Dec 2016 02:49:14 +0300 Subject: NTB: Add indexed ports NTB API There is some NTB hardware, which can combine more than just two domains over NTB. For instance, some IDT PCIe-switches can have NTB-functions activated on more than two-ports. The different domains are distinguished by ports they are connected to. So the new port-related methods are added to the NTB API: ntb_port_number() - return local port ntb_peer_port_count() - return number of peers local port can connect to ntb_peer_port_number(pidx) - return port number by its index ntb_peer_port_idx(port) - return port index by its number Current test-drivers aren't changed much. They still support two-ports devices for the time being while multi-ports hardware drivers aren't added. By default port-related API is declared for two-ports hardware. So corresponding hardware drivers won't need to implement it.
Signed-off-by: Serge Semin Signed-off-by: Jon Mason --- include/linux/ntb.h | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index d7ab3e1ec88f..d23483bae6f3 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -139,6 +139,20 @@ enum ntb_width { NTB_WIDTH_32 = 32, }; +/** + * enum ntb_default_port - NTB default port number + * @NTB_PORT_PRI_USD: Default port of the NTB_TOPO_PRI/NTB_TOPO_B2B_USD + * topologies + * @NTB_PORT_SEC_DSD: Default port of the NTB_TOPO_SEC/NTB_TOPO_B2B_DSD + * topologies + */ +enum ntb_default_port { + NTB_PORT_PRI_USD, + NTB_PORT_SEC_DSD +}; +#define NTB_DEF_PEER_CNT (1) +#define NTB_DEF_PEER_IDX (0) + /** * struct ntb_client_ops - ntb client operations * @probe: Notify client of a new device. @@ -179,6 +193,10 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) /** * struct ntb_ctx_ops - ntb device operations + * @port_number: See ntb_port_number(). + * @peer_port_count: See ntb_peer_port_count(). + * @peer_port_number: See ntb_peer_port_number(). + * @peer_port_idx: See ntb_peer_port_idx(). * @link_is_up: See ntb_link_is_up(). * @link_enable: See ntb_link_enable(). * @link_disable: See ntb_link_disable(). @@ -212,6 +230,11 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) * @peer_spad_write: See ntb_peer_spad_write(). 
*/ struct ntb_dev_ops { + int (*port_number)(struct ntb_dev *ntb); + int (*peer_port_count)(struct ntb_dev *ntb); + int (*peer_port_number)(struct ntb_dev *ntb, int pidx); + int (*peer_port_idx)(struct ntb_dev *ntb, int port); + int (*link_is_up)(struct ntb_dev *ntb, enum ntb_speed *speed, enum ntb_width *width); int (*link_enable)(struct ntb_dev *ntb, @@ -265,6 +288,9 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) { /* commented callbacks are not required: */ return + !ops->peer_port_count == !ops->port_number && + !ops->peer_port_number == !ops->port_number && + !ops->peer_port_idx == !ops->port_number && ops->link_is_up && ops->link_enable && ops->link_disable && @@ -441,6 +467,136 @@ void ntb_link_event(struct ntb_dev *ntb); */ void ntb_db_event(struct ntb_dev *ntb, int vector); +/** + * ntb_default_port_number() - get the default local port number + * @ntb: NTB device context. + * + * If hardware driver doesn't specify port_number() callback method, the NTB + * is considered with just two ports. So this method returns default local + * port number in compliance with topology. + * + * NOTE Don't call this method directly. The ntb_port_number() function should + * be used instead. + * + * Return: the default local port number + */ +int ntb_default_port_number(struct ntb_dev *ntb); + +/** + * ntb_default_peer_port_count() - get the default number of peer device ports + * @ntb: NTB device context. + * + * By default hardware driver supports just one peer device. + * + * NOTE Don't call this method directly. The ntb_peer_port_count() function + * should be used instead. + * + * Return: the default number of peer ports + */ +int ntb_default_peer_port_count(struct ntb_dev *ntb); + +/** + * ntb_default_peer_port_number() - get the default peer port by given index + * @ntb: NTB device context. + * @pidx: Peer port index (should not differ from zero).
+ * + * By default hardware driver supports just one peer device, so this method + * shall return the corresponding value from enum ntb_default_port. + * + * NOTE Don't call this method directly. The ntb_peer_port_number() function + * should be used instead. + * + * Return: the peer device port or negative value indicating an error + */ +int ntb_default_peer_port_number(struct ntb_dev *ntb, int pidx); + +/** + * ntb_default_peer_port_idx() - get the default peer device port index by + * given port number + * @ntb: NTB device context. + * @port: Peer port number (should be one of enum ntb_default_port). + * + * By default hardware driver supports just one peer device, so while + * specified port-argument indicates peer port from enum ntb_default_port, + * the return value shall be zero. + * + * NOTE Don't call this method directly. The ntb_peer_port_idx() function + * should be used instead. + * + * Return: the peer port index or negative value indicating an error + */ +int ntb_default_peer_port_idx(struct ntb_dev *ntb, int port); + +/** + * ntb_port_number() - get the local port number + * @ntb: NTB device context. + * + * Hardware must support at least simple two-ports ntb connection + * + * Return: the local port number + */ +static inline int ntb_port_number(struct ntb_dev *ntb) +{ + if (!ntb->ops->port_number) + return ntb_default_port_number(ntb); + + return ntb->ops->port_number(ntb); +} + +/** + * ntb_peer_port_count() - get the number of peer device ports + * @ntb: NTB device context. + * + * Hardware may support an access to memory of several remote domains + * over multi-port NTB devices. This method returns the number of peers, + * local device can have shared memory with. 
+ * + * Return: the number of peer ports + */ +static inline int ntb_peer_port_count(struct ntb_dev *ntb) +{ + if (!ntb->ops->peer_port_count) + return ntb_default_peer_port_count(ntb); + + return ntb->ops->peer_port_count(ntb); +} + +/** + * ntb_peer_port_number() - get the peer port by given index + * @ntb: NTB device context. + * @pidx: Peer port index. + * + * Peer ports are continuously enumerated by NTB API logic, so this method + * lets to retrieve port real number by its index. + * + * Return: the peer device port or negative value indicating an error + */ +static inline int ntb_peer_port_number(struct ntb_dev *ntb, int pidx) +{ + if (!ntb->ops->peer_port_number) + return ntb_default_peer_port_number(ntb, pidx); + + return ntb->ops->peer_port_number(ntb, pidx); +} + +/** + * ntb_peer_port_idx() - get the peer device port index by given port number + * @ntb: NTB device context. + * @port: Peer port number. + * + * Inverse operation of ntb_peer_port_number(), so one can get port index + * by specified port number. + * + * Return: the peer port index or negative value indicating an error + */ +static inline int ntb_peer_port_idx(struct ntb_dev *ntb, int port) +{ + if (!ntb->ops->peer_port_idx) + return ntb_default_peer_port_idx(ntb, port); + + return ntb->ops->peer_port_idx(ntb, port); +} + /** * ntb_link_is_up() - get the current ntb link state * @ntb: NTB device context. -- cgit v1.2.3 From 4e8c11b7fd29f70eb7af43bae908297689f2c3da Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 14 Dec 2016 02:49:15 +0300 Subject: NTB: Alter link-state API to support multi-port devices Multi-port devices permit the NTB connections between multiple domains, so a local device can have NTB link being up with one peer and being down with another. NTB link-state API is appropriately altered to return a bitfield of the link-states between the local device and possible peers. 
Signed-off-by: Serge Semin Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index d23483bae6f3..b2b2924f5f43 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -235,7 +235,7 @@ struct ntb_dev_ops { int (*peer_port_number)(struct ntb_dev *ntb, int pidx); int (*peer_port_idx)(struct ntb_dev *ntb, int port); - int (*link_is_up)(struct ntb_dev *ntb, + u64 (*link_is_up)(struct ntb_dev *ntb, enum ntb_speed *speed, enum ntb_width *width); int (*link_enable)(struct ntb_dev *ntb, enum ntb_speed max_speed, enum ntb_width max_width); @@ -607,25 +607,26 @@ static inline int ntb_peer_port_idx(struct ntb_dev *ntb, int port) * state once after every link event. It is safe to query the link state in * the context of the link event callback. * - * Return: One if the link is up, zero if the link is down, otherwise a - * negative value indicating the error number. + * Return: bitfield of indexed ports link state: bit is set/cleared if the + * link is up/down respectively. */ -static inline int ntb_link_is_up(struct ntb_dev *ntb, +static inline u64 ntb_link_is_up(struct ntb_dev *ntb, enum ntb_speed *speed, enum ntb_width *width) { return ntb->ops->link_is_up(ntb, speed, width); } /** - * ntb_link_enable() - enable the link on the secondary side of the ntb + * ntb_link_enable() - enable the local port ntb connection * @ntb: NTB device context. * @max_speed: The maximum link speed expressed as PCIe generation number. * @max_width: The maximum link width expressed as the number of PCIe lanes. * - * Enable the link on the secondary side of the ntb. This can only be done - * from the primary side of the ntb in primary or b2b topology. The ntb device - * should train the link to its maximum speed and width, or the requested speed - * and width, whichever is smaller, if supported. 
+ * Enable the NTB/PCIe link on the local or remote (for bridge-to-bridge + * topology) side of the bridge. If it's supported the ntb device should train + * the link to its maximum speed and width, or the requested speed and width, + * whichever is smaller. Some hardware doesn't support PCIe link training, so + * the last two arguments will be ignored then. * * Return: Zero on success, otherwise an error number. */ @@ -637,14 +638,14 @@ static inline int ntb_link_enable(struct ntb_dev *ntb, } /** - * ntb_link_disable() - disable the link on the secondary side of the ntb + * ntb_link_disable() - disable the local port ntb connection * @ntb: NTB device context. * - * Disable the link on the secondary side of the ntb. This can only be - * done from the primary side of the ntb in primary or b2b topology. The ntb - * device should disable the link. Returning from this call must indicate that - * a barrier has passed, though with no more writes may pass in either - * direction across the link, except if this call returns an error number. + * Disable the link on the local or remote (for b2b topology) of the ntb. + * The ntb device should disable the link. Returning from this call must + * indicate that a barrier has passed, though with no more writes may pass in + * either direction across the link, except if this call returns an error + * number. * * Return: Zero on success, otherwise an error number. */ -- cgit v1.2.3 From 443b9a14ecbe811071467d54d6f2f1182835cc4d Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 11 Jan 2017 03:11:33 +0300 Subject: NTB: Alter MW API to support multi-ports devices Multi-port NTB devices permit to share a memory between all accessible peers. Memory Windows API is altered to correspondingly initialize and map memory windows for such devices: ntb_mw_count(pidx); - number of inbound memory windows, which can be allocated for shared buffer with specified peer device. 
ntb_mw_get_align(pidx, widx); - get alignment and size restriction parameters to properly allocate inbound memory region. ntb_peer_mw_count(); - get number of outbound memory windows. ntb_peer_mw_get_addr(widx); - get mapping address of an outbound memory window If hardware supports inbound translation configured on the local ntb port: ntb_mw_set_trans(pidx, widx); - set translation address of allocated inbound memory window so a peer device could access it. ntb_mw_clear_trans(pidx, widx); - clear the translation address of an inbound memory window. If hardware supports outbound translation configured on the peer ntb port: ntb_peer_mw_set_trans(pidx, widx); - set translation address of a memory window retrieved from a peer device ntb_peer_mw_clear_trans(pidx, widx); - clear the translation address of an outbound memory window Signed-off-by: Serge Semin Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 208 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 163 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index b2b2924f5f43..2ea83f91a236 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -5,6 +5,7 @@ * GPL LICENSE SUMMARY * * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * Copyright (C) 2016 T-Platforms. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -18,6 +19,7 @@ * BSD LICENSE * * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * Copyright (C) 2016 T-Platforms. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -201,9 +203,13 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) * @link_enable: See ntb_link_enable(). * @link_disable: See ntb_link_disable(). 
* @mw_count: See ntb_mw_count(). - * @mw_get_range: See ntb_mw_get_range(). + * @mw_get_align: See ntb_mw_get_align(). * @mw_set_trans: See ntb_mw_set_trans(). * @mw_clear_trans: See ntb_mw_clear_trans(). + * @peer_mw_count: See ntb_peer_mw_count(). + * @peer_mw_get_addr: See ntb_peer_mw_get_addr(). + * @peer_mw_set_trans: See ntb_peer_mw_set_trans(). + * @peer_mw_clear_trans:See ntb_peer_mw_clear_trans(). * @db_is_unsafe: See ntb_db_is_unsafe(). * @db_valid_mask: See ntb_db_valid_mask(). * @db_vector_count: See ntb_db_vector_count(). @@ -241,13 +247,20 @@ struct ntb_dev_ops { enum ntb_speed max_speed, enum ntb_width max_width); int (*link_disable)(struct ntb_dev *ntb); - int (*mw_count)(struct ntb_dev *ntb); - int (*mw_get_range)(struct ntb_dev *ntb, int idx, - phys_addr_t *base, resource_size_t *size, - resource_size_t *align, resource_size_t *align_size); - int (*mw_set_trans)(struct ntb_dev *ntb, int idx, + int (*mw_count)(struct ntb_dev *ntb, int pidx); + int (*mw_get_align)(struct ntb_dev *ntb, int pidx, int widx, + resource_size_t *addr_align, + resource_size_t *size_align, + resource_size_t *size_max); + int (*mw_set_trans)(struct ntb_dev *ntb, int pidx, int widx, dma_addr_t addr, resource_size_t size); - int (*mw_clear_trans)(struct ntb_dev *ntb, int idx); + int (*mw_clear_trans)(struct ntb_dev *ntb, int pidx, int widx); + int (*peer_mw_count)(struct ntb_dev *ntb); + int (*peer_mw_get_addr)(struct ntb_dev *ntb, int widx, + phys_addr_t *base, resource_size_t *size); + int (*peer_mw_set_trans)(struct ntb_dev *ntb, int pidx, int widx, + u64 addr, resource_size_t size); + int (*peer_mw_clear_trans)(struct ntb_dev *ntb, int pidx, int widx); int (*db_is_unsafe)(struct ntb_dev *ntb); u64 (*db_valid_mask)(struct ntb_dev *ntb); @@ -295,9 +308,13 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) ops->link_enable && ops->link_disable && ops->mw_count && - ops->mw_get_range && - ops->mw_set_trans && + ops->mw_get_align && + (ops->mw_set_trans || 
+ ops->peer_mw_set_trans) && /* ops->mw_clear_trans && */ + ops->peer_mw_count && + ops->peer_mw_get_addr && + /* ops->peer_mw_clear_trans && */ /* ops->db_is_unsafe && */ ops->db_valid_mask && @@ -655,79 +672,180 @@ static inline int ntb_link_disable(struct ntb_dev *ntb) } /** - * ntb_mw_count() - get the number of memory windows + * ntb_mw_count() - get the number of inbound memory windows, which could + * be created for a specified peer device * @ntb: NTB device context. + * @pidx: Port index of peer device. * * Hardware and topology may support a different number of memory windows. + * Moreover different peer devices can support different number of memory + * windows. Simply speaking this method returns the number of possible inbound + * memory windows to share with specified peer device. * * Return: the number of memory windows. */ -static inline int ntb_mw_count(struct ntb_dev *ntb) +static inline int ntb_mw_count(struct ntb_dev *ntb, int pidx) { - return ntb->ops->mw_count(ntb); + return ntb->ops->mw_count(ntb, pidx); } /** - * ntb_mw_get_range() - get the range of a memory window + * ntb_mw_get_align() - get the restriction parameters of inbound memory window * @ntb: NTB device context. - * @idx: Memory window number. - * @base: OUT - the base address for mapping the memory window - * @size: OUT - the size for mapping the memory window - * @align: OUT - the base alignment for translating the memory window - * @align_size: OUT - the size alignment for translating the memory window - * - * Get the range of a memory window. NULL may be given for any output - * parameter if the value is not needed. The base and size may be used for - * mapping the memory window, to access the peer memory. The alignment and - * size may be used for translating the memory window, for the peer to access - * memory on the local system. - * - * Return: Zero on success, otherwise an error number. + * @pidx: Port index of peer device. + * @widx: Memory window index. 
+ * @addr_align: OUT - the base alignment for translating the memory window + * @size_align: OUT - the size alignment for translating the memory window + * @size_max: OUT - the maximum size of the memory window + * + * Get the alignments of an inbound memory window with specified index. + * NULL may be given for any output parameter if the value is not needed. + * The alignment and size parameters may be used for allocation of proper + * shared memory. + * + * Return: Zero on success, otherwise a negative error number. */ -static inline int ntb_mw_get_range(struct ntb_dev *ntb, int idx, - phys_addr_t *base, resource_size_t *size, - resource_size_t *align, resource_size_t *align_size) +static inline int ntb_mw_get_align(struct ntb_dev *ntb, int pidx, int widx, + resource_size_t *addr_align, + resource_size_t *size_align, + resource_size_t *size_max) { - return ntb->ops->mw_get_range(ntb, idx, base, size, - align, align_size); + return ntb->ops->mw_get_align(ntb, pidx, widx, addr_align, size_align, + size_max); } /** - * ntb_mw_set_trans() - set the translation of a memory window + * ntb_mw_set_trans() - set the translation of an inbound memory window * @ntb: NTB device context. - * @idx: Memory window number. - * @addr: The dma address local memory to expose to the peer. + * @pidx: Port index of peer device. + * @widx: Memory window index. + * @addr: The dma address of local memory to expose to the peer. * @size: The size of the local memory to expose to the peer. * * Set the translation of a memory window. The peer may access local memory * through the window starting at the address, up to the size. The address - * must be aligned to the alignment specified by ntb_mw_get_range(). The size - * must be aligned to the size alignment specified by ntb_mw_get_range(). + * and size must be aligned in compliance with restrictions of + * ntb_mw_get_align(). The region size should not exceed the size_max parameter + * of that method. 
+ * + * This method may not be implemented due to the hardware specific memory + * windows interface. * * Return: Zero on success, otherwise an error number. */ -static inline int ntb_mw_set_trans(struct ntb_dev *ntb, int idx, +static inline int ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx, dma_addr_t addr, resource_size_t size) { - return ntb->ops->mw_set_trans(ntb, idx, addr, size); + if (!ntb->ops->mw_set_trans) + return 0; + + return ntb->ops->mw_set_trans(ntb, pidx, widx, addr, size); } /** - * ntb_mw_clear_trans() - clear the translation of a memory window + * ntb_mw_clear_trans() - clear the translation address of an inbound memory + * window * @ntb: NTB device context. - * @idx: Memory window number. + * @pidx: Port index of peer device. + * @widx: Memory window index. * - * Clear the translation of a memory window. The peer may no longer access - * local memory through the window. + * Clear the translation of an inbound memory window. The peer may no longer + * access local memory through the window. * * Return: Zero on success, otherwise an error number. */ -static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx) +static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int pidx, int widx) { if (!ntb->ops->mw_clear_trans) - return ntb->ops->mw_set_trans(ntb, idx, 0, 0); + return ntb_mw_set_trans(ntb, pidx, widx, 0, 0); + + return ntb->ops->mw_clear_trans(ntb, pidx, widx); +} + +/** + * ntb_peer_mw_count() - get the number of outbound memory windows, which could + * be mapped to access a shared memory + * @ntb: NTB device context. + * + * Hardware and topology may support a different number of memory windows. + * This method returns the number of outbound memory windows supported by + * local device. + * + * Return: the number of memory windows. 
+ */ +static inline int ntb_peer_mw_count(struct ntb_dev *ntb) +{ + return ntb->ops->peer_mw_count(ntb); +} + +/** + * ntb_peer_mw_get_addr() - get map address of an outbound memory window + * @ntb: NTB device context. + * @widx: Memory window index (within ntb_peer_mw_count() return value). + * @base: OUT - the base address of mapping region. + * @size: OUT - the size of mapping region. + * + * Get base and size of memory region to map. NULL may be given for any output + * parameter if the value is not needed. The base and size may be used for + * mapping the memory window, to access the peer memory. + * + * Return: Zero on success, otherwise a negative error number. + */ +static inline int ntb_peer_mw_get_addr(struct ntb_dev *ntb, int widx, + phys_addr_t *base, resource_size_t *size) +{ + return ntb->ops->peer_mw_get_addr(ntb, widx, base, size); +} + +/** + * ntb_peer_mw_set_trans() - set a translation address of a memory window + * retrieved from a peer device + * @ntb: NTB device context. + * @pidx: Port index of peer device the translation address received from. + * @widx: Memory window index. + * @addr: The dma address of the shared memory to access. + * @size: The size of the shared memory to access. + * + * Set the translation of an outbound memory window. The local device may + * access shared memory allocated by a peer device sent the address. + * + * This method may not be implemented due to the hardware specific memory + * windows interface, so a translation address can be only set on the side, + * where shared memory (inbound memory windows) is allocated. + * + * Return: Zero on success, otherwise an error number. 
+ */ +static inline int ntb_peer_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx, + u64 addr, resource_size_t size) +{ + if (!ntb->ops->peer_mw_set_trans) + return 0; + + return ntb->ops->peer_mw_set_trans(ntb, pidx, widx, addr, size); +} + +/** + * ntb_peer_mw_clear_trans() - clear the translation address of an outbound + * memory window + * @ntb: NTB device context. + * @pidx: Port index of peer device. + * @widx: Memory window index. + * + * Clear the translation of a outbound memory window. The local device may no + * longer access a shared memory through the window. + * + * This method may not be implemented due to the hardware specific memory + * windows interface. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_mw_clear_trans(struct ntb_dev *ntb, int pidx, + int widx) +{ + if (!ntb->ops->peer_mw_clear_trans) + return ntb_peer_mw_set_trans(ntb, pidx, widx, 0, 0); - return ntb->ops->mw_clear_trans(ntb, idx); + return ntb->ops->peer_mw_clear_trans(ntb, pidx, widx); } /** -- cgit v1.2.3 From d67288a39584daad11edee9b03d53264ba147453 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 11 Jan 2017 03:13:20 +0300 Subject: NTB: Alter Scratchpads API to support multi-ports devices Even though there is no any real NTB hardware, which would have both more than two ports and Scratchpad registers, it is logically correct to have Scratchpad API accepting a peer port index as well. Intel/AMD drivers utilize Primary and Secondary topology to split Scratchpad between connected root devices. Since port-index API introduced, Intel/AMD NTB hardware drivers can use device port to determine which Scratchpad registers actually belong to local and peer devices. The same approach can be used if some potential hardware in future will be multi-port and have some set of Scratchpads. 
Here are the brief of changes in the API: ntb_spad_count() - return number of Scratchpads per each port ntb_peer_spad_addr(pidx, sidx) - address of Scratchpad register of the peer device with pidx-index ntb_peer_spad_read(pidx, sidx) - read specified Scratchpad register of the peer with pidx-index ntb_peer_spad_write(pidx, sidx) - write data to Scratchpad register of the peer with pidx-index Since there is hardware which doesn't support Scratchpad registers, the corresponding API methods are now made optional. Signed-off-by: Serge Semin Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 73 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index 2ea83f91a236..4e3cd56af732 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -288,13 +288,14 @@ struct ntb_dev_ops { int (*spad_is_unsafe)(struct ntb_dev *ntb); int (*spad_count)(struct ntb_dev *ntb); - u32 (*spad_read)(struct ntb_dev *ntb, int idx); - int (*spad_write)(struct ntb_dev *ntb, int idx, u32 val); + u32 (*spad_read)(struct ntb_dev *ntb, int sidx); + int (*spad_write)(struct ntb_dev *ntb, int sidx, u32 val); - int (*peer_spad_addr)(struct ntb_dev *ntb, int idx, + int (*peer_spad_addr)(struct ntb_dev *ntb, int pidx, int sidx, phys_addr_t *spad_addr); - u32 (*peer_spad_read)(struct ntb_dev *ntb, int idx); - int (*peer_spad_write)(struct ntb_dev *ntb, int idx, u32 val); + u32 (*peer_spad_read)(struct ntb_dev *ntb, int pidx, int sidx); + int (*peer_spad_write)(struct ntb_dev *ntb, int pidx, int sidx, + u32 val); }; static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) @@ -335,13 +336,12 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) /* ops->peer_db_read_mask && */ /* ops->peer_db_set_mask && */ /* ops->peer_db_clear_mask && */ - /* ops->spad_is_unsafe && */ - ops->spad_count && - ops->spad_read && - 
ops->spad_write && - /* ops->peer_spad_addr && */ - /* ops->peer_spad_read && */ - ops->peer_spad_write && + /* !ops->spad_is_unsafe == !ops->spad_count && */ + !ops->spad_read == !ops->spad_count && + !ops->spad_write == !ops->spad_count && + /* !ops->peer_spad_addr == !ops->spad_count && */ + /* !ops->peer_spad_read == !ops->spad_count && */ + !ops->peer_spad_write == !ops->spad_count && 1; } @@ -1176,47 +1176,58 @@ static inline int ntb_spad_is_unsafe(struct ntb_dev *ntb) * @ntb: NTB device context. * * Hardware and topology may support a different number of scratchpads. + * Although it must be the same for all ports per NTB device. * * Return: the number of scratchpads. */ static inline int ntb_spad_count(struct ntb_dev *ntb) { + if (!ntb->ops->spad_count) + return 0; + return ntb->ops->spad_count(ntb); } /** * ntb_spad_read() - read the local scratchpad register * @ntb: NTB device context. - * @idx: Scratchpad index. + * @sidx: Scratchpad index. * * Read the local scratchpad register, and return the value. * * Return: The value of the local scratchpad register. */ -static inline u32 ntb_spad_read(struct ntb_dev *ntb, int idx) +static inline u32 ntb_spad_read(struct ntb_dev *ntb, int sidx) { - return ntb->ops->spad_read(ntb, idx); + if (!ntb->ops->spad_read) + return ~(u32)0; + + return ntb->ops->spad_read(ntb, sidx); } /** * ntb_spad_write() - write the local scratchpad register * @ntb: NTB device context. - * @idx: Scratchpad index. + * @sidx: Scratchpad index. * @val: Scratchpad value. * * Write the value to the local scratchpad register. * * Return: Zero on success, otherwise an error number. 
*/ -static inline int ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val) +static inline int ntb_spad_write(struct ntb_dev *ntb, int sidx, u32 val) { - return ntb->ops->spad_write(ntb, idx, val); + if (!ntb->ops->spad_write) + return -EINVAL; + + return ntb->ops->spad_write(ntb, sidx, val); } /** * ntb_peer_spad_addr() - address of the peer scratchpad register * @ntb: NTB device context. - * @idx: Scratchpad index. + * @pidx: Port index of peer device. + * @sidx: Scratchpad index. * @spad_addr: OUT - The address of the peer scratchpad register. * * Return the address of the peer doorbell register. This may be used, for @@ -1224,45 +1235,51 @@ static inline int ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val) * * Return: Zero on success, otherwise an error number. */ -static inline int ntb_peer_spad_addr(struct ntb_dev *ntb, int idx, +static inline int ntb_peer_spad_addr(struct ntb_dev *ntb, int pidx, int sidx, phys_addr_t *spad_addr) { if (!ntb->ops->peer_spad_addr) return -EINVAL; - return ntb->ops->peer_spad_addr(ntb, idx, spad_addr); + return ntb->ops->peer_spad_addr(ntb, pidx, sidx, spad_addr); } /** * ntb_peer_spad_read() - read the peer scratchpad register * @ntb: NTB device context. - * @idx: Scratchpad index. + * @pidx: Port index of peer device. + * @sidx: Scratchpad index. * * Read the peer scratchpad register, and return the value. * * Return: The value of the local scratchpad register. */ -static inline u32 ntb_peer_spad_read(struct ntb_dev *ntb, int idx) +static inline u32 ntb_peer_spad_read(struct ntb_dev *ntb, int pidx, int sidx) { if (!ntb->ops->peer_spad_read) - return 0; + return ~(u32)0; - return ntb->ops->peer_spad_read(ntb, idx); + return ntb->ops->peer_spad_read(ntb, pidx, sidx); } /** * ntb_peer_spad_write() - write the peer scratchpad register * @ntb: NTB device context. - * @idx: Scratchpad index. + * @pidx: Port index of peer device. + * @sidx: Scratchpad index. * @val: Scratchpad value. 
* * Write the value to the peer scratchpad register. * * Return: Zero on success, otherwise an error number. */ -static inline int ntb_peer_spad_write(struct ntb_dev *ntb, int idx, u32 val) +static inline int ntb_peer_spad_write(struct ntb_dev *ntb, int pidx, int sidx, + u32 val) { - return ntb->ops->peer_spad_write(ntb, idx, val); + if (!ntb->ops->peer_spad_write) + return -EINVAL; + + return ntb->ops->peer_spad_write(ntb, pidx, sidx, val); } #endif -- cgit v1.2.3 From bc3e49adc279c5505d6df8dd8c7fca45d6d3d21a Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Tue, 20 Dec 2016 12:48:20 +0300 Subject: NTB: Add Messaging NTB API Some IDT NTB-capable PCIe-switches have message registers to communicate with peer devices. This patch adds new NTB API callback methods, which can be used to utilize these registers functionality: ntb_msg_count(); - get number of message registers ntb_msg_inbits(); - get bitfield of inbound message registers status ntb_msg_outbits(); - get bitfield of outbound message registers status ntb_msg_read_sts(); - read the inbound and outbound message registers status ntb_msg_clear_sts(); - clear status bits of message registers ntb_msg_set_mask(); - mask interrupts raised by status bits of message registers. ntb_msg_clear_mask(); - clear interrupts mask bits of message registers ntb_msg_read(midx, *pidx); - read message register with specified index, additionally getting peer port index which data received from ntb_msg_write(midx, pidx); - write data to the specified message register sending it to the passed peer device connected over a pidx port ntb_msg_event(); - notify driver context of a new message event Of course there is hardware which doesn't support Message registers, so this API is made optional. 
Signed-off-by: Serge Semin Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 205 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index 4e3cd56af732..d59688f91618 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -178,10 +178,12 @@ static inline int ntb_client_ops_is_valid(const struct ntb_client_ops *ops) * struct ntb_ctx_ops - ntb driver context operations * @link_event: See ntb_link_event(). * @db_event: See ntb_db_event(). + * @msg_event: See ntb_msg_event(). */ struct ntb_ctx_ops { void (*link_event)(void *ctx); void (*db_event)(void *ctx, int db_vector); + void (*msg_event)(void *ctx); }; static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) @@ -190,6 +192,7 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) return /* ops->link_event && */ /* ops->db_event && */ + /* ops->msg_event && */ 1; } @@ -234,6 +237,15 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) * @peer_spad_addr: See ntb_peer_spad_addr(). * @peer_spad_read: See ntb_peer_spad_read(). * @peer_spad_write: See ntb_peer_spad_write(). + * @msg_count: See ntb_msg_count(). + * @msg_inbits: See ntb_msg_inbits(). + * @msg_outbits: See ntb_msg_outbits(). + * @msg_read_sts: See ntb_msg_read_sts(). + * @msg_clear_sts: See ntb_msg_clear_sts(). + * @msg_set_mask: See ntb_msg_set_mask(). + * @msg_clear_mask: See ntb_msg_clear_mask(). + * @msg_read: See ntb_msg_read(). + * @msg_write: See ntb_msg_write(). 
*/ struct ntb_dev_ops { int (*port_number)(struct ntb_dev *ntb); @@ -296,6 +308,16 @@ struct ntb_dev_ops { u32 (*peer_spad_read)(struct ntb_dev *ntb, int pidx, int sidx); int (*peer_spad_write)(struct ntb_dev *ntb, int pidx, int sidx, u32 val); + + int (*msg_count)(struct ntb_dev *ntb); + u64 (*msg_inbits)(struct ntb_dev *ntb); + u64 (*msg_outbits)(struct ntb_dev *ntb); + u64 (*msg_read_sts)(struct ntb_dev *ntb); + int (*msg_clear_sts)(struct ntb_dev *ntb, u64 sts_bits); + int (*msg_set_mask)(struct ntb_dev *ntb, u64 mask_bits); + int (*msg_clear_mask)(struct ntb_dev *ntb, u64 mask_bits); + int (*msg_read)(struct ntb_dev *ntb, int midx, int *pidx, u32 *msg); + int (*msg_write)(struct ntb_dev *ntb, int midx, int pidx, u32 msg); }; static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) @@ -342,6 +364,15 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) /* !ops->peer_spad_addr == !ops->spad_count && */ /* !ops->peer_spad_read == !ops->spad_count && */ !ops->peer_spad_write == !ops->spad_count && + + !ops->msg_inbits == !ops->msg_count && + !ops->msg_outbits == !ops->msg_count && + !ops->msg_read_sts == !ops->msg_count && + !ops->msg_clear_sts == !ops->msg_count && + /* !ops->msg_set_mask == !ops->msg_count && */ + /* !ops->msg_clear_mask == !ops->msg_count && */ + !ops->msg_read == !ops->msg_count && + !ops->msg_write == !ops->msg_count && 1; } @@ -484,6 +515,18 @@ void ntb_link_event(struct ntb_dev *ntb); */ void ntb_db_event(struct ntb_dev *ntb, int vector); +/** + * ntb_msg_event() - notify driver context of a message event + * @ntb: NTB device context. + * + * Notify the driver context of a message event. If hardware supports + * message registers, this event indicates, that a new message arrived in + * some incoming message register or last sent message couldn't be delivered. + * The events can be masked/unmasked by the methods ntb_msg_set_mask() and + * ntb_msg_clear_mask(). 
+ */ +void ntb_msg_event(struct ntb_dev *ntb); + /** * ntb_default_port_number() - get the default local port number * @ntb: NTB device context. @@ -1282,4 +1325,166 @@ static inline int ntb_peer_spad_write(struct ntb_dev *ntb, int pidx, int sidx, return ntb->ops->peer_spad_write(ntb, pidx, sidx, val); } +/** + * ntb_msg_count() - get the number of message registers + * @ntb: NTB device context. + * + * Hardware may support a different number of message registers. + * + * Return: the number of message registers. + */ +static inline int ntb_msg_count(struct ntb_dev *ntb) +{ + if (!ntb->ops->msg_count) + return 0; + + return ntb->ops->msg_count(ntb); +} + +/** + * ntb_msg_inbits() - get a bitfield of inbound message registers status + * @ntb: NTB device context. + * + * The method returns the bitfield of status and mask registers, which related + * to inbound message registers. + * + * Return: bitfield of inbound message registers. + */ +static inline u64 ntb_msg_inbits(struct ntb_dev *ntb) +{ + if (!ntb->ops->msg_inbits) + return 0; + + return ntb->ops->msg_inbits(ntb); +} + +/** + * ntb_msg_outbits() - get a bitfield of outbound message registers status + * @ntb: NTB device context. + * + * The method returns the bitfield of status and mask registers, which related + * to outbound message registers. + * + * Return: bitfield of outbound message registers. + */ +static inline u64 ntb_msg_outbits(struct ntb_dev *ntb) +{ + if (!ntb->ops->msg_outbits) + return 0; + + return ntb->ops->msg_outbits(ntb); +} + +/** + * ntb_msg_read_sts() - read the message registers status + * @ntb: NTB device context. + * + * Read the status of message register. Inbound and outbound message registers + * related bits can be filtered by masks retrieved from ntb_msg_inbits() and + * ntb_msg_outbits(). 
+ * + * Return: status bits of message registers + */ +static inline u64 ntb_msg_read_sts(struct ntb_dev *ntb) +{ + if (!ntb->ops->msg_read_sts) + return 0; + + return ntb->ops->msg_read_sts(ntb); +} + +/** + * ntb_msg_clear_sts() - clear status bits of message registers + * @ntb: NTB device context. + * @sts_bits: Status bits to clear. + * + * Clear bits in the status register. + * + * Return: Zero on success, otherwise a negative error number. + */ +static inline int ntb_msg_clear_sts(struct ntb_dev *ntb, u64 sts_bits) +{ + if (!ntb->ops->msg_clear_sts) + return -EINVAL; + + return ntb->ops->msg_clear_sts(ntb, sts_bits); +} + +/** + * ntb_msg_set_mask() - set mask of message register status bits + * @ntb: NTB device context. + * @mask_bits: Mask bits. + * + * Mask the message registers status bits from raising the message event. + * + * Return: Zero on success, otherwise a negative error number. + */ +static inline int ntb_msg_set_mask(struct ntb_dev *ntb, u64 mask_bits) +{ + if (!ntb->ops->msg_set_mask) + return -EINVAL; + + return ntb->ops->msg_set_mask(ntb, mask_bits); +} + +/** + * ntb_msg_clear_mask() - clear message registers mask + * @ntb: NTB device context. + * @mask_bits: Mask bits to clear. + * + * Clear bits in the message events mask register. + * + * Return: Zero on success, otherwise a negative error number. + */ +static inline int ntb_msg_clear_mask(struct ntb_dev *ntb, u64 mask_bits) +{ + if (!ntb->ops->msg_clear_mask) + return -EINVAL; + + return ntb->ops->msg_clear_mask(ntb, mask_bits); +} + +/** + * ntb_msg_read() - read message register with specified index + * @ntb: NTB device context. + * @midx: Message register index + * @pidx: OUT - Port index of peer device a message retrieved from + * @msg: OUT - Data + * + * Read data from the specified message register. Source port index of a + * message is retrieved as well. + * + * Return: Zero on success, otherwise a negative error number. 
+ */ +static inline int ntb_msg_read(struct ntb_dev *ntb, int midx, int *pidx, + u32 *msg) +{ + if (!ntb->ops->msg_read) + return -EINVAL; + + return ntb->ops->msg_read(ntb, midx, pidx, msg); +} + +/** + * ntb_msg_write() - write data to the specified message register + * @ntb: NTB device context. + * @midx: Message register index + * @pidx: Port index of peer device a message being sent to + * @msg: Data to send + * + * Send data to a specified peer device using the defined message register. + * Message event can be raised if the midx registers isn't empty while + * calling this method and the corresponding interrupt isn't masked. + * + * Return: Zero on success, otherwise a negative error number. + */ +static inline int ntb_msg_write(struct ntb_dev *ntb, int midx, int pidx, + u32 msg) +{ + if (!ntb->ops->msg_write) + return -EINVAL; + + return ntb->ops->msg_write(ntb, midx, pidx, msg); +} + #endif -- cgit v1.2.3 From 85dce3aaae98a8440f4a1a2404bcbab890574b46 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 14 Dec 2016 02:49:20 +0300 Subject: NTB: Add PCIe Gen4 link speed Signed-off-by: Serge Semin Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index d59688f91618..a6f569727845 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -108,6 +108,7 @@ static inline char *ntb_topo_string(enum ntb_topo topo) * @NTB_SPEED_GEN1: Link is trained to gen1 speed. * @NTB_SPEED_GEN2: Link is trained to gen2 speed. * @NTB_SPEED_GEN3: Link is trained to gen3 speed. + * @NTB_SPEED_GEN4: Link is trained to gen4 speed. 
*/ enum ntb_speed { NTB_SPEED_AUTO = -1, @@ -115,6 +116,7 @@ enum ntb_speed { NTB_SPEED_GEN1 = 1, NTB_SPEED_GEN2 = 2, NTB_SPEED_GEN3 = 3, + NTB_SPEED_GEN4 = 4 }; /** -- cgit v1.2.3 From 3c69f5d6731c43a5b6b9e78b385948e8d76460be Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Tue, 20 Dec 2016 12:50:09 +0300 Subject: NTB: Add ntb.h comments Signed-off-by: Serge Semin Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index a6f569727845..609e232c00da 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -326,12 +326,17 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) { /* commented callbacks are not required: */ return + /* Port operations are required for multiport devices */ !ops->peer_port_count == !ops->port_number && !ops->peer_port_number == !ops->port_number && !ops->peer_port_idx == !ops->port_number && + + /* Link operations are required */ ops->link_is_up && ops->link_enable && ops->link_disable && + + /* One or both MW interfaces should be developed */ ops->mw_count && ops->mw_get_align && (ops->mw_set_trans || @@ -341,12 +346,11 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) ops->peer_mw_get_addr && /* ops->peer_mw_clear_trans && */ + /* Doorbell operations are mostly required */ /* ops->db_is_unsafe && */ ops->db_valid_mask && - /* both set, or both unset */ - (!ops->db_vector_count == !ops->db_vector_mask) && - + (!ops->db_vector_count == !ops->db_vector_mask) && ops->db_read && /* ops->db_set && */ ops->db_clear && @@ -360,6 +364,8 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) /* ops->peer_db_read_mask && */ /* ops->peer_db_set_mask && */ /* ops->peer_db_clear_mask && */ + + /* Scrachpads interface is optional */ /* !ops->spad_is_unsafe == !ops->spad_count && */ !ops->spad_read == 
!ops->spad_count && !ops->spad_write == !ops->spad_count && @@ -367,6 +373,7 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) /* !ops->peer_spad_read == !ops->spad_count && */ !ops->peer_spad_write == !ops->spad_count && + /* Messaging interface is optional */ !ops->msg_inbits == !ops->msg_count && !ops->msg_outbits == !ops->msg_count && !ops->msg_read_sts == !ops->msg_count && @@ -387,13 +394,12 @@ struct ntb_client { struct device_driver drv; const struct ntb_client_ops ops; }; - #define drv_ntb_client(__drv) container_of((__drv), struct ntb_client, drv) /** * struct ntb_device - ntb device * @dev: Linux device object. - * @pdev: Pci device entry of the ntb. + * @pdev: PCI device entry of the ntb. * @topo: Detected topology of the ntb. * @ops: See &ntb_dev_ops. * @ctx: See &ntb_ctx_ops. @@ -414,7 +420,6 @@ struct ntb_dev { /* block unregister until device is fully released */ struct completion released; }; - #define dev_ntb(__dev) container_of((__dev), struct ntb_dev, dev) /** @@ -511,7 +516,7 @@ void ntb_link_event(struct ntb_dev *ntb); * multiple interrupt vectors for doorbells, the vector number indicates which * vector received the interrupt. The vector number is relative to the first * vector used for doorbells, starting at zero, and must be less than - ** ntb_db_vector_count(). The driver may call ntb_db_read() to check which + * ntb_db_vector_count(). The driver may call ntb_db_read() to check which * doorbell bits need service, and ntb_db_vector_mask() to determine which of * those bits are associated with the vector number. */ -- cgit v1.2.3 From c80081b9209713e0fe86d3def395a9fc66503c58 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 6 Jul 2017 14:29:04 +0200 Subject: genirq: Allow to pass the IRQF_TIMER flag with percpu irq request The irq timings infrastructure tracks when interrupts occur in order to statistically predict the next interrupt event.
There is no point to track timer interrupts and try to predict them because the next expiration time is already known. This can be avoided via the IRQF_TIMER flag which is passed by timer drivers in request_irq(). It marks the interrupt as timer based which allows these interrupts to be ignored in the timings code. Per CPU interrupts which are requested via request_percpu_irq() have no flag argument, so marking per cpu timer interrupts is not possible and they get tracked pointlessly. Add __request_percpu_irq() as a variant of request_percpu_irq() with a flags argument and make request_percpu_irq() an inline wrapper passing flags = 0. The flag parameter is restricted to IRQF_TIMER as all other IRQF_ flags make no sense for per cpu interrupts. The next step is to convert all existing users of request_percpu_irq() and then remove the wrapper and the underscores. [ tglx: Massaged changelog ] Signed-off-by: Daniel Lezcano Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Cc: nicolas.pitre@linaro.org Cc: vincent.guittot@linaro.org Cc: rafael@kernel.org Link: http://lkml.kernel.org/r/1499344144-3964-1-git-send-email-daniel.lezcano@linaro.org --- include/linux/interrupt.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 37f8e354f564..5ac6e238555e 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -152,8 +152,17 @@ request_any_context_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *name, void *dev_id); extern int __must_check +__request_percpu_irq(unsigned int irq, irq_handler_t handler, + unsigned long flags, const char *devname, + void __percpu *percpu_dev_id); + +static inline int __must_check request_percpu_irq(unsigned int irq, irq_handler_t handler, - const char *devname, void __percpu *percpu_dev_id); + const char *devname, void __percpu *percpu_dev_id) +{ + return __request_percpu_irq(irq,
handler, 0, + devname, percpu_dev_id); +} extern const void *free_irq(unsigned int, void *); extern void free_percpu_irq(unsigned int, void __percpu *); -- cgit v1.2.3 From 9a04dbcfb33b4012d0ce8c0282f1e3ca694675b1 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 6 Jul 2017 15:35:24 -0700 Subject: compiler, clang: always inline when CONFIG_OPTIMIZE_INLINING is disabled The motivation for commit abb2ea7dfd82 ("compiler, clang: suppress warning for unused static inline functions") was to suppress clang's warnings about unused static inline functions. For configs without CONFIG_OPTIMIZE_INLINING enabled, such as any non-x86 architecture, `inline' in the kernel implies that __attribute__((always_inline)) is used. Some code depends on that behavior, see https://lkml.org/lkml/2017/6/13/918: net/built-in.o: In function `__xchg_mb': arch/arm64/include/asm/cmpxchg.h:99: undefined reference to `__compiletime_assert_99' arch/arm64/include/asm/cmpxchg.h:99: undefined reference to `__compiletime_assert_99 The full fix would be to identify these breakages and annotate the functions with __always_inline instead of `inline'. But since we are late in the 4.12-rc cycle, simply carry forward the forced inlining behavior and work toward moving arm64, and other architectures, toward CONFIG_OPTIMIZE_INLINING behavior. 
Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1706261552200.1075@chino.kir.corp.google.com Signed-off-by: David Rientjes Reported-by: Sodagudi Prasad Tested-by: Sodagudi Prasad Tested-by: Matthias Kaehlcke Cc: Mark Rutland Cc: Will Deacon Cc: Catalin Marinas Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 8 -------- include/linux/compiler-gcc.h | 18 +++++++++++------- 2 files changed, 11 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index d614c5ea1b5e..de179993e039 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -15,11 +15,3 @@ * with any version that can compile the kernel */ #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) - -/* - * GCC does not warn about unused static inline functions for - * -Wunused-function. This turns out to avoid the need for complex #ifdef - * directives. Suppress the warning in clang as well. - */ -#undef inline -#define inline inline __attribute__((unused)) notrace diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 7deaae3dc87d..cd4bbe8242bd 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -66,18 +66,22 @@ /* * Force always-inline if the user requests it so via the .config, - * or if gcc is too old: + * or if gcc is too old. + * GCC does not warn about unused static inline functions for + * -Wunused-function. This turns out to avoid the need for complex #ifdef + * directives. Suppress the warning in clang as well by using "unused" + * function attribute, which is redundant but not harmful for gcc. 
*/ #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4) -#define inline inline __attribute__((always_inline)) notrace -#define __inline__ __inline__ __attribute__((always_inline)) notrace -#define __inline __inline __attribute__((always_inline)) notrace +#define inline inline __attribute__((always_inline,unused)) notrace +#define __inline__ __inline__ __attribute__((always_inline,unused)) notrace +#define __inline __inline __attribute__((always_inline,unused)) notrace #else /* A lot of inline functions can cause havoc with function tracing */ -#define inline inline notrace -#define __inline__ __inline__ notrace -#define __inline __inline notrace +#define inline inline __attribute__((unused)) notrace +#define __inline__ __inline__ __attribute__((unused)) notrace +#define __inline __inline __attribute__((unused)) notrace #endif #define __always_inline inline __attribute__((always_inline)) -- cgit v1.2.3 From 938f846492d6682584cbe4f3f19c4ebffec46311 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 6 Jul 2017 15:35:52 -0700 Subject: provide linux/set_memory.h Currently code that wants to use set_memory_ro() etc, needs to include asm/set_memory.h, which doesn't exist on all arches. Some code knows it only builds on arches which have the header, other code guards the inclusion with an #ifdef, neither is ideal. So create linux/set_memory.h. This always exists, so users don't need an #ifdef just to include the header. When CONFIG_ARCH_HAS_SET_MEMORY=y it includes asm/set_memory.h, otherwise it provides empty non-failing implementations. 
Link: http://lkml.kernel.org/r/1498717781-29151-1-git-send-email-mpe@ellerman.id.au Signed-off-by: Michael Ellerman Acked-by: Daniel Borkmann Acked-by: Kees Cook Acked-by: Laura Abbott Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/set_memory.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/linux/set_memory.h (limited to 'include/linux') diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h new file mode 100644 index 000000000000..e5140648f638 --- /dev/null +++ b/include/linux/set_memory.h @@ -0,0 +1,20 @@ +/* + * Copyright 2017, Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + */ +#ifndef _LINUX_SET_MEMORY_H_ +#define _LINUX_SET_MEMORY_H_ + +#ifdef CONFIG_ARCH_HAS_SET_MEMORY +#include <asm/set_memory.h> +#else +static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } +#endif + +#endif /* _LINUX_SET_MEMORY_H_ */ -- cgit v1.2.3 From 820a0b24b261c650cb07ea0f60aea9191f658f25 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 6 Jul 2017 15:36:01 -0700 Subject: include/linux/filter.h: use linux/set_memory.h This header always exists, so doesn't require an ifdef around its inclusion. When CONFIG_ARCH_HAS_SET_MEMORY=y it includes the asm header, otherwise it provides empty versions of the set_memory_xx() routines.
Link: http://lkml.kernel.org/r/1498717781-29151-4-git-send-email-mpe@ellerman.id.au Signed-off-by: Michael Ellerman Acked-by: Daniel Borkmann Acked-by: Kees Cook Acked-by: Laura Abbott Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/filter.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index f1fc9baa3509..bfef1e5734f8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -16,13 +16,10 @@ #include #include #include +#include #include -#ifdef CONFIG_ARCH_HAS_SET_MEMORY -#include -#endif - #include #include -- cgit v1.2.3 From d3111e6cce6001e71ddc4737d0d412c2300043a2 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 6 Jul 2017 15:36:28 -0700 Subject: mm/slub.c: pack red_left_pad with another int to save a word Patch series "try to save some memory for kmem_cache in some cases", v2. kmem_cache is a frequently used data in kernel. During the code reading, I found maybe we could save some space in some cases. 1. On 64bit arch, type int will occupy a word if it doesn't sit well. 2. cpu_slab->partial is just used when CONFIG_SLUB_CPU_PARTIAL is set 3. cpu_partial is just used when CONFIG_SLUB_CPU_PARTIAL is set, while just save some space on 32bit arch. This patch (of 3): On 64bit arch, struct is 8-bytes aligned, so int will occupy a word if it doesn't sit well. This patch pack red_left_pad with reserved to save 8 bytes for struct kmem_cache on a 64bit arch. 
Link: http://lkml.kernel.org/r/20170502144533.10729-2-richard.weiyang@gmail.com Signed-off-by: Wei Yang Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 93315d6b21a8..070ff84240e7 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -79,9 +79,9 @@ struct kmem_cache { int inuse; /* Offset to metadata */ int align; /* Alignment */ int reserved; /* Reserved bytes at the end of slabs */ + int red_left_pad; /* Left redzone padding size */ const char *name; /* Name (only for display!) */ struct list_head list; /* List of slab caches */ - int red_left_pad; /* Left redzone padding size */ #ifdef CONFIG_SYSFS struct kobject kobj; /* For sysfs */ struct work_struct kobj_remove_work; -- cgit v1.2.3 From a93cf07bc3fb4e7bc924d33c387dabc85086ea38 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 6 Jul 2017 15:36:31 -0700 Subject: mm/slub.c: wrap cpu_slab->partial in CONFIG_SLUB_CPU_PARTIAL cpu_slab's field partial is used when CONFIG_SLUB_CPU_PARTIAL is set, which means we can save a pointer's space on each cpu for every slub item. This patch wraps cpu_slab->partial in CONFIG_SLUB_CPU_PARTIAL and wraps its sysfs use too. 
[akpm@linux-foundation.org: avoid strange 80-col tricks] Link: http://lkml.kernel.org/r/20170502144533.10729-3-richard.weiyang@gmail.com Signed-off-by: Wei Yang Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 070ff84240e7..a3e9492fed02 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -41,12 +41,31 @@ struct kmem_cache_cpu { void **freelist; /* Pointer to next available object */ unsigned long tid; /* Globally unique transaction id */ struct page *page; /* The slab from which we are allocating */ +#ifdef CONFIG_SLUB_CPU_PARTIAL struct page *partial; /* Partially allocated frozen slabs */ +#endif #ifdef CONFIG_SLUB_STATS unsigned stat[NR_SLUB_STAT_ITEMS]; #endif }; +#ifdef CONFIG_SLUB_CPU_PARTIAL +#define slub_percpu_partial(c) ((c)->partial) + +#define slub_set_percpu_partial(c, p) \ +({ \ + slub_percpu_partial(c) = (p)->next; \ +}) + +#define slub_percpu_partial_read_once(c) READ_ONCE(slub_percpu_partial(c)) +#else +#define slub_percpu_partial(c) NULL + +#define slub_set_percpu_partial(c, p) + +#define slub_percpu_partial_read_once(c) NULL +#endif // CONFIG_SLUB_CPU_PARTIAL + /* * Word size structure that can be atomically updated or read and that * contains both the order and the number of objects that a slab of the -- cgit v1.2.3 From e6d0e1dcf5f07fb04704b87ffab749589d29cb02 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 6 Jul 2017 15:36:34 -0700 Subject: mm/slub.c: wrap kmem_cache->cpu_partial in config CONFIG_SLUB_CPU_PARTIAL kmem_cache->cpu_partial is just used when CONFIG_SLUB_CPU_PARTIAL is set, so wrap it with config CONFIG_SLUB_CPU_PARTIAL will save some space on 32bit arch. 
This patch wraps kmem_cache->cpu_partial in config CONFIG_SLUB_CPU_PARTIAL and wraps its sysfs too. Link: http://lkml.kernel.org/r/20170502144533.10729-4-richard.weiyang@gmail.com Signed-off-by: Wei Yang Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index a3e9492fed02..cc0faf3a90be 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -86,7 +86,9 @@ struct kmem_cache { int size; /* The size of an object including meta data */ int object_size; /* The size of an object without meta data */ int offset; /* Free pointer offset. */ +#ifdef CONFIG_SLUB_CPU_PARTIAL int cpu_partial; /* Number of per cpu partial objects to keep around */ +#endif struct kmem_cache_order_objects oo; /* Allocation and freeing of slabs */ @@ -131,6 +133,17 @@ struct kmem_cache { struct kmem_cache_node *node[MAX_NUMNODES]; }; +#ifdef CONFIG_SLUB_CPU_PARTIAL +#define slub_cpu_partial(s) ((s)->cpu_partial) +#define slub_set_cpu_partial(s, n) \ +({ \ + slub_cpu_partial(s) = (n); \ +}) +#else +#define slub_cpu_partial(s) (0) +#define slub_set_cpu_partial(s, n) +#endif // CONFIG_SLUB_CPU_PARTIAL + #ifdef CONFIG_SYSFS #define SLAB_SUPPORTS_SYSFS void sysfs_slab_release(struct kmem_cache *); -- cgit v1.2.3 From c4e1be9ec1130fff4d691cdc0e0f9d666009f9ae Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 6 Jul 2017 15:36:44 -0700 Subject: mm, sparsemem: break out of loops early There are a number of times that we loop over NR_MEM_SECTIONS, looking for section_present() on each section. But, when we have very large physical address spaces (large MAX_PHYSMEM_BITS), NR_MEM_SECTIONS becomes very large, making the loops quite long. 
With MAX_PHYSMEM_BITS=46 and a section size of 128MB, the current loops are 512k iterations, which we barely notice on modern hardware. But, raising MAX_PHYSMEM_BITS higher (like we will see on systems that support 5-level paging) makes this 64x longer and we start to notice, especially on slower systems like simulators. A 10-second delay for 512k iterations is annoying. But, a 640-second delay is crippling. This does not help if we have extremely sparse physical address spaces, but those are quite rare. We expect that most of the "slow" systems where this matters will also be quite small and non-sparse. To fix this, we track the highest section we've ever encountered. This lets us know when we will *never* see another section_present(), and lets us break out of the loops earlier. Doing the whole for_each_present_section_nr() macro is probably overkill, but it will ensure that any future loop iterations that we grow are more likely to be correct. Kirill said "It shaved almost 40 seconds from boot time in qemu with 5-level paging enabled for me". Link: http://lkml.kernel.org/r/20170504174434.C45A4735@viggo.jf.intel.com Signed-off-by: Dave Hansen Tested-by: Kirill A.
Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ef6a13b7bd3e..fc39f85d273c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1180,6 +1180,8 @@ static inline struct mem_section *__pfn_to_section(unsigned long pfn) return __nr_to_section(pfn_to_section_nr(pfn)); } +extern int __highest_present_section_nr; + #ifndef CONFIG_HAVE_ARCH_PFN_VALID static inline int pfn_valid(unsigned long pfn) { -- cgit v1.2.3 From 38d8b4e6bdc872f07a3149309ab01719c96f3894 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 6 Jul 2017 15:37:18 -0700 Subject: mm, THP, swap: delay splitting THP during swap out Patch series "THP swap: Delay splitting THP during swapping out", v11. This patchset is to optimize the performance of Transparent Huge Page (THP) swap. Recently, the performance of the storage devices improved so fast that we cannot saturate the disk bandwidth with single logical CPU when do page swap out even on a high-end server machine. Because the performance of the storage device improved faster than that of single logical CPU. And it seems that the trend will not change in the near future. On the other hand, the THP becomes more and more popular because of increased memory size. So it becomes necessary to optimize THP swap performance. The advantages of the THP swap support include: - Batch the swap operations for the THP to reduce lock acquiring/releasing, including allocating/freeing the swap space, adding/deleting to/from the swap cache, and writing/reading the swap space, etc. This will help improve the performance of the THP swap. - The THP swap space read/write will be 2M sequential IO. It is particularly helpful for the swap read, which are usually 4k random IO. This will improve the performance of the THP swap too. 
- It will help reduce memory fragmentation, especially when the THP is heavily used by the applications. The 2M contiguous pages will be freed up after THP swapping out. - It will improve the THP utilization on the system with the swap turned on, because the speed for khugepaged to collapse the normal pages into the THP is quite slow. After the THP is split during the swapping out, it will take quite a long time for the normal pages to collapse back into the THP after being swapped in. The high THP utilization helps the efficiency of the page based memory management too. There are some concerns regarding THP swap in, mainly because the possibly enlarged read/write IO size (for swap in/out) may put more overhead on the storage device. To deal with that, the THP swap in should be turned on only when necessary. For example, it can be selected via "always/never/madvise" logic, to be turned on globally, turned off globally, or turned on only for VMA with MADV_HUGEPAGE, etc. This patchset is the first step for the THP swap support. The plan is to delay splitting THP step by step, finally avoid splitting THP during the THP swapping out and swap out/in the THP as a whole. As the first step, in this patchset, splitting the huge page is delayed from almost the first step of swapping out to after allocating the swap space for the THP and adding the THP into the swap cache. This will reduce lock acquiring/releasing for the locks used for the swap cache management. With the patchset, the swap out throughput improves 15.5% (from about 3.73GB/s to about 4.31GB/s) in the vm-scalability swap-w-seq test case with 8 processes. The test is done on a Xeon E5 v3 system. The swap device used is a RAM simulated PMEM (persistent memory) device. To test the sequential swapping out, the test case creates 8 processes, which sequentially allocate and write to the anonymous pages until the RAM and part of the swap device is used up. 
This patch (of 5): In this patch, splitting the huge page is delayed from almost the first step of swapping out to after allocating the swap space for the THP (Transparent Huge Page) and adding the THP into the swap cache. This will batch the corresponding operation, thus improving THP swap out throughput. This is the first step for the THP swap optimization. The plan is to delay splitting the THP step by step and finally avoid splitting the THP. In this patch, one swap cluster is used to hold the contents of each THP swapped out. So, the size of the swap cluster is changed to that of the THP (Transparent Huge Page) on x86_64 architecture (512). For other architectures which want such THP swap optimization, ARCH_USES_THP_SWAP_CLUSTER needs to be selected in the Kconfig file for the architecture. In effect, this will enlarge swap cluster size by 2 times on x86_64, which may make it harder to find a free cluster when the swap space becomes fragmented. So, in theory, this may reduce contiguous swap space allocation and sequential writes. The performance test in 0day shows no regressions caused by this. In the future of THP swap optimization, some information of the swapped out THP (such as compound map count) will be recorded in the swap_cluster_info data structure. The mem cgroup swap accounting functions are enhanced to support charging or uncharging a swap cluster backing a THP as a whole. The swap cluster allocate/free functions are added to allocate/free a swap cluster for a THP. A fairly simple algorithm is used for swap cluster allocation, that is, only the first swap device in the priority list will be tried when allocating the swap cluster. The function will fail if that attempt is not successful, and the caller will fall back to allocating a single swap slot instead. This works well enough for normal cases. If the difference of the number of the free swap clusters among multiple swap devices is significant, it is possible that some THPs are split earlier than necessary. 
For example, this could be caused by a big size difference among multiple swap devices. The swap cache functions are enhanced to support adding/deleting a THP to/from the swap cache as a set of (HPAGE_PMD_NR) sub-pages. This may be enhanced in the future with multi-order radix tree. But because we will split the THP soon during swapping out, that optimization doesn't make much sense for this first step. The THP splitting functions are enhanced to support splitting a THP in the swap cache during swapping out. The page lock will be held during allocating the swap cluster, adding the THP into the swap cache and splitting the THP. So in the code path other than swapping out, if the THP needs to be split, the PageSwapCache(THP) will always be false. The swap cluster is only available for SSD, so the THP swap optimization in this patchset has no effect for HDD. [ying.huang@intel.com: fix two issues in THP optimize patch] Link: http://lkml.kernel.org/r/87k25ed8zo.fsf@yhuang-dev.intel.com [hannes@cmpxchg.org: extensive cleanups and simplifications, reduce code size] Link: http://lkml.kernel.org/r/20170515112522.32457-2-ying.huang@intel.com Signed-off-by: "Huang, Ying" Signed-off-by: Johannes Weiner Suggested-by: Andrew Morton [for config option] Acked-by: Kirill A. 
Shutemov [for changes in huge_memory.c and huge_mm.h] Cc: Andrea Arcangeli Cc: Ebru Akagunduz Cc: Johannes Weiner Cc: Michal Hocko Cc: Tejun Heo Cc: Hugh Dickins Cc: Shaohua Li Cc: Minchan Kim Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 7 +++++-- include/linux/swap.h | 19 ++++++++++++++----- include/linux/swap_cgroup.h | 6 ++++-- 3 files changed, 23 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 6b5818d6de32..d33e3280c8ad 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -326,11 +326,14 @@ PAGEFLAG_FALSE(HighMem) #ifdef CONFIG_SWAP static __always_inline int PageSwapCache(struct page *page) { +#ifdef CONFIG_THP_SWAP + page = compound_head(page); +#endif return PageSwapBacked(page) && test_bit(PG_swapcache, &page->flags); } -SETPAGEFLAG(SwapCache, swapcache, PF_NO_COMPOUND) -CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_COMPOUND) +SETPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) +CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) #else PAGEFLAG_FALSE(SwapCache) #endif diff --git a/include/linux/swap.h b/include/linux/swap.h index ba5882419a7d..d18876384de0 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -386,9 +386,9 @@ static inline long get_nr_swap_pages(void) } extern void si_swapinfo(struct sysinfo *); -extern swp_entry_t get_swap_page(void); +extern swp_entry_t get_swap_page(struct page *page); extern swp_entry_t get_swap_page_of_type(int); -extern int get_swap_pages(int n, swp_entry_t swp_entries[]); +extern int get_swap_pages(int n, bool cluster, swp_entry_t swp_entries[]); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); extern int swap_duplicate(swp_entry_t); @@ -515,7 +515,7 @@ static inline int try_to_free_swap(struct page *page) return 0; } -static inline swp_entry_t get_swap_page(void) +static inline swp_entry_t 
get_swap_page(struct page *page) { swp_entry_t entry; entry.val = 0; @@ -548,7 +548,7 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) #ifdef CONFIG_MEMCG_SWAP extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry); extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry); -extern void mem_cgroup_uncharge_swap(swp_entry_t entry); +extern void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages); extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg); extern bool mem_cgroup_swap_full(struct page *page); #else @@ -562,7 +562,8 @@ static inline int mem_cgroup_try_charge_swap(struct page *page, return 0; } -static inline void mem_cgroup_uncharge_swap(swp_entry_t entry) +static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, + unsigned int nr_pages) { } @@ -577,5 +578,13 @@ static inline bool mem_cgroup_swap_full(struct page *page) } #endif +#ifdef CONFIG_THP_SWAP +extern void swapcache_free_cluster(swp_entry_t entry); +#else +static inline void swapcache_free_cluster(swp_entry_t entry) +{ +} +#endif + #endif /* __KERNEL__*/ #endif /* _LINUX_SWAP_H */ diff --git a/include/linux/swap_cgroup.h b/include/linux/swap_cgroup.h index 145306bdc92f..b2b8ec7bda3f 100644 --- a/include/linux/swap_cgroup.h +++ b/include/linux/swap_cgroup.h @@ -7,7 +7,8 @@ extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, unsigned short old, unsigned short new); -extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id); +extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id, + unsigned int nr_ents); extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent); extern int swap_cgroup_swapon(int type, unsigned long max_pages); extern void swap_cgroup_swapoff(int type); @@ -15,7 +16,8 @@ extern void swap_cgroup_swapoff(int type); #else static inline -unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id) +unsigned short swap_cgroup_record(swp_entry_t ent, 
unsigned short id, + unsigned int nr_ents) { return 0; } -- cgit v1.2.3 From 75f6d6d29a40b5541f0f107201cf7dec134ad210 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 6 Jul 2017 15:37:21 -0700 Subject: mm, THP, swap: unify swap slot free functions to put_swap_page Now, get_swap_page takes struct page and allocates swap space according to page size(ie, normal or THP) so it would be more cleaner to introduce put_swap_page which is a counter function of get_swap_page. Then, it calls right swap slot free function depending on page's size. [ying.huang@intel.com: minor cleanup and fix] Link: http://lkml.kernel.org/r/20170515112522.32457-3-ying.huang@intel.com Signed-off-by: Minchan Kim Signed-off-by: "Huang, Ying" Acked-by: Johannes Weiner Cc: Andrea Arcangeli Cc: Ebru Akagunduz Cc: Hugh Dickins Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Rik van Riel Cc: Shaohua Li Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index d18876384de0..ead6fd7966b4 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -387,6 +387,7 @@ static inline long get_nr_swap_pages(void) extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(struct page *page); +extern void put_swap_page(struct page *page, swp_entry_t entry); extern swp_entry_t get_swap_page_of_type(int); extern int get_swap_pages(int n, bool cluster, swp_entry_t swp_entries[]); extern int add_swap_count_continuation(swp_entry_t, gfp_t); @@ -394,7 +395,6 @@ extern void swap_shmem_alloc(swp_entry_t); extern int swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t); extern void swap_free(swp_entry_t); -extern void swapcache_free(swp_entry_t); extern void swapcache_free_entries(swp_entry_t *entries, int n); extern int free_swap_and_cache(swp_entry_t); extern int swap_type_of(dev_t, 
sector_t, struct block_device **); @@ -453,7 +453,7 @@ static inline void swap_free(swp_entry_t swp) { } -static inline void swapcache_free(swp_entry_t swp) +static inline void put_swap_page(struct page *page, swp_entry_t swp) { } @@ -578,13 +578,5 @@ static inline bool mem_cgroup_swap_full(struct page *page) } #endif -#ifdef CONFIG_THP_SWAP -extern void swapcache_free_cluster(swp_entry_t entry); -#else -static inline void swapcache_free_cluster(swp_entry_t entry) -{ -} -#endif - #endif /* __KERNEL__*/ #endif /* _LINUX_SWAP_H */ -- cgit v1.2.3 From 0f0746589e4be071a8f890b2035c97c30c7a4e16 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 6 Jul 2017 15:37:24 -0700 Subject: mm, THP, swap: move anonymous THP split logic to vmscan The add_to_swap aims to allocate swap_space(ie, swap slot and swapcache) so if it fails due to lack of space in case of THP or something(hdd swap but tries THP swapout) *caller* rather than add_to_swap itself should split the THP page and retry it with base page which is more natural. Link: http://lkml.kernel.org/r/20170515112522.32457-4-ying.huang@intel.com Signed-off-by: Minchan Kim Signed-off-by: "Huang, Ying" Acked-by: Johannes Weiner Cc: Andrea Arcangeli Cc: Ebru Akagunduz Cc: Hugh Dickins Cc: Kirill A. 
Shutemov Cc: Michal Hocko Cc: Rik van Riel Cc: Shaohua Li Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index ead6fd7966b4..5ab1c98c7d27 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -353,7 +353,7 @@ extern struct address_space *swapper_spaces[]; >> SWAP_ADDRESS_SPACE_SHIFT]) extern unsigned long total_swapcache_pages(void); extern void show_swap_cache_info(void); -extern int add_to_swap(struct page *, struct list_head *list); +extern int add_to_swap(struct page *page); extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t); extern int __add_to_swap_cache(struct page *page, swp_entry_t entry); extern void __delete_from_swap_cache(struct page *); @@ -473,7 +473,7 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp) return NULL; } -static inline int add_to_swap(struct page *page, struct list_head *list) +static inline int add_to_swap(struct page *page) { return 0; } -- cgit v1.2.3 From b8f593cd0896b8b14c2b494a9776531b5cd54d98 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 6 Jul 2017 15:37:28 -0700 Subject: mm, THP, swap: check whether THP can be split firstly To swap out THP (Transparent Huge Page), before splitting the THP, the swap cluster will be allocated and the THP will be added into the swap cache. But it is possible that the THP cannot be split, so that we must delete the THP from the swap cache and free the swap cluster. To avoid that, in this patch, whether the THP can be split is checked first. The check can only be done racily, but it is good enough for most cases. With the patch, the swap out throughput improves 3.6% (from about 4.16GB/s to about 4.31GB/s) in the vm-scalability swap-w-seq test case with 8 processes. The test is done on a Xeon E5 v3 system. 
The swap device used is a RAM simulated PMEM (persistent memory) device. To test the sequential swapping out, the test case creates 8 processes, which sequentially allocate and write to the anonymous pages until the RAM and part of the swap device is used up. Link: http://lkml.kernel.org/r/20170515112522.32457-5-ying.huang@intel.com Signed-off-by: "Huang, Ying" Acked-by: Kirill A. Shutemov [for can_split_huge_page()] Cc: Johannes Weiner Cc: Andrea Arcangeli Cc: Ebru Akagunduz Cc: Hugh Dickins Cc: Michal Hocko Cc: Minchan Kim Cc: Rik van Riel Cc: Shaohua Li Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index a3762d49ba39..d3b3e8fcc717 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -113,6 +113,7 @@ extern unsigned long thp_get_unmapped_area(struct file *filp, extern void prep_transhuge_page(struct page *page); extern void free_transhuge_page(struct page *page); +bool can_split_huge_page(struct page *page, int *pextra_pins); int split_huge_page_to_list(struct page *page, struct list_head *list); static inline int split_huge_page(struct page *page) { @@ -231,6 +232,12 @@ static inline void prep_transhuge_page(struct page *page) {} #define thp_get_unmapped_area NULL +static inline bool +can_split_huge_page(struct page *page, int *pextra_pins) +{ + BUILD_BUG(); + return false; +} static inline int split_huge_page_to_list(struct page *page, struct list_head *list) { -- cgit v1.2.3 From dc0bbf3b7fb9ed2246f62bba4379070589e2135c Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 6 Jul 2017 15:37:35 -0700 Subject: mm: remove return value from init_currently_empty_zone Patch series "mm: make movable onlining suck less", v4. 
Movable onlining is a real hack with many downsides - mainly reintroduction of lowmem/highmem issues we used to have on 32-bit systems - but it is the only way to make the memory hotremove more reliable which is something that people are asking for. The current semantic of memory movable onlining is really cumbersome, however. The main reason for this is that the udev driven approach is basically unusable because udev races with the memory probing while only the last memory block or the one adjacent to the existing zone_movable are allowed to be onlined movable. In short the criterion for the successful online_movable changes under udev's feet. A reliable udev approach would require a 2 phase approach where the first successful movable online would have to check all the previous blocks and online them in descending order. This can hardly be considered sane. This patchset aims at making the onlining semantic more usable. First of all it allows onlining memory as movable as long as it doesn't clash with the existing ZONE_NORMAL. That means that ZONE_NORMAL and ZONE_MOVABLE cannot overlap. Currently I preserve the original ordering semantic so the normal zone always precedes the movable zone but I have plans to remove this restriction in future because it is not really necessary. First 3 patches are cleanups which should be ready to be merged right away (unless I have missed something subtle of course). Patch 4 deals with ZONE_DEVICE dependencies down the __add_pages path. Patch 5 deals with implicit assumptions of register_one_node on pgdat initialization. Patches 6-10 deal with offline holes in the zone for pfn walkers. I hope I got all of them right but people familiar with compaction should double check this. Patch 11 is the core of the change. In order to make it easier to review I have tried to keep it as minimalistic as possible and the large code removal is moved to patch 14. Patch 12 is a trivial follow up cleanup. 
Patch 13 fixes sparse warnings and finally patch 14 removes the unused code. I have tested the patches in kvm: # qemu-system-x86_64 -enable-kvm -monitor pty -m 2G,slots=4,maxmem=4G -numa node,mem=1G -numa node,mem=1G ... and then probed the additional memory by (qemu) object_add memory-backend-ram,id=mem1,size=1G (qemu) device_add pc-dimm,id=dimm1,memdev=mem1 Then I have used this simple script to probe the memory block by hand # cat probe_memblock.sh #!/bin/sh BLOCK_NR=$1 # echo $((0x100000000+$BLOCK_NR*(128<<20))) > /sys/devices/system/memory/probe # for i in $(seq 10); do sh probe_memblock.sh $i; done # grep . /sys/devices/system/memory/memory3?/valid_zones 2>/dev/null /sys/devices/system/memory/memory33/valid_zones:Normal Movable /sys/devices/system/memory/memory34/valid_zones:Normal Movable /sys/devices/system/memory/memory35/valid_zones:Normal Movable /sys/devices/system/memory/memory36/valid_zones:Normal Movable /sys/devices/system/memory/memory37/valid_zones:Normal Movable /sys/devices/system/memory/memory38/valid_zones:Normal Movable /sys/devices/system/memory/memory39/valid_zones:Normal Movable The main difference to the original implementation is that all new memblocks can be both online_kernel and online_movable initially because there is no clash obviously. For the comparison the original implementation would have /sys/devices/system/memory/memory33/valid_zones:Normal /sys/devices/system/memory/memory34/valid_zones:Normal /sys/devices/system/memory/memory35/valid_zones:Normal /sys/devices/system/memory/memory36/valid_zones:Normal /sys/devices/system/memory/memory37/valid_zones:Normal /sys/devices/system/memory/memory38/valid_zones:Normal /sys/devices/system/memory/memory39/valid_zones:Normal Movable Now # echo online_movable > /sys/devices/system/memory/memory34/state # grep . 
/sys/devices/system/memory/memory3?/valid_zones 2>/dev/null /sys/devices/system/memory/memory33/valid_zones:Normal Movable /sys/devices/system/memory/memory34/valid_zones:Movable /sys/devices/system/memory/memory35/valid_zones:Movable /sys/devices/system/memory/memory36/valid_zones:Movable /sys/devices/system/memory/memory37/valid_zones:Movable /sys/devices/system/memory/memory38/valid_zones:Movable /sys/devices/system/memory/memory39/valid_zones:Movable Block 33 can still be online both kernel and movable while all the remaining can be only movable. /proc/zonelist says Node 0, zone Normal pages free 0 min 0 low 0 high 0 spanned 0 present 0 -- Node 0, zone Movable pages free 32753 min 85 low 117 high 149 spanned 32768 present 32768 A new memblock at a lower address will result in a new memblock (32) which will still allow both Normal and Movable. # sh probe_memblock.sh 0 # grep . /sys/devices/system/memory/memory3[2-5]/valid_zones 2>/dev/null /sys/devices/system/memory/memory32/valid_zones:Normal Movable /sys/devices/system/memory/memory33/valid_zones:Normal Movable /sys/devices/system/memory/memory34/valid_zones:Movable /sys/devices/system/memory/memory35/valid_zones:Movable and online_kernel will convert it to the zone normal properly while 33 can be still onlined both ways. # echo online_kernel > /sys/devices/system/memory/memory32/state # grep . /sys/devices/system/memory/memory3[2-5]/valid_zones 2>/dev/null /sys/devices/system/memory/memory32/valid_zones:Normal /sys/devices/system/memory/memory33/valid_zones:Normal Movable /sys/devices/system/memory/memory34/valid_zones:Movable /sys/devices/system/memory/memory35/valid_zones:Movable /proc/zoneinfo will now tell Node 0, zone Normal pages free 65441 min 165 low 230 high 295 spanned 65536 present 65536 -- Node 0, zone Movable pages free 32740 min 82 low 114 high 146 spanned 32768 present 32768 so both zones have one memblock spanned and present. 
Onlining 39 should associate this block to the movable zone # echo online > /sys/devices/system/memory/memory39/state /proc/zoneinfo will now tell Node 0, zone Normal pages free 32765 min 80 low 112 high 144 spanned 32768 present 32768 -- Node 0, zone Movable pages free 65501 min 160 low 225 high 290 spanned 196608 present 65536 so we will have a movable zone which spans 6 memblocks, 2 present and 4 representing a hole. Offlining both movable blocks will lead to the zone with no present pages which is the expected behavior I believe. # echo offline > /sys/devices/system/memory/memory39/state # echo offline > /sys/devices/system/memory/memory34/state # grep -A6 "Movable\|Normal" /proc/zoneinfo Node 0, zone Normal pages free 32735 min 90 low 122 high 154 spanned 32768 present 32768 -- Node 0, zone Movable pages free 0 min 0 low 0 high 0 spanned 196608 present 0 As a bonus we will get a nice cleanup in the memory hotplug codebase. This patch (of 16): init_currently_empty_zone doesn't have any error to return yet it is still an int and callers try to be defensive and try to handle potential error. Remove this nonsense and simplify all callers. 
This patch shouldn't have any visible effect Link: http://lkml.kernel.org/r/20170515085827.16474-2-mhocko@kernel.org Signed-off-by: Michal Hocko Reviewed-by: Yasuaki Ishimatsu Acked-by: Balbir Singh Acked-by: Vlastimil Babka Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dan Williams Cc: Daniel Kiper Cc: David Rientjes Cc: Heiko Carstens Cc: Igor Mammedov Cc: Jerome Glisse Cc: Joonsoo Kim Cc: Martin Schwidefsky Cc: Mel Gorman Cc: Reza Arbab Cc: Tobias Regnery Cc: Toshi Kani Cc: Vitaly Kuznetsov Cc: Xishi Qiu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index fc39f85d273c..976a1202bec1 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -772,7 +772,7 @@ enum memmap_context { MEMMAP_EARLY, MEMMAP_HOTPLUG, }; -extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn, +extern void init_currently_empty_zone(struct zone *zone, unsigned long start_pfn, unsigned long size); extern void lruvec_init(struct lruvec *lruvec); -- cgit v1.2.3 From 1b862aecfbd419cdc4553645bf86d07554279bed Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 6 Jul 2017 15:37:45 -0700 Subject: mm, memory_hotplug: get rid of is_zone_device_section Device memory hotplug hooks into regular memory hotplug only half way. It needs memory sections to track struct pages but there is no need/desire to associate those sections with memory blocks and export them to the userspace via sysfs because they cannot be onlined anyway. This is currently expressed by for_device argument to arch_add_memory which then makes sure to associate the given memory range with ZONE_DEVICE. register_new_memory then relies on is_zone_device_section to distinguish special memory hotplug from the regular one. 
While this works now, later patches in this series want to move __add_zone outside of arch_add_memory path so we have to come up with something else. Add want_memblock down the __add_pages path and use it to control whether the section->memblock association should be done. arch_add_memory then just trivially want memblock for everything but for_device hotplug. remove_memory_section doesn't need is_zone_device_section either. We can simply skip all the memblock specific cleanup if there is no memblock for the given section. This shouldn't introduce any functional change. Link: http://lkml.kernel.org/r/20170515085827.16474-5-mhocko@kernel.org Signed-off-by: Michal Hocko Tested-by: Dan Williams Acked-by: Vlastimil Babka Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Balbir Singh Cc: Daniel Kiper Cc: David Rientjes Cc: Heiko Carstens Cc: Igor Mammedov Cc: Jerome Glisse Cc: Joonsoo Kim Cc: Martin Schwidefsky Cc: Mel Gorman Cc: Reza Arbab Cc: Tobias Regnery Cc: Toshi Kani Cc: Vitaly Kuznetsov Cc: Xishi Qiu Cc: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 134a2f69c21a..3c8cf86201c3 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -111,7 +111,7 @@ extern int __remove_pages(struct zone *zone, unsigned long start_pfn, /* reasonably generic interface to expand the physical pages in a zone */ extern int __add_pages(int nid, struct zone *zone, unsigned long start_pfn, - unsigned long nr_pages); + unsigned long nr_pages, bool want_memblock); #ifdef CONFIG_NUMA extern int memory_add_physaddr_to_nid(u64 start); -- cgit v1.2.3 From 9037a9934349b0e180896fc8cacaf1819418ba03 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 6 Jul 2017 15:37:49 -0700 Subject: mm, memory_hotplug: split up register_one_node() Memory hotplug 
(add_memory_resource) has to reinitialize node infrastructure if the node is offline (one which went through the complete add_memory(); remove_memory() cycle). That involves node registration to the kobj infrastructure (register_node), the proper association with cpus (register_cpu_under_node) and finally creation of node<->memblock symlinks (link_mem_sections). The last part requires knowing node_start_pfn and node_spanned_pages which we currently have but a later patch will postpone this initialization to the onlining phase which happens later. In fact we do not need to rely on the early pgdat initialization even now because the currently hot added pfn range is currently known. Split register_one_node into a core which does all the common work for the boot time NUMA initialization and the hotplug (__register_one_node). register_one_node keeps the full initialization while hotplug calls __register_one_node and manually calls link_mem_sections for the proper range. This shouldn't introduce any functional change. 
Link: http://lkml.kernel.org/r/20170515085827.16474-6-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Balbir Singh Cc: Dan Williams Cc: Daniel Kiper Cc: David Rientjes Cc: Heiko Carstens Cc: Igor Mammedov Cc: Jerome Glisse Cc: Joonsoo Kim Cc: Martin Schwidefsky Cc: Mel Gorman Cc: Reza Arbab Cc: Tobias Regnery Cc: Toshi Kani Cc: Vitaly Kuznetsov Cc: Xishi Qiu Cc: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/node.h | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/node.h b/include/linux/node.h index 2115ad5d6f19..d1751beb462c 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -30,9 +30,38 @@ struct memory_block; extern struct node *node_devices[]; typedef void (*node_registration_func_t)(struct node *); +#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA) +extern int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages); +#else +static inline int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages) +{ + return 0; +} +#endif + extern void unregister_node(struct node *node); #ifdef CONFIG_NUMA -extern int register_one_node(int nid); +/* Core of the node registration - only memory hotplug should use this */ +extern int __register_one_node(int nid); + +/* Registers an online node */ +static inline int register_one_node(int nid) +{ + int error = 0; + + if (node_online(nid)) { + struct pglist_data *pgdat = NODE_DATA(nid); + + error = __register_one_node(nid); + if (error) + return error; + /* link memory sections under this node */ + error = link_mem_sections(nid, pgdat->node_start_pfn, pgdat->node_spanned_pages); + } + + return error; +} + extern void unregister_one_node(int nid); extern int register_cpu_under_node(unsigned int cpu, unsigned int nid); extern int unregister_cpu_under_node(unsigned 
int cpu, unsigned int nid); @@ -46,6 +75,10 @@ extern void register_hugetlbfs_with_node(node_registration_func_t doregister, node_registration_func_t unregister); #endif #else +static inline int __register_one_node(int nid) +{ + return 0; +} static inline int register_one_node(int nid) { return 0; -- cgit v1.2.3 From 2d070eab2e8270c8a84d480bb91e4f739315f03d Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 6 Jul 2017 15:37:56 -0700 Subject: mm: consider zone which is not fully populated to have holes __pageblock_pfn_to_page has two users currently, set_zone_contiguous which checks whether the given zone contains holes and pageblock_pfn_to_page which then carefully returns a first valid page from the given pfn range for the given zone. This doesn't handle zones which are not fully populated though. Memory pageblocks can be offlined or might not have been onlined yet. In such a case the zone should be considered to have holes otherwise pfn walkers can touch and play with offline pages. Current callers of pageblock_pfn_to_page in compaction seem to work properly right now because they only isolate PageBuddy (isolate_freepages_block) or PageLRU resp. __PageMovable (isolate_migratepages_block) which will be always false for these pages. It would be safer to skip these pages altogether, though. In order to do this, the patch adds a new memory section state (SECTION_IS_ONLINE) which is set in memory_present (during boot time) or in online_pages_range during the memory hotplug. Similarly offline_mem_sections clears the bit and it is called when the memory range is offlined. pfn_to_online_page helper is then added which checks the mem section and only returns a page if it is onlined already. Use the new helper in __pageblock_pfn_to_page and skip the whole page block in such a case.
[mhocko@suse.com: check valid section number in pfn_to_online_page (Vlastimil), mark sections online after all struct pages are initialized in online_pages_range (Vlastimil)] Link: http://lkml.kernel.org/r/20170518164210.GD18333@dhcp22.suse.cz Link: http://lkml.kernel.org/r/20170515085827.16474-8-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Balbir Singh Cc: Dan Williams Cc: Daniel Kiper Cc: David Rientjes Cc: Heiko Carstens Cc: Igor Mammedov Cc: Jerome Glisse Cc: Joonsoo Kim Cc: Martin Schwidefsky Cc: Mel Gorman Cc: Reza Arbab Cc: Tobias Regnery Cc: Toshi Kani Cc: Vitaly Kuznetsov Cc: Xishi Qiu Cc: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 22 ++++++++++++++++++++++ include/linux/mmzone.h | 35 +++++++++++++++++++++++++++++------ 2 files changed, 51 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 3c8cf86201c3..a61aede1b391 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -14,6 +14,20 @@ struct memory_block; struct resource; #ifdef CONFIG_MEMORY_HOTPLUG +/* + * Return page for the valid pfn only if the page is online. All pfn + * walkers which rely on the fully initialized page->flags and others + * should use this rather than pfn_valid && pfn_to_page + */ +#define pfn_to_online_page(pfn) \ +({ \ + struct page *___page = NULL; \ + unsigned long ___nr = pfn_to_section_nr(pfn); \ + \ + if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr))\ + ___page = pfn_to_page(pfn); \ + ___page; \ +}) /* * Types for free bootmem stored in page->lru.next. These have to be in @@ -203,6 +217,14 @@ extern void set_zone_contiguous(struct zone *zone); extern void clear_zone_contiguous(struct zone *zone); #else /* ! 
CONFIG_MEMORY_HOTPLUG */ +#define pfn_to_online_page(pfn) \ +({ \ + struct page *___page = NULL; \ + if (pfn_valid(pfn)) \ + ___page = pfn_to_page(pfn); \ + ___page; \ + }) + /* * Stub functions for when hotplug is off */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 976a1202bec1..2aaf7e08c5a8 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1144,9 +1144,10 @@ extern unsigned long usemap_size(void); */ #define SECTION_MARKED_PRESENT (1UL<<0) #define SECTION_HAS_MEM_MAP (1UL<<1) -#define SECTION_MAP_LAST_BIT (1UL<<2) +#define SECTION_IS_ONLINE (1UL<<2) +#define SECTION_MAP_LAST_BIT (1UL<<3) #define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1)) -#define SECTION_NID_SHIFT 2 +#define SECTION_NID_SHIFT 3 static inline struct page *__section_mem_map_addr(struct mem_section *section) { @@ -1175,6 +1176,23 @@ static inline int valid_section_nr(unsigned long nr) return valid_section(__nr_to_section(nr)); } +static inline int online_section(struct mem_section *section) +{ + return (section && (section->section_mem_map & SECTION_IS_ONLINE)); +} + +static inline int online_section_nr(unsigned long nr) +{ + return online_section(__nr_to_section(nr)); +} + +#ifdef CONFIG_MEMORY_HOTPLUG +void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn); +#ifdef CONFIG_MEMORY_HOTREMOVE +void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn); +#endif +#endif + static inline struct mem_section *__pfn_to_section(unsigned long pfn) { return __nr_to_section(pfn_to_section_nr(pfn)); @@ -1253,10 +1271,15 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); #ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL /* * pfn_valid() is meant to be able to tell if a given PFN has valid memmap - * associated with it or not. In FLATMEM, it is expected that holes always - * have valid memmap as long as there is valid PFNs either side of the hole. 
- * In SPARSEMEM, it is assumed that a valid section has a memmap for the - * entire section. + * associated with it or not. This means that a struct page exists for this + * pfn. The caller cannot assume the page is fully initialized in general. + * Hotplugable pages might not have been onlined yet. pfn_to_online_page() + * will ensure the struct page is fully online and initialized. Special pages + * (e.g. ZONE_DEVICE) are never onlined and should be treated accordingly. + * + * In FLATMEM, it is expected that holes always have valid memmap as long as + * there is valid PFNs either side of the hole. In SPARSEMEM, it is assumed + * that a valid section has a memmap for the entire section. * * However, an ARM, and maybe other embedded architectures in the future * free memmap backing holes to save memory on the assumption the memmap is -- cgit v1.2.3 From f1dd2cd13c4bbbc9a7c4617b3b034fa643de98fe Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 6 Jul 2017 15:38:11 -0700 Subject: mm, memory_hotplug: do not associate hotadded memory to zones until online The current memory hotplug implementation relies on having all the struct pages associate with a zone/node during the physical hotplug phase (arch_add_memory->__add_pages->__add_section->__add_zone). In the vast majority of cases this means that they are added to ZONE_NORMAL. This has been so since 9d99aaa31f59 ("[PATCH] x86_64: Support memory hotadd without sparsemem") and it wasn't a big deal back then because movable onlining didn't exist yet. Much later memory hotplug wanted to (ab)use ZONE_MOVABLE for movable onlining 511c2aba8f07 ("mm, memory-hotplug: dynamic configure movable memory and portion memory") and then things got more complicated. Rather than reconsidering the zone association which was no longer needed (because the memory hotplug already depended on SPARSEMEM) a convoluted semantic of zone shifting has been developed. 
Only the currently last memblock or the one adjacent to the zone_movable can be onlined movable. This essentially means that the online type changes as the new memblocks are added. Let's simulate memory hot online manually $ echo 0x100000000 > /sys/devices/system/memory/probe $ grep . /sys/devices/system/memory/memory32/valid_zones Normal Movable $ echo $((0x100000000+(128<<20))) > /sys/devices/system/memory/probe $ grep . /sys/devices/system/memory/memory3?/valid_zones /sys/devices/system/memory/memory32/valid_zones:Normal /sys/devices/system/memory/memory33/valid_zones:Normal Movable $ echo $((0x100000000+2*(128<<20))) > /sys/devices/system/memory/probe $ grep . /sys/devices/system/memory/memory3?/valid_zones /sys/devices/system/memory/memory32/valid_zones:Normal /sys/devices/system/memory/memory33/valid_zones:Normal /sys/devices/system/memory/memory34/valid_zones:Normal Movable $ echo online_movable > /sys/devices/system/memory/memory34/state $ grep . /sys/devices/system/memory/memory3?/valid_zones /sys/devices/system/memory/memory32/valid_zones:Normal /sys/devices/system/memory/memory33/valid_zones:Normal Movable /sys/devices/system/memory/memory34/valid_zones:Movable Normal This is an awkward semantic because an udev event is sent as soon as the block is onlined and an udev handler might want to online it based on some policy (e.g. association with a node) but it will inherently race with new blocks showing up. This patch changes the physical online phase to not associate pages with any zone at all. All the pages are just marked reserved and wait for the onlining phase to be associated with the zone as per the online request. There are only two requirements - existing ZONE_NORMAL and ZONE_MOVABLE cannot overlap - ZONE_NORMAL precedes ZONE_MOVABLE in physical addresses the latter one is not an inherent requirement and can be changed in the future. It preserves the current behavior and made the code slightly simpler. This is subject to change in future. 
This means that the same physical online steps as above will lead to the following state: Normal Movable /sys/devices/system/memory/memory32/valid_zones:Normal Movable /sys/devices/system/memory/memory33/valid_zones:Normal Movable /sys/devices/system/memory/memory32/valid_zones:Normal Movable /sys/devices/system/memory/memory33/valid_zones:Normal Movable /sys/devices/system/memory/memory34/valid_zones:Normal Movable /sys/devices/system/memory/memory32/valid_zones:Normal Movable /sys/devices/system/memory/memory33/valid_zones:Normal Movable /sys/devices/system/memory/memory34/valid_zones:Movable Implementation: The current move_pfn_range is reimplemented to check the above requirements (allow_online_pfn_range) and then updates the respective zone (move_pfn_range_to_zone), the pgdat and links all the pages in the pfn range with the zone/node. __add_pages is updated to not require the zone and only initializes sections in the range. This allowed to simplify the arch_add_memory code (s390 could get rid of quite some of code). devm_memremap_pages is the only user of arch_add_memory which relies on the zone association because it only hooks into the memory hotplug only half way. It uses it to associate the new memory with ZONE_DEVICE but doesn't allow it to be {on,off}lined via sysfs. This means that this particular code path has to call move_pfn_range_to_zone explicitly. The original zone shifting code is kept in place and will be removed in the follow up patch for an easier review. Please note that this patch also changes the original behavior when offlining a memory block adjacent to another zone (Normal vs. Movable) used to allow to change its movable type. This will be handled later. 
[richard.weiyang@gmail.com: simplify zone_intersects()] Link: http://lkml.kernel.org/r/20170616092335.5177-1-richard.weiyang@gmail.com [richard.weiyang@gmail.com: remove duplicate call for set_page_links] Link: http://lkml.kernel.org/r/20170616092335.5177-2-richard.weiyang@gmail.com [akpm@linux-foundation.org: remove unused local `i'] Link: http://lkml.kernel.org/r/20170515085827.16474-12-mhocko@kernel.org Signed-off-by: Michal Hocko Signed-off-by: Wei Yang Tested-by: Dan Williams Tested-by: Reza Arbab Acked-by: Heiko Carstens # For s390 bits Acked-by: Vlastimil Babka Cc: Martin Schwidefsky Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Balbir Singh Cc: Daniel Kiper Cc: David Rientjes Cc: Igor Mammedov Cc: Jerome Glisse Cc: Joonsoo Kim Cc: Mel Gorman Cc: Tobias Regnery Cc: Toshi Kani Cc: Vitaly Kuznetsov Cc: Xishi Qiu Cc: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 13 +++++++------ include/linux/mmzone.h | 16 ++++++++++++++++ 2 files changed, 23 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index a61aede1b391..8a07a49fd8dc 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -123,8 +123,8 @@ extern int __remove_pages(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages); #endif /* CONFIG_MEMORY_HOTREMOVE */ -/* reasonably generic interface to expand the physical pages in a zone */ -extern int __add_pages(int nid, struct zone *zone, unsigned long start_pfn, +/* reasonably generic interface to expand the physical pages */ +extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, bool want_memblock); #ifdef CONFIG_NUMA @@ -299,15 +299,16 @@ extern int add_memory_resource(int nid, struct resource *resource, bool online); extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default, bool for_device); extern int arch_add_memory(int nid, u64 
start, u64 size, bool for_device); +extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, + unsigned long nr_pages); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern bool is_memblock_offlined(struct memory_block *mem); extern void remove_memory(int nid, u64 start, u64 size); -extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn); +extern int sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn); extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, unsigned long map_offset); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); -extern bool zone_can_shift(unsigned long pfn, unsigned long nr_pages, - enum zone_type target, int *zone_shift); - +extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages, + int online_type); #endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 2aaf7e08c5a8..abc1641011f2 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -532,6 +532,22 @@ static inline bool zone_is_empty(struct zone *zone) return zone->spanned_pages == 0; } +/* + * Return true if [start_pfn, start_pfn + nr_pages) range has a non-empty + * intersection with the given zone + */ +static inline bool zone_intersects(struct zone *zone, + unsigned long start_pfn, unsigned long nr_pages) +{ + if (zone_is_empty(zone)) + return false; + if (start_pfn >= zone_end_pfn(zone) || + start_pfn + nr_pages <= zone->zone_start_pfn) + return false; + + return true; +} + /* * The "priority" of VM scanning is how much of the queues we will scan in one * go. 
A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the -- cgit v1.2.3 From c246a213f5bad687c6c2cea27d7265eaf8f6f5d7 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 6 Jul 2017 15:38:18 -0700 Subject: mm, memory_hotplug: do not assume ZONE_NORMAL is default kernel zone Heiko Carstens has noticed that he can generate overlapping zones for ZONE_DMA and ZONE_NORMAL: DMA [mem 0x0000000000000000-0x000000007fffffff] Normal [mem 0x0000000080000000-0x000000017fffffff] $ cat /sys/devices/system/memory/block_size_bytes 10000000 $ cat /sys/devices/system/memory/memory5/valid_zones DMA $ echo 0 > /sys/devices/system/memory/memory5/online $ cat /sys/devices/system/memory/memory5/valid_zones Normal $ echo 1 > /sys/devices/system/memory/memory5/online Normal $ cat /proc/zoneinfo Node 0, zone DMA spanned 524288 <----- present 458752 managed 455078 start_pfn: 0 <----- Node 0, zone Normal spanned 720896 present 589824 managed 571648 start_pfn: 327680 <----- The reason is that we assume that the default zone for kernel onlining is ZONE_NORMAL. This was a simplification introduced by the memory hotplug rework and it is easily fixable by checking the range overlap in the zone order and considering the first matching zone as the default one. If there is no such zone then assume ZONE_NORMAL as we have been doing so far. 
Fixes: "mm, memory_hotplug: do not associate hotadded memory to zones until online" Link: http://lkml.kernel.org/r/20170601083746.4924-3-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Heiko Carstens Tested-by: Heiko Carstens Acked-by: Vlastimil Babka Cc: Dan Williams Cc: Reza Arbab Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 8a07a49fd8dc..4d65a2fcac15 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -311,4 +311,6 @@ extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages, int online_type); +extern struct zone *default_zone_for_pfn(int nid, unsigned long pfn, + unsigned long nr_pages); #endif /* __LINUX_MEMORY_HOTPLUG_H */ -- cgit v1.2.3 From 3d79a728f9b2e6ddcce4e02c91c4de1076548a4c Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 6 Jul 2017 15:38:21 -0700 Subject: mm, memory_hotplug: replace for_device by want_memblock in arch_add_memory arch_add_memory gets for_device argument which then controls whether we want to create memblocks for created memory sections. Simplify the logic by telling whether we want memblocks directly rather than going through pointless negation. This also makes the api easier to understand because it is clear what we want rather than nothing telling for_device which can mean anything. This shouldn't introduce any functional change. 
Link: http://lkml.kernel.org/r/20170515085827.16474-13-mhocko@kernel.org Signed-off-by: Michal Hocko Tested-by: Dan Williams Acked-by: Vlastimil Babka Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Balbir Singh Cc: Daniel Kiper Cc: David Rientjes Cc: Heiko Carstens Cc: Igor Mammedov Cc: Jerome Glisse Cc: Joonsoo Kim Cc: Martin Schwidefsky Cc: Mel Gorman Cc: Reza Arbab Cc: Tobias Regnery Cc: Toshi Kani Cc: Vitaly Kuznetsov Cc: Xishi Qiu Cc: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 4d65a2fcac15..780c806e17d3 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -298,7 +298,7 @@ extern int add_memory(int nid, u64 start, u64 size); extern int add_memory_resource(int nid, struct resource *resource, bool online); extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default, bool for_device); -extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device); +extern int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); -- cgit v1.2.3 From 559bfc7d1beff814a8e9999d102bf1157ef1f010 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 6 Jul 2017 15:38:28 -0700 Subject: mm, memory_hotplug: remove unused cruft after memory hotplug rework zone_for_memory doesn't have any user anymore as well as the whole zone shifting infrastructure so drop them all. This shouldn't introduce any functional changes. 
Link: http://lkml.kernel.org/r/20170515085827.16474-15-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Balbir Singh Cc: Dan Williams Cc: Daniel Kiper Cc: David Rientjes Cc: Heiko Carstens Cc: Igor Mammedov Cc: Jerome Glisse Cc: Joonsoo Kim Cc: Martin Schwidefsky Cc: Mel Gorman Cc: Reza Arbab Cc: Tobias Regnery Cc: Toshi Kani Cc: Vitaly Kuznetsov Cc: Xishi Qiu Cc: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 780c806e17d3..ed167541e4fc 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -296,8 +296,6 @@ extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, void *arg, int (*func)(struct memory_block *, void *)); extern int add_memory(int nid, u64 start, u64 size); extern int add_memory_resource(int nid, struct resource *resource, bool online); -extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default, - bool for_device); extern int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages); -- cgit v1.2.3 From 94310cbcaa3c2bc1b790ba997270f28dc173d8ce Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 6 Jul 2017 15:38:38 -0700 Subject: mm/madvise: enable (soft|hard) offline of HugeTLB pages at PGD level Though migrating gigantic HugeTLB pages does not sound much like real world use case, they can be affected by memory errors. Hence migration at the PGD level HugeTLB pages should be supported just to enable soft and hard offline use cases. While allocating the new gigantic HugeTLB page, it should not matter whether new page comes from the same node or not. 
There would be very few gigantic pages on the system after all, so we should not be bothered about node locality when trying to save a big page from crashing. This change renames dequeue_huge_page_node() function as dequeue_huge_page_node_exact() preserving its original functionality. Now the new dequeue_huge_page_node() function scans through all available online nodes to allocate a huge page for the NUMA_NO_NODE case and just falls back to calling dequeue_huge_page_node_exact() for all other cases. [arnd@arndb.de: make hstate_is_gigantic() inline] Link: http://lkml.kernel.org/r/20170522124748.3911296-1-arnd@arndb.de Link: http://lkml.kernel.org/r/20170516100509.20122-1-khandual@linux.vnet.ibm.com Signed-off-by: Anshuman Khandual Signed-off-by: Arnd Bergmann Cc: "Aneesh Kumar K.V" Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index b857fc8cc2ec..5f539f985e2a 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -466,7 +466,11 @@ extern int dissolve_free_huge_pages(unsigned long start_pfn, static inline bool hugepage_migration_supported(struct hstate *h) { #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION - return huge_page_shift(h) == PMD_SHIFT; + if ((huge_page_shift(h) == PMD_SHIFT) || + (huge_page_shift(h) == PGDIR_SHIFT)) + return true; + else + return false; #else return false; #endif @@ -518,6 +522,11 @@ struct hstate {}; #define vma_mmu_pagesize(v) PAGE_SIZE #define huge_page_order(h) 0 #define huge_page_shift(h) PAGE_SHIFT +static inline bool hstate_is_gigantic(struct hstate *h) +{ + return false; +} + static inline unsigned int pages_per_huge_page(struct hstate *h) { return 1; -- cgit v1.2.3 From d5ed7444dafb94b6877410d1b66a846eb7184a09 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 6 Jul 2017 15:38:47 -0700 Subject:
mm/hugetlb: export hugetlb_entry_migration helper We will be using this later from the ppc64 code. Change the return type to bool. Link: http://lkml.kernel.org/r/1494926612-23928-4-git-send-email-aneesh.kumar@linux.vnet.ibm.com Signed-off-by: Aneesh Kumar K.V Reviewed-by: Naoya Horiguchi Cc: Anshuman Khandual Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 5f539f985e2a..aa1df49b9a14 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -126,6 +126,7 @@ int pud_huge(pud_t pud); unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot); +bool is_hugetlb_entry_migration(pte_t pte); #else /* !CONFIG_HUGETLB_PAGE */ static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma) -- cgit v1.2.3 From faaa5b62d3f7907e217b179556038f9f8e157ee0 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 6 Jul 2017 15:38:50 -0700 Subject: mm/follow_page_mask: add support for hugetlb pgd entries ppc64 supports pgd hugetlb entries. Add code to handle hugetlb pgd entries to follow_page_mask so that ppc64 can switch to it to handle hugetlb entries.
Link: http://lkml.kernel.org/r/1494926612-23928-5-git-send-email-aneesh.kumar@linux.vnet.ibm.com Signed-off-by: Anshuman Khandual Signed-off-by: Aneesh Kumar K.V Cc: Naoya Horiguchi Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index aa1df49b9a14..3656ce605dc9 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -121,6 +121,9 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int flags); struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int flags); +struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address, + pgd_t *pgd, int flags); + int pmd_huge(pmd_t pmd); int pud_huge(pud_t pud); unsigned long hugetlb_change_protection(struct vm_area_struct *vma, @@ -150,6 +153,7 @@ static inline void hugetlb_show_meminfo(void) } #define follow_huge_pmd(mm, addr, pmd, flags) NULL #define follow_huge_pud(mm, addr, pud, flags) NULL +#define follow_huge_pgd(mm, addr, pgd, flags) NULL #define prepare_hugepage_range(file, addr, len) (-EINVAL) #define pmd_huge(x) 0 #define pud_huge(x) 0 -- cgit v1.2.3 From e22992923f741c951b830121655b58342fce202e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 6 Jul 2017 15:38:53 -0700 Subject: mm/hugetlb: move default definition of hugepd_t earlier in the header This enable to use the hugepd_t type early. No functional change in this patch. 
Link: http://lkml.kernel.org/r/1494926612-23928-6-git-send-email-aneesh.kumar@linux.vnet.ibm.com Signed-off-by: Aneesh Kumar K.V Cc: Anshuman Khandual Cc: Naoya Horiguchi Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 3656ce605dc9..f01427c79947 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -14,6 +14,30 @@ struct ctl_table; struct user_struct; struct mmu_gather; +#ifndef is_hugepd +/* + * Some architectures requires a hugepage directory format that is + * required to support multiple hugepage sizes. For example + * a4fe3ce76 "powerpc/mm: Allow more flexible layouts for hugepage pagetables" + * introduced the same on powerpc. This allows for a more flexible hugepage + * pagetable layout. + */ +typedef struct { unsigned long pd; } hugepd_t; +#define is_hugepd(hugepd) (0) +#define __hugepd(x) ((hugepd_t) { (x) }) +static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr, + unsigned pdshift, unsigned long end, + int write, struct page **pages, int *nr) +{ + return 0; +} +#else +extern int gup_huge_pd(hugepd_t hugepd, unsigned long addr, + unsigned pdshift, unsigned long end, + int write, struct page **pages, int *nr); +#endif + + #ifdef CONFIG_HUGETLB_PAGE #include @@ -222,29 +246,6 @@ static inline int pud_write(pud_t pud) } #endif -#ifndef is_hugepd -/* - * Some architectures requires a hugepage directory format that is - * required to support multiple hugepage sizes. For example - * a4fe3ce76 "powerpc/mm: Allow more flexible layouts for hugepage pagetables" - * introduced the same on powerpc. This allows for a more flexible hugepage - * pagetable layout. 
- */ -typedef struct { unsigned long pd; } hugepd_t; -#define is_hugepd(hugepd) (0) -#define __hugepd(x) ((hugepd_t) { (x) }) -static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr, - unsigned pdshift, unsigned long end, - int write, struct page **pages, int *nr) -{ - return 0; -} -#else -extern int gup_huge_pd(hugepd_t hugepd, unsigned long addr, - unsigned pdshift, unsigned long end, - int write, struct page **pages, int *nr); -#endif - #define HUGETLB_ANON_FILE "anon_hugepage" enum { -- cgit v1.2.3 From 4dc71451a2078efcad2f66bd6ef130d2296827b1 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 6 Jul 2017 15:38:56 -0700 Subject: mm/follow_page_mask: add support for hugepage directory entry Architectures like ppc64 support hugepage sizes that are not mapped to any of the page table levels. Instead they add an alternate page table entry format called hugepage directory (hugepd). hugepd indicates that the page table entry maps to a set of hugetlb pages. Add support for this in generic follow_page_mask code. We already support this format in the generic gup code. The default implementation prints a warning and returns NULL.
We will add ppc64 support in later patches. Link: http://lkml.kernel.org/r/1494926612-23928-7-git-send-email-aneesh.kumar@linux.vnet.ibm.com Signed-off-by: Aneesh Kumar K.V Cc: Anshuman Khandual Cc: Naoya Horiguchi Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index f01427c79947..c92a1f0c7240 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -141,6 +141,9 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr); int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep); struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write); +struct page *follow_huge_pd(struct vm_area_struct *vma, + unsigned long address, hugepd_t hpd, + int flags, int pdshift); struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int flags); struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, @@ -175,6 +178,7 @@ static inline void hugetlb_report_meminfo(struct seq_file *m) static inline void hugetlb_show_meminfo(void) { } +#define follow_huge_pd(vma, addr, hpd, flags, pdshift) NULL #define follow_huge_pmd(mm, addr, pmd, flags) NULL #define follow_huge_pud(mm, addr, pud, flags) NULL #define follow_huge_pgd(mm, addr, pgd, flags) NULL -- cgit v1.2.3 From 3749a8f008eac3355a9e50b366ba08317a7e9cf8 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 6 Jul 2017 15:39:08 -0700 Subject: mm: zero hash tables in allocator Add a new flag HASH_ZERO which when provided guarantees that the hash table that is returned by alloc_large_system_hash() is zeroed. In most cases that is what is needed by the caller. Use the page level allocator's __GFP_ZERO flag to zero the memory.
It uses memset(), which is an efficient method to zero memory and is optimized for most platforms. Link: http://lkml.kernel.org/r/1488432825-92126-3-git-send-email-pasha.tatashin@oracle.com Signed-off-by: Pavel Tatashin Reviewed-by: Babu Moger Cc: David Miller Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 962164d36506..e223d91b6439 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -358,6 +358,7 @@ extern void *alloc_large_system_hash(const char *tablename, #define HASH_EARLY 0x00000001 /* Allocating during early boot? */ #define HASH_SMALL 0x00000002 /* sub-page allocation allowed, min * shift passed via *_hash_shift */ +#define HASH_ZERO 0x00000004 /* Zero allocated hash table */ /* Only NUMA needs hash distribution. 64bit NUMA architectures have * sufficient vmalloc space. -- cgit v1.2.3 From 7868a2087ec13ec4a5df0c5e00999863be132ba8 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Thu, 6 Jul 2017 15:39:42 -0700 Subject: mm/hugetlb: add size parameter to huge_pte_offset() A poisoned or migrated hugepage is stored as a swap entry in the page tables. On architectures that support hugepages consisting of contiguous page table entries (such as on arm64) this leads to ambiguity in determining the page table entry to return in huge_pte_offset() when a poisoned entry is encountered. Let's remove the ambiguity by adding a size parameter to convey additional information about the requested address. Also fix up the definition/usage of huge_pte_offset() throughout the tree. Link: http://lkml.kernel.org/r/20170522133604.11392-4-punit.agrawal@arm.com Signed-off-by: Punit Agrawal Acked-by: Steve Capper Cc: Catalin Marinas Cc: Will Deacon Cc: Tony Luck Cc: Fenghua Yu Cc: James Hogan (odd fixer:METAG ARCHITECTURE) Cc: Ralf Baechle (supporter:MIPS) Cc: "James E.J.
Bottomley" Cc: Helge Deller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Yoshinori Sato Cc: Rich Felker Cc: "David S. Miller" Cc: Chris Metcalf Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Alexander Viro Cc: Michal Hocko Cc: Mike Kravetz Cc: Naoya Horiguchi Cc: "Aneesh Kumar K.V" Cc: "Kirill A. Shutemov" Cc: Hillf Danton Cc: Mark Rutland Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c92a1f0c7240..31e665fbcf76 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -137,7 +137,8 @@ extern struct list_head huge_boot_pages; pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz); -pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr); +pte_t *huge_pte_offset(struct mm_struct *mm, + unsigned long addr, unsigned long sz); int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep); struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write); @@ -190,7 +191,7 @@ static inline void hugetlb_show_meminfo(void) #define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; }) #define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \ src_addr, pagep) ({ BUG(); 0; }) -#define huge_pte_offset(mm, address) 0 +#define huge_pte_offset(mm, address, sz) 0 static inline int dequeue_hwpoisoned_huge_page(struct page *page) { return 0; -- cgit v1.2.3 From e5251fd43007f9e1155331f0fa30685604a8e3a1 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Thu, 6 Jul 2017 15:39:50 -0700 Subject: mm/hugetlb: introduce set_huge_swap_pte_at() helper set_huge_pte_at(), an architecture callback to populate hugepage ptes, does not provide the range of virtual memory that is targeted. 
This leads to ambiguity when dealing with swap entries on architectures that support hugepages consisting of contiguous ptes. Fix the problem by introducing an overridable helper that is called when populating the page tables with swap entries. The size of the targeted region is provided to the helper to help determine the number of entries to be updated. Provide a default implementation that maintains the current behaviour. [punit.agrawal@arm.com: v4] Link: http://lkml.kernel.org/r/20170524115409.31309-8-punit.agrawal@arm.com [punit.agrawal@arm.com: add an empty definition for set_huge_swap_pte_at()] Link: http://lkml.kernel.org/r/20170525171331.31469-1-punit.agrawal@arm.com Link: http://lkml.kernel.org/r/20170522133604.11392-6-punit.agrawal@arm.com Signed-off-by: Punit Agrawal Acked-by: Steve Capper Cc: Mike Kravetz Cc: "Aneesh Kumar K.V" Cc: Catalin Marinas Cc: Will Deacon Cc: Naoya Horiguchi Cc: "Kirill A. Shutemov" Cc: Mark Rutland Cc: Hillf Danton Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 31e665fbcf76..46bfb702e7d6 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -516,6 +516,14 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm) { atomic_long_sub(l, &mm->hugetlb_usage); } + +#ifndef set_huge_swap_pte_at +static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned long sz) +{ + set_huge_pte_at(mm, addr, ptep, pte); +} +#endif #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; #define alloc_huge_page(v, a, r) NULL @@ -565,6 +573,11 @@ static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m) static inline void hugetlb_count_sub(long l, struct mm_struct *mm) { } + +static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned 
long addr, + pte_t *ptep, pte_t pte, unsigned long sz) +{ +} #endif /* CONFIG_HUGETLB_PAGE */ static inline spinlock_t *huge_pte_lock(struct hstate *h, -- cgit v1.2.3 From 45816682b2cd6771cf63cb7dc7dbebdd827a0132 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 6 Jul 2017 15:39:59 -0700 Subject: mm, mempolicy: stop adjusting current->il_next in mpol_rebind_nodemask() The task->il_next variable stores the next allocation node id for task's MPOL_INTERLEAVE policy. mpol_rebind_nodemask() updates interleave and bind mempolicies due to changing cpuset mems. Currently it also tries to make sure that current->il_next is valid within the updated nodemask. This is bogus, because 1) we are updating potentially any task's mempolicy, not just current, and 2) we might be updating a per-vma mempolicy, not task one. The interleave_nodes() function that uses il_next can cope fine with the value not being within the currently allowed nodes, so this hasn't manifested as an actual issue. We can remove the need for updating il_next completely by changing it to il_prev and store the node id of the previous interleave allocation instead of the next id. Then interleave_nodes() can calculate the next id using the current nodemask and also store it as il_prev, except when querying the next node via do_get_mempolicy(). Link: http://lkml.kernel.org/r/20170517081140.30654-3-vbabka@suse.cz Signed-off-by: Vlastimil Babka Reviewed-by: Christoph Lameter Cc: "Kirill A. 
Shutemov" Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: David Rientjes Cc: Dimitri Sivanich Cc: Hugh Dickins Cc: Li Zefan Cc: Mel Gorman Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9c4ca7433d9d..5e8759b1b428 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -904,7 +904,7 @@ struct task_struct { #ifdef CONFIG_NUMA /* Protected by alloc_lock: */ struct mempolicy *mempolicy; - short il_next; + short il_prev; short pref_node_fork; #endif #ifdef CONFIG_NUMA_BALANCING -- cgit v1.2.3 From 04ec6264f28793e56114d0a367bb4d3af667ab6a Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 6 Jul 2017 15:40:03 -0700 Subject: mm, page_alloc: pass preferred nid instead of zonelist to allocator The main allocator function __alloc_pages_nodemask() takes a zonelist pointer as one of its parameters. All of its callers directly or indirectly obtain the zonelist via node_zonelist() using a preferred node id and gfp_mask. We can make the code a bit simpler by doing the zonelist lookup in __alloc_pages_nodemask(), passing it a preferred node id instead (gfp_mask is already another parameter). There are some code size benefits thanks to removal of inlined node_zonelist(): bloat-o-meter add/remove: 2/2 grow/shrink: 4/36 up/down: 399/-1351 (-952) This will also make things simpler if we proceed with converting cpusets to zonelists. Link: http://lkml.kernel.org/r/20170517081140.30654-4-vbabka@suse.cz Signed-off-by: Vlastimil Babka Reviewed-by: Christoph Lameter Acked-by: Michal Hocko Cc: Dimitri Sivanich Cc: "Kirill A. 
Shutemov" Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: David Rientjes Cc: Hugh Dickins Cc: Li Zefan Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 11 +++++------ include/linux/mempolicy.h | 6 +++--- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index a89d37e8b387..4c6656f1fee7 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -432,14 +432,13 @@ static inline void arch_alloc_page(struct page *page, int order) { } #endif struct page * -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, - struct zonelist *zonelist, nodemask_t *nodemask); +__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, + nodemask_t *nodemask); static inline struct page * -__alloc_pages(gfp_t gfp_mask, unsigned int order, - struct zonelist *zonelist) +__alloc_pages(gfp_t gfp_mask, unsigned int order, int preferred_nid) { - return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL); + return __alloc_pages_nodemask(gfp_mask, order, preferred_nid, NULL); } /* @@ -452,7 +451,7 @@ __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); VM_WARN_ON(!node_online(nid)); - return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask)); + return __alloc_pages(gfp_mask, order, nid); } /* diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 5f4d8281832b..ecb6cbeede5a 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -146,7 +146,7 @@ extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new, enum mpol_rebind_step step); extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); -extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, +extern int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol, nodemask_t **nodemask); extern bool 
init_nodemask_of_mempolicy(nodemask_t *mask); @@ -269,13 +269,13 @@ static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) { } -static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, +static inline int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol, nodemask_t **nodemask) { *mpol = NULL; *nodemask = NULL; - return node_zonelist(0, gfp_flags); + return 0; } static inline bool init_nodemask_of_mempolicy(nodemask_t *m) -- cgit v1.2.3 From 213980c0f23b6c4932fd5516da7e8443b2a615ea Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 6 Jul 2017 15:40:06 -0700 Subject: mm, mempolicy: simplify rebinding mempolicies when updating cpusets Commit c0ff7453bb5c ("cpuset,mm: fix no node to alloc memory when changing cpuset's mems") has introduced a two-step protocol when rebinding task's mempolicy due to cpuset update, in order to avoid a parallel allocation seeing an empty effective nodemask and failing. Later, commit cc9a6c877661 ("cpuset: mm: reduce large amounts of memory barrier related damage v3") introduced a seqlock protection and removed the synchronization point between the two update steps. At that point (or perhaps later), the two-step rebinding became unnecessary. Currently it only makes sure that the update first adds new nodes in step 1 and then removes nodes in step 2. Without memory barriers the effects are questionable, and even then this cannot prevent a parallel zonelist iteration checking the nodemask at each step to observe all nodes as unusable for allocation. We now fully rely on the seqlock to prevent premature OOMs and allocation failures. We can thus remove the two-step update parts and simplify the code. Link: http://lkml.kernel.org/r/20170517081140.30654-5-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Michal Hocko Cc: "Kirill A. 
Shutemov" Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: Christoph Lameter Cc: David Rientjes Cc: Dimitri Sivanich Cc: Hugh Dickins Cc: Li Zefan Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index ecb6cbeede5a..3a58b4be1b0c 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -142,8 +142,7 @@ bool vma_policy_mof(struct vm_area_struct *vma); extern void numa_default_policy(void); extern void numa_policy_init(void); -extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new, - enum mpol_rebind_step step); +extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new); extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); extern int huge_node(struct vm_area_struct *vma, @@ -260,8 +259,7 @@ static inline void numa_default_policy(void) } static inline void mpol_rebind_task(struct task_struct *tsk, - const nodemask_t *new, - enum mpol_rebind_step step) + const nodemask_t *new) { } -- cgit v1.2.3 From 94f4a1618b4c2b268f9e70bd1516932927782293 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 6 Jul 2017 15:40:22 -0700 Subject: mm: kmemleak: treat vm_struct as alternative reference to vmalloc'ed objects Kmemleak requires that vmalloc'ed objects have a minimum reference count of 2: one in the corresponding vm_struct object and the other owned by the vmalloc() caller. There are cases, however, where the original vmalloc() returned pointer is lost and, instead, a pointer to vm_struct is stored (see free_thread_stack()). Kmemleak currently reports such objects as leaks. This patch adds support for treating any surplus references to an object as additional references to a specified object. 
It introduces the kmemleak_vmalloc() API function which takes a vm_struct pointer and sets its surplus reference passing to the actual vmalloc() returned pointer. The __vmalloc_node_range() calling site has been modified accordingly. Link: http://lkml.kernel.org/r/1495726937-23557-4-git-send-email-catalin.marinas@arm.com Signed-off-by: Catalin Marinas Reported-by: "Luis R. Rodriguez" Cc: Michal Hocko Cc: Andy Lutomirski Cc: "Luis R. Rodriguez" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmemleak.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 1c2a32829620..590343f6c1b1 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -22,6 +22,7 @@ #define __KMEMLEAK_H #include +#include #ifdef CONFIG_DEBUG_KMEMLEAK @@ -30,6 +31,8 @@ extern void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp) __ref; extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, gfp_t gfp) __ref; +extern void kmemleak_vmalloc(const struct vm_struct *area, size_t size, + gfp_t gfp) __ref; extern void kmemleak_free(const void *ptr) __ref; extern void kmemleak_free_part(const void *ptr, size_t size) __ref; extern void kmemleak_free_percpu(const void __percpu *ptr) __ref; @@ -81,6 +84,10 @@ static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, gfp_t gfp) { } +static inline void kmemleak_vmalloc(const struct vm_struct *area, size_t size, + gfp_t gfp) +{ +} static inline void kmemleak_free(const void *ptr) { } -- cgit v1.2.3 From 2262185c5b287f2758afda79c149b7cf6bee165c Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 6 Jul 2017 15:40:25 -0700 Subject: mm: per-cgroup memory reclaim stats Track the following reclaim counters for every memory cgroup: PGREFILL, PGSCAN, PGSTEAL, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE and PGLAZYFREED. 
These values are exposed using the memory.stats interface of cgroup v2. The meaning of each value is the same as for global counters, available using /proc/vmstat. Also, for consistency, rename mem_cgroup_count_vm_event() to count_memcg_event_mm(). Link: http://lkml.kernel.org/r/1494530183-30808-1-git-send-email-guro@fb.com Signed-off-by: Roman Gushchin Suggested-by: Johannes Weiner Acked-by: Michal Hocko Acked-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Tejun Heo Cc: Li Zefan Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 48 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 899949bbb2f9..b2a5b1cd4e55 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -357,6 +357,17 @@ static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) } struct mem_cgroup *mem_cgroup_from_id(unsigned short id); +static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec) +{ + struct mem_cgroup_per_node *mz; + + if (mem_cgroup_disabled()) + return NULL; + + mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); + return mz->memcg; +} + /** * parent_mem_cgroup - find the accounting parent of a memcg * @memcg: memcg whose parent to find @@ -546,8 +557,23 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, unsigned long *total_scanned); -static inline void mem_cgroup_count_vm_event(struct mm_struct *mm, - enum vm_event_item idx) +static inline void count_memcg_events(struct mem_cgroup *memcg, + enum vm_event_item idx, + unsigned long count) +{ + if (!mem_cgroup_disabled()) + this_cpu_add(memcg->stat->events[idx], count); +} + +static inline void count_memcg_page_event(struct page *page, + enum memcg_stat_item idx) +{ + if (page->mem_cgroup) + count_memcg_events(page->mem_cgroup, idx, 1); 
+} + +static inline void count_memcg_event_mm(struct mm_struct *mm, + enum vm_event_item idx) { struct mem_cgroup *memcg; @@ -675,6 +701,11 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) return NULL; } +static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec) +{ + return NULL; +} + static inline bool mem_cgroup_online(struct mem_cgroup *memcg) { return true; @@ -789,8 +820,19 @@ static inline void mem_cgroup_split_huge_fixup(struct page *head) { } +static inline void count_memcg_events(struct mem_cgroup *memcg, + enum vm_event_item idx, + unsigned long count) +{ +} + +static inline void count_memcg_page_event(struct page *page, + enum memcg_stat_item idx) +{ +} + static inline -void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) +void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) { } #endif /* CONFIG_MEMCG */ -- cgit v1.2.3 From 8e675f7af50747e1e9e96538e8706767e4f80e2c Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 6 Jul 2017 15:40:28 -0700 Subject: mm/oom_kill: count global and memory cgroup oom kills Show count of oom killer invocations in /proc/vmstat and count of processes killed in memory cgroup in knob "memory.events" (in memory.oom_control for v1 cgroup). Also describe difference between "oom" and "oom_kill" in memory cgroup documentation. Currently oom in memory cgroup kills tasks iff shortage has happened inside page fault. These counters help in monitoring oom kills - for now the only way is grepping for magic words in kernel log.
[akpm@linux-foundation.org: fix for mem_cgroup_count_vm_event() rename] [akpm@linux-foundation.org: fix comment, per Konstantin] Link: http://lkml.kernel.org/r/149570810989.203600.9492483715840752937.stgit@buzz Signed-off-by: Konstantin Khlebnikov Cc: Michal Hocko Cc: Tetsuo Handa Cc: Roman Guschin Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 5 ++++- include/linux/vm_event_item.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b2a5b1cd4e55..72d0853beb31 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -582,8 +582,11 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, rcu_read_lock(); memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); - if (likely(memcg)) + if (likely(memcg)) { this_cpu_inc(memcg->stat->events[idx]); + if (idx == OOM_KILL) + cgroup_file_notify(&memcg->events_file); + } rcu_read_unlock(); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index be3ab2d13adf..37e8d31a4632 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -41,6 +41,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY, PAGEOUTRUN, PGROTATED, DROP_PAGECACHE, DROP_SLAB, + OOM_KILL, #ifdef CONFIG_NUMA_BALANCING NUMA_PTE_UPDATES, NUMA_HUGE_PTE_UPDATES, -- cgit v1.2.3 From 385386cff4c6f047907655e05791d88198c4c523 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 6 Jul 2017 15:40:43 -0700 Subject: mm: vmstat: move slab statistics from zone to node counters Patch series "mm: per-lruvec slab stats" Josef is working on a new approach to balancing slab caches and the page cache. For this to work, he needs slab cache statistics on the lruvec level. 
These patches implement that by adding infrastructure that allows updating and reading generic VM stat items per lruvec, then switches some existing VM accounting sites, including the slab accounting ones, to this new cgroup-aware API. I'll follow up with more patches on this, because there is actually substantial simplification that can be done to the memory controller when we replace private memcg accounting with making the existing VM accounting sites cgroup-aware. But this is enough for Josef to base his slab reclaim work on, so here goes. This patch (of 5): To re-implement slab cache vs. page cache balancing, we'll need the slab counters at the lruvec level, which, ever since lru reclaim was moved from the zone to the node, is the intersection of the node, not the zone, and the memcg. We could retain the per-zone counters for when the page allocator dumps its memory information on failures, and have counters on both levels - which on all but NUMA node 0 is usually redundant. But let's keep it simple for now and just move them. If anybody complains we can restore the per-zone counters. 
[hannes@cmpxchg.org: fix oops] Link: http://lkml.kernel.org/r/20170605183511.GA8915@cmpxchg.org Link: http://lkml.kernel.org/r/20170530181724.27197-3-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Cc: Josef Bacik Cc: Michal Hocko Cc: Vladimir Davydov Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index abc1641011f2..7e8f100cb56d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -125,8 +125,6 @@ enum zone_stat_item { NR_ZONE_UNEVICTABLE, NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ - NR_SLAB_RECLAIMABLE, - NR_SLAB_UNRECLAIMABLE, NR_PAGETABLE, /* used for pagetables */ NR_KERNEL_STACK_KB, /* measured in KiB */ /* Second 128 byte cacheline */ @@ -152,6 +150,8 @@ enum node_stat_item { NR_INACTIVE_FILE, /* " " " " " */ NR_ACTIVE_FILE, /* " " " " " */ NR_UNEVICTABLE, /* " " " " " */ + NR_SLAB_RECLAIMABLE, + NR_SLAB_UNRECLAIMABLE, NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ WORKINGSET_REFAULT, -- cgit v1.2.3 From 320492961c1cf21da5547b00c23e525851c1d16f Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 6 Jul 2017 15:40:46 -0700 Subject: mm: memcontrol: use the node-native slab memory counters Now that the slab counters are moved from the zone to the node level we can drop the private memcg node stats and use the official ones. 
Link: http://lkml.kernel.org/r/20170530181724.27197-4-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Vladimir Davydov Cc: Josef Bacik Cc: Michal Hocko Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 72d0853beb31..fa506ae61d66 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -44,8 +44,6 @@ enum memcg_stat_item { MEMCG_SOCK, /* XXX: why are these zone and not node counters? */ MEMCG_KERNEL_STACK_KB, - MEMCG_SLAB_RECLAIMABLE, - MEMCG_SLAB_UNRECLAIMABLE, MEMCG_NR_STAT, }; -- cgit v1.2.3 From ed52be7bfd45533b194b429f43361493d24599a7 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 6 Jul 2017 15:40:49 -0700 Subject: mm: memcontrol: use generic mod_memcg_page_state for kmem pages The kmem-specific functions do the same thing. Switch and drop. Link: http://lkml.kernel.org/r/20170530181724.27197-5-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Vladimir Davydov Cc: Josef Bacik Cc: Michal Hocko Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index fa506ae61d66..5a72d8377942 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -929,19 +929,6 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) return memcg ? 
memcg->kmemcg_id : -1; } -/** - * memcg_kmem_update_page_stat - update kmem page state statistics - * @page: the page - * @idx: page state item to account - * @val: number of pages (positive or negative) - */ -static inline void memcg_kmem_update_page_stat(struct page *page, - enum memcg_stat_item idx, int val) -{ - if (memcg_kmem_enabled() && page->mem_cgroup) - this_cpu_add(page->mem_cgroup->stat->count[idx], val); -} - #else #define for_each_memcg_cache_index(_idx) \ for (; NULL; ) @@ -964,10 +951,6 @@ static inline void memcg_put_cache_ids(void) { } -static inline void memcg_kmem_update_page_stat(struct page *page, - enum memcg_stat_item idx, int val) -{ -} #endif /* CONFIG_MEMCG && !CONFIG_SLOB */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.3 From 00f3ca2c2d6635d85108571c4dd9a29088668662 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 6 Jul 2017 15:40:52 -0700 Subject: mm: memcontrol: per-lruvec stats infrastructure lruvecs are at the intersection of the NUMA node and memcg, which is the scope for most paging activity. Introduce a convenient accounting infrastructure that maintains statistics per node, per memcg, and the lruvec itself. Then convert over accounting sites for statistics that are already tracked in both nodes and memcgs and can be easily switched. 
[hannes@cmpxchg.org: fix crash in the new cgroup stat keeping code] Link: http://lkml.kernel.org/r/20170531171450.GA10481@cmpxchg.org [hannes@cmpxchg.org: don't track uncharged pages at all] Link: http://lkml.kernel.org/r/20170605175254.GA8547@cmpxchg.org [hannes@cmpxchg.org: add missing free_percpu()] Link: http://lkml.kernel.org/r/20170605175354.GB8547@cmpxchg.org [linux@roeck-us.net: hexagon: fix build error caused by include file order] Link: http://lkml.kernel.org/r/20170617153721.GA4382@roeck-us.net Link: http://lkml.kernel.org/r/20170530181724.27197-6-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Signed-off-by: Guenter Roeck Acked-by: Vladimir Davydov Cc: Josef Bacik Cc: Michal Hocko Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 246 +++++++++++++++++++++++++++++++++++++++------ include/linux/vmstat.h | 1 - 2 files changed, 216 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5a72d8377942..3914e3dd6168 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -26,7 +26,8 @@ #include #include #include -#include +#include +#include #include #include @@ -98,11 +99,16 @@ struct mem_cgroup_reclaim_iter { unsigned int generation; }; +struct lruvec_stat { + long count[NR_VM_NODE_STAT_ITEMS]; +}; + /* * per-zone information in memory controller.
*/ struct mem_cgroup_per_node { struct lruvec lruvec; + struct lruvec_stat __percpu *lruvec_stat; unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1]; @@ -496,23 +502,18 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, return val; } -static inline void mod_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx, int val) +static inline void __mod_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx, int val) { if (!mem_cgroup_disabled()) - this_cpu_add(memcg->stat->count[idx], val); -} - -static inline void inc_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx) -{ - mod_memcg_state(memcg, idx, 1); + __this_cpu_add(memcg->stat->count[idx], val); } -static inline void dec_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx) +static inline void mod_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx, int val) { - mod_memcg_state(memcg, idx, -1); + if (!mem_cgroup_disabled()) + this_cpu_add(memcg->stat->count[idx], val); } /** @@ -532,6 +533,13 @@ static inline void dec_memcg_state(struct mem_cgroup *memcg, * * Kernel pages are an exception to this, since they'll never move. 
*/ +static inline void __mod_memcg_page_state(struct page *page, + enum memcg_stat_item idx, int val) +{ + if (page->mem_cgroup) + __mod_memcg_state(page->mem_cgroup, idx, val); +} + static inline void mod_memcg_page_state(struct page *page, enum memcg_stat_item idx, int val) { @@ -539,16 +547,76 @@ static inline void mod_memcg_page_state(struct page *page, mod_memcg_state(page->mem_cgroup, idx, val); } -static inline void inc_memcg_page_state(struct page *page, - enum memcg_stat_item idx) +static inline unsigned long lruvec_page_state(struct lruvec *lruvec, + enum node_stat_item idx) { - mod_memcg_page_state(page, idx, 1); + struct mem_cgroup_per_node *pn; + long val = 0; + int cpu; + + if (mem_cgroup_disabled()) + return node_page_state(lruvec_pgdat(lruvec), idx); + + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); + for_each_possible_cpu(cpu) + val += per_cpu(pn->lruvec_stat->count[idx], cpu); + + if (val < 0) + val = 0; + + return val; } -static inline void dec_memcg_page_state(struct page *page, - enum memcg_stat_item idx) +static inline void __mod_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, int val) { - mod_memcg_page_state(page, idx, -1); + struct mem_cgroup_per_node *pn; + + __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); + if (mem_cgroup_disabled()) + return; + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); + __mod_memcg_state(pn->memcg, idx, val); + __this_cpu_add(pn->lruvec_stat->count[idx], val); +} + +static inline void mod_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, int val) +{ + struct mem_cgroup_per_node *pn; + + mod_node_page_state(lruvec_pgdat(lruvec), idx, val); + if (mem_cgroup_disabled()) + return; + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); + mod_memcg_state(pn->memcg, idx, val); + this_cpu_add(pn->lruvec_stat->count[idx], val); +} + +static inline void __mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) +{ + struct 
mem_cgroup_per_node *pn; + + __mod_node_page_state(page_pgdat(page), idx, val); + if (mem_cgroup_disabled() || !page->mem_cgroup) + return; + __mod_memcg_state(page->mem_cgroup, idx, val); + pn = page->mem_cgroup->nodeinfo[page_to_nid(page)]; + __this_cpu_add(pn->lruvec_stat->count[idx], val); +} + +static inline void mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) +{ + struct mem_cgroup_per_node *pn; + + mod_node_page_state(page_pgdat(page), idx, val); + if (mem_cgroup_disabled() || !page->mem_cgroup) + return; + mod_memcg_state(page->mem_cgroup, idx, val); + pn = page->mem_cgroup->nodeinfo[page_to_nid(page)]; + this_cpu_add(pn->lruvec_stat->count[idx], val); } unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, @@ -777,19 +845,21 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, return 0; } -static inline void mod_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx, - int nr) +static inline void __mod_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx, + int nr) { } -static inline void inc_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx) +static inline void mod_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx, + int nr) { } -static inline void dec_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx) +static inline void __mod_memcg_page_state(struct page *page, + enum memcg_stat_item idx, + int nr) { } @@ -799,14 +869,34 @@ static inline void mod_memcg_page_state(struct page *page, { } -static inline void inc_memcg_page_state(struct page *page, - enum memcg_stat_item idx) +static inline unsigned long lruvec_page_state(struct lruvec *lruvec, + enum node_stat_item idx) { + return node_page_state(lruvec_pgdat(lruvec), idx); } -static inline void dec_memcg_page_state(struct page *page, - enum memcg_stat_item idx) +static inline void __mod_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, int val) { + 
__mod_node_page_state(lruvec_pgdat(lruvec), idx, val); +} + +static inline void mod_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, int val) +{ + mod_node_page_state(lruvec_pgdat(lruvec), idx, val); +} + +static inline void __mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) +{ + __mod_node_page_state(page_pgdat(page), idx, val); +} + +static inline void mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) +{ + mod_node_page_state(page_pgdat(page), idx, val); } static inline @@ -838,6 +928,102 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) } #endif /* CONFIG_MEMCG */ +static inline void __inc_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx) +{ + __mod_memcg_state(memcg, idx, 1); +} + +static inline void __dec_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx) +{ + __mod_memcg_state(memcg, idx, -1); +} + +static inline void __inc_memcg_page_state(struct page *page, + enum memcg_stat_item idx) +{ + __mod_memcg_page_state(page, idx, 1); +} + +static inline void __dec_memcg_page_state(struct page *page, + enum memcg_stat_item idx) +{ + __mod_memcg_page_state(page, idx, -1); +} + +static inline void __inc_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) +{ + __mod_lruvec_state(lruvec, idx, 1); +} + +static inline void __dec_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) +{ + __mod_lruvec_state(lruvec, idx, -1); +} + +static inline void __inc_lruvec_page_state(struct page *page, + enum node_stat_item idx) +{ + __mod_lruvec_page_state(page, idx, 1); +} + +static inline void __dec_lruvec_page_state(struct page *page, + enum node_stat_item idx) +{ + __mod_lruvec_page_state(page, idx, -1); +} + +static inline void inc_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx) +{ + mod_memcg_state(memcg, idx, 1); +} + +static inline void dec_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx) +{ + 
mod_memcg_state(memcg, idx, -1); +} + +static inline void inc_memcg_page_state(struct page *page, + enum memcg_stat_item idx) +{ + mod_memcg_page_state(page, idx, 1); +} + +static inline void dec_memcg_page_state(struct page *page, + enum memcg_stat_item idx) +{ + mod_memcg_page_state(page, idx, -1); +} + +static inline void inc_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) +{ + mod_lruvec_state(lruvec, idx, 1); +} + +static inline void dec_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) +{ + mod_lruvec_state(lruvec, idx, -1); +} + +static inline void inc_lruvec_page_state(struct page *page, + enum node_stat_item idx) +{ + mod_lruvec_page_state(page, idx, 1); +} + +static inline void dec_lruvec_page_state(struct page *page, + enum node_stat_item idx) +{ + mod_lruvec_page_state(page, idx, -1); +} + #ifdef CONFIG_CGROUP_WRITEBACK struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg); diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 613771909b6e..b3d85f30d424 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -3,7 +3,6 @@ #include #include -#include #include #include #include -- cgit v1.2.3 From f70029bbaacbfa8f082d2b4988717cba4e269f17 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 6 Jul 2017 15:41:02 -0700 Subject: mm, memory_hotplug: drop CONFIG_MOVABLE_NODE Commit 20b2f52b73fe ("numa: add CONFIG_MOVABLE_NODE for movable-dedicated node") has introduced CONFIG_MOVABLE_NODE without a good explanation on why it is actually useful. It makes a lot of sense to make movable node semantic opt in but we already have that because the feature has to be explicitly enabled on the kernel command line. A config option on top only makes the configuration space larger without a good reason. It also adds an additional ifdefery that pollutes the code. Just drop the config option and make it de-facto always enabled. This shouldn't introduce any change to the semantic. 
Link: http://lkml.kernel.org/r/20170529114141.536-3-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Reza Arbab Acked-by: Vlastimil Babka Cc: Mel Gorman Cc: Andrea Arcangeli Cc: Jerome Glisse Cc: Yasuaki Ishimatsu Cc: Xishi Qiu Cc: Kani Toshimitsu Cc: Chen Yucong Cc: Joonsoo Kim Cc: Andi Kleen Cc: David Rientjes Cc: Daniel Kiper Cc: Igor Mammedov Cc: Vitaly Kuznetsov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 18 ------------------ include/linux/nodemask.h | 4 ---- 2 files changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 8098695e5d8d..1199e605d676 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -57,10 +57,8 @@ struct memblock { extern struct memblock memblock; extern int memblock_debug; -#ifdef CONFIG_MOVABLE_NODE /* If movable_node boot option specified */ extern bool movable_node_enabled; -#endif /* CONFIG_MOVABLE_NODE */ #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK #define __init_memblock __meminit @@ -169,7 +167,6 @@ void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start, i != (u64)ULLONG_MAX; \ __next_reserved_mem_region(&i, p_start, p_end)) -#ifdef CONFIG_MOVABLE_NODE static inline bool memblock_is_hotpluggable(struct memblock_region *m) { return m->flags & MEMBLOCK_HOTPLUG; @@ -179,16 +176,6 @@ static inline bool __init_memblock movable_node_is_enabled(void) { return movable_node_enabled; } -#else -static inline bool memblock_is_hotpluggable(struct memblock_region *m) -{ - return false; -} -static inline bool movable_node_is_enabled(void) -{ - return false; -} -#endif static inline bool memblock_is_mirror(struct memblock_region *m) { @@ -296,7 +283,6 @@ phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align); -#ifdef CONFIG_MOVABLE_NODE /* * Set the allocation direction to bottom-up or top-down. 
*/ @@ -314,10 +300,6 @@ static inline bool memblock_bottom_up(void) { return memblock.bottom_up; } -#else -static inline void __init memblock_set_bottom_up(bool enable) {} -static inline bool memblock_bottom_up(void) { return false; } -#endif /* Flags for memblock_alloc_base() amd __memblock_alloc_base() */ #define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index f746e44d4046..cf0b91c3ec12 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -387,11 +387,7 @@ enum node_states { #else N_HIGH_MEMORY = N_NORMAL_MEMORY, #endif -#ifdef CONFIG_MOVABLE_NODE N_MEMORY, /* The node has memory(regular, high, movable) */ -#else - N_MEMORY = N_HIGH_MEMORY, -#endif N_CPU, /* The node has one or more cpus */ NR_NODE_STATES }; -- cgit v1.2.3 From 4932381ee2a77a21641009149722e1bb92bd99e2 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 6 Jul 2017 15:41:05 -0700 Subject: mm, memory_hotplug: move movable_node to the hotplug proper movable_node_is_enabled is defined in memblock proper while it is initialized from the memory hotplug proper. This is quite messy and it makes a dependency between the two so move movable_node along with the helper functions to memory_hotplug. To make it more entertaining the kernel parameter is ignored unless CONFIG_HAVE_MEMBLOCK_NODE_MAP=y because we do not have the node information for each memblock otherwise. So let's warn when the option is disabled. 
Link: http://lkml.kernel.org/r/20170529114141.536-4-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Reza Arbab Acked-by: Vlastimil Babka Cc: Mel Gorman Cc: Andrea Arcangeli Cc: Jerome Glisse Cc: Yasuaki Ishimatsu Cc: Xishi Qiu Cc: Kani Toshimitsu Cc: Chen Yucong Cc: Joonsoo Kim Cc: Andi Kleen Cc: David Rientjes Cc: Daniel Kiper Cc: Igor Mammedov Cc: Vitaly Kuznetsov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 7 ------- include/linux/memory_hotplug.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 1199e605d676..77d427974f57 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -57,8 +57,6 @@ struct memblock { extern struct memblock memblock; extern int memblock_debug; -/* If movable_node boot option specified */ -extern bool movable_node_enabled; #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK #define __init_memblock __meminit @@ -172,11 +170,6 @@ static inline bool memblock_is_hotpluggable(struct memblock_region *m) return m->flags & MEMBLOCK_HOTPLUG; } -static inline bool __init_memblock movable_node_is_enabled(void) -{ - return movable_node_enabled; -} - static inline bool memblock_is_mirror(struct memblock_region *m) { return m->flags & MEMBLOCK_MIRROR; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index ed167541e4fc..c8a5056a5ae0 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -115,6 +115,12 @@ extern void __online_page_free(struct page *page); extern int try_online_node(int nid); extern bool memhp_auto_online; +/* If movable_node boot option specified */ +extern bool movable_node_enabled; +static inline bool movable_node_is_enabled(void) +{ + return movable_node_enabled; +} #ifdef CONFIG_MEMORY_HOTREMOVE extern bool is_pageblock_removable_nolock(struct page *page); @@ -266,6 +272,10 @@ static inline void 
put_online_mems(void) {} static inline void mem_hotplug_begin(void) {} static inline void mem_hotplug_done(void) {} +static inline bool movable_node_is_enabled(void) +{ + return false; +} #endif /* ! CONFIG_MEMORY_HOTPLUG */ #ifdef CONFIG_MEMORY_HOTREMOVE -- cgit v1.2.3 From f77af15165847406b15d8f70c382c4cb15846b2a Mon Sep 17 00:00:00 2001 From: Josh Zimmerman Date: Sun, 25 Jun 2017 14:53:23 -0700 Subject: Add "shutdown" to "struct class". The TPM class has some common shutdown code that must be executed for all drivers. This adds some needed functionality for that. Signed-off-by: Josh Zimmerman Acked-by: Greg Kroah-Hartman Cc: stable@vger.kernel.org Fixes: 74d6b3ceaa17 ("tpm: fix suspend/resume paths for TPM 2.0") Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- include/linux/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 6baa1238f158..723cd54b94da 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -375,6 +375,7 @@ int subsys_virtual_register(struct bus_type *subsys, * @suspend: Used to put the device to sleep mode, usually to a low power * state. * @resume: Used to bring the device from the sleep mode. + * @shutdown: Called at shut-down time to quiesce the device. * @ns_type: Callbacks so sysfs can detemine namespaces. * @namespace: Namespace of the device belongs to this class. * @pm: The default device power management operations of this class. 
@@ -403,6 +404,7 @@ struct class { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); + int (*shutdown)(struct device *dev); const struct kobj_ns_type_operations *ns_type; const void *(*namespace)(struct device *dev); -- cgit v1.2.3 From 0e4524a5d341e719e8ee9ee7db5d58e2c5a4c10e Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 6 Jul 2017 14:44:28 +0200 Subject: KVM: mark vcpu->pid pointer as rcu protected We do use rcu to protect the pid pointer. Mark it as such and adopt all code to use the proper access methods. This was detected by sparse. "virt/kvm/kvm_main.c:2248:15: error: incompatible types in comparison expression (different address spaces)" Signed-off-by: Christian Borntraeger Reviewed-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0b50e7b35ed4..bcd37b855c66 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -234,7 +234,7 @@ struct kvm_vcpu { int guest_fpu_loaded, guest_xcr0_loaded; struct swait_queue_head wq; - struct pid *pid; + struct pid __rcu *pid; int sigset_active; sigset_t sigset; struct kvm_vcpu_stat stat; -- cgit v1.2.3 From 3068a254d5519cd5116f61297462da6d1aa84c20 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 6 Jul 2017 11:42:00 +0200 Subject: rtc: introduce new registration method Introduce rtc_register_device() to register an already allocated and initialized struct rtc_device. It automatically sets up the owner and the two steps allocation/registration will allow to remove race conditions in the IRQ handling of some driver. It also allows to properly extend the core without adding more arguments to rtc_device_register(). 
Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index d354f56e0cf5..8e4a5f44f59e 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -142,6 +142,8 @@ struct rtc_device { /* Some hardware can't support UIE mode */ int uie_unsupported; + bool registered; + #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL struct work_struct uie_task; struct timer_list uie_timer; @@ -163,6 +165,8 @@ extern struct rtc_device *devm_rtc_device_register(struct device *dev, const char *name, const struct rtc_class_ops *ops, struct module *owner); +struct rtc_device *devm_rtc_allocate_device(struct device *dev); +int __rtc_register_device(struct module *owner, struct rtc_device *rtc); extern void rtc_device_unregister(struct rtc_device *rtc); extern void devm_rtc_device_unregister(struct device *dev, struct rtc_device *rtc); @@ -218,6 +222,9 @@ static inline bool is_leap_year(unsigned int year) return (!(year % 4) && (year % 100)) || !(year % 400); } +#define rtc_register_device(device) \ + __rtc_register_device(THIS_MODULE, device) + #ifdef CONFIG_RTC_HCTOSYS_DEVICE extern int rtc_hctosys_ret; #else -- cgit v1.2.3 From 697e5a47aa12cdab6f2a8b284cc923cdf704eafc Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 6 Jul 2017 11:42:02 +0200 Subject: rtc: add generic nvmem support Many RTCs have an on board non volatile storage. It can be battery backed RAM or an EEPROM. Use the nvmem subsystem to export it to both userspace and in-kernel consumers. This stays compatible with the previous (non documented) ABI that was using /sys/class/rtc/rtcx/device/nvram to export that memory. But will warn about the deprecation. 
Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 8e4a5f44f59e..d53ecdc060cf 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -14,6 +14,7 @@ #include #include +#include #include extern int rtc_month_days(unsigned int month, unsigned int year); @@ -144,6 +145,12 @@ struct rtc_device { bool registered; + struct nvmem_config *nvmem_config; + struct nvmem_device *nvmem; + /* Old ABI support */ + bool nvram_old_abi; + struct bin_attribute *nvram; + #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL struct work_struct uie_task; struct timer_list uie_timer; -- cgit v1.2.3 From 4a12f95177280a660bda99e81838919b1cc6a91a Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 7 Jul 2017 10:51:38 +0200 Subject: KVM: mark kvm->busses as rcu protected mark kvm->busses as rcu protected and use the correct access function everywhere. found by sparse virt/kvm/kvm_main.c:3490:15: error: incompatible types in comparison expression (different address spaces) virt/kvm/kvm_main.c:3509:15: error: incompatible types in comparison expression (different address spaces) virt/kvm/kvm_main.c:3561:15: error: incompatible types in comparison expression (different address spaces) virt/kvm/kvm_main.c:3644:15: error: incompatible types in comparison expression (different address spaces) Signed-off-by: Christian Borntraeger --- include/linux/kvm_host.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bcd37b855c66..6a164f9eb02c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -404,7 +404,7 @@ struct kvm { int last_boosted_vcpu; struct list_head vm_list; struct mutex lock; - struct kvm_io_bus *buses[KVM_NR_BUSES]; + struct kvm_io_bus __rcu *buses[KVM_NR_BUSES]; #ifdef CONFIG_HAVE_KVM_EVENTFD struct { spinlock_t lock; @@ 
-473,6 +473,12 @@ struct kvm { #define vcpu_err(vcpu, fmt, ...) \ kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) +static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) +{ + return srcu_dereference_check(kvm->buses[idx], &kvm->srcu, + lockdep_is_held(&kvm->slots_lock)); +} + static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) { /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case -- cgit v1.2.3 From a80cf7b5f4149753d5f19c872a47e66195b167d4 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 6 Jul 2017 16:17:14 +0200 Subject: KVM: mark memory slots as rcu we access the memslots array via srcu. Mark it as such and use the right access functions also for the freeing of memory slots. Found by sparse: ./include/linux/kvm_host.h:565:16: error: incompatible types in comparison expression (different address spaces) Signed-off-by: Christian Borntraeger Reviewed-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6a164f9eb02c..b3ca77a96b2d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -390,7 +390,7 @@ struct kvm { spinlock_t mmu_lock; struct mutex slots_lock; struct mm_struct *mm; /* userspace tied to this vm */ - struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM]; + struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM]; struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; /* -- cgit v1.2.3 From dcbbd97ccb9c6f4dad39875c1404d2643eaf110b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 5 Jun 2017 14:44:59 +0200 Subject: libceph: remove ceph_sanitize_features() workaround Reflects ceph.git commit ff1959282826ae6acd7134e1b1ede74ffd1cc04a. 
Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_features.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index fd8b2953c78f..4962708841b5 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -77,29 +77,8 @@ #define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING (1ULL<<58) /* New, v7 encoding */ #define CEPH_FEATURE_FS_FILE_LAYOUT_V2 (1ULL<<58) /* file_layout_t */ -/* - * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature - * vector to evaluate to 64 bit ~0. To cope, we designate 1ULL << 63 - * to mean 33 bit ~0, and introduce a helper below to do the - * translation. - * - * This was introduced by ceph.git commit - * 9ea02b84104045c2ffd7e7f4e7af512953855ecd v0.58-657-g9ea02b8 - * and fixed by ceph.git commit - * 4255b5c2fb54ae40c53284b3ab700fdfc7e61748 v0.65-263-g4255b5c - */ #define CEPH_FEATURE_RESERVED (1ULL<<63) -static inline u64 ceph_sanitize_features(u64 features) -{ - if (features & CEPH_FEATURE_RESERVED) { - /* everything through OSD_SNAPMAPPER */ - return 0x1ffffffffull; - } else { - return features; - } -} - /* * Features supported. 
*/ -- cgit v1.2.3 From f179d3ba8cb9073c2d96315b79ff7bc658a1feee Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 5 Jun 2017 14:44:59 +0200 Subject: libceph: new features macros Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_features.h | 242 +++++++++++++++++++++++++------------ 1 file changed, 167 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 4962708841b5..7fce9ed44af0 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -2,82 +2,174 @@ #define __CEPH_FEATURES /* - * feature bits + * Each time we reclaim bits for reuse we need to specify another bit + * that, if present, indicates we have the new incarnation of that + * feature. Base case is 1 (first use). */ -#define CEPH_FEATURE_UID (1ULL<<0) -#define CEPH_FEATURE_NOSRCADDR (1ULL<<1) -#define CEPH_FEATURE_MONCLOCKCHECK (1ULL<<2) -#define CEPH_FEATURE_FLOCK (1ULL<<3) -#define CEPH_FEATURE_SUBSCRIBE2 (1ULL<<4) -#define CEPH_FEATURE_MONNAMES (1ULL<<5) -#define CEPH_FEATURE_RECONNECT_SEQ (1ULL<<6) -#define CEPH_FEATURE_DIRLAYOUTHASH (1ULL<<7) -#define CEPH_FEATURE_OBJECTLOCATOR (1ULL<<8) -#define CEPH_FEATURE_PGID64 (1ULL<<9) -#define CEPH_FEATURE_INCSUBOSDMAP (1ULL<<10) -#define CEPH_FEATURE_PGPOOL3 (1ULL<<11) -#define CEPH_FEATURE_OSDREPLYMUX (1ULL<<12) -#define CEPH_FEATURE_OSDENC (1ULL<<13) -#define CEPH_FEATURE_OMAP (1ULL<<14) -#define CEPH_FEATURE_MONENC (1ULL<<15) -#define CEPH_FEATURE_QUERY_T (1ULL<<16) -#define CEPH_FEATURE_INDEP_PG_MAP (1ULL<<17) -#define CEPH_FEATURE_CRUSH_TUNABLES (1ULL<<18) -#define CEPH_FEATURE_CHUNKY_SCRUB (1ULL<<19) -#define CEPH_FEATURE_MON_NULLROUTE (1ULL<<20) -#define CEPH_FEATURE_MON_GV (1ULL<<21) -#define CEPH_FEATURE_BACKFILL_RESERVATION (1ULL<<22) -#define CEPH_FEATURE_MSG_AUTH (1ULL<<23) -#define CEPH_FEATURE_RECOVERY_RESERVATION (1ULL<<24) -#define CEPH_FEATURE_CRUSH_TUNABLES2 (1ULL<<25) -#define 
CEPH_FEATURE_CREATEPOOLID (1ULL<<26) -#define CEPH_FEATURE_REPLY_CREATE_INODE (1ULL<<27) -#define CEPH_FEATURE_OSD_HBMSGS (1ULL<<28) -#define CEPH_FEATURE_MDSENC (1ULL<<29) -#define CEPH_FEATURE_OSDHASHPSPOOL (1ULL<<30) -#define CEPH_FEATURE_MON_SINGLE_PAXOS (1ULL<<31) -#define CEPH_FEATURE_OSD_SNAPMAPPER (1ULL<<32) -#define CEPH_FEATURE_MON_SCRUB (1ULL<<33) -#define CEPH_FEATURE_OSD_PACKED_RECOVERY (1ULL<<34) -#define CEPH_FEATURE_OSD_CACHEPOOL (1ULL<<35) -#define CEPH_FEATURE_CRUSH_V2 (1ULL<<36) /* new indep; SET_* steps */ -#define CEPH_FEATURE_EXPORT_PEER (1ULL<<37) -#define CEPH_FEATURE_OSD_ERASURE_CODES (1ULL<<38) -#define CEPH_FEATURE_OSD_TMAP2OMAP (1ULL<<38) /* overlap with EC */ -/* The process supports new-style OSDMap encoding. Monitors also use - this bit to determine if peers support NAK messages. */ -#define CEPH_FEATURE_OSDMAP_ENC (1ULL<<39) -#define CEPH_FEATURE_MDS_INLINE_DATA (1ULL<<40) -#define CEPH_FEATURE_CRUSH_TUNABLES3 (1ULL<<41) -#define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41) /* overlap w/ tunables3 */ -#define CEPH_FEATURE_MSGR_KEEPALIVE2 (1ULL<<42) -#define CEPH_FEATURE_OSD_POOLRESEND (1ULL<<43) -#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 (1ULL<<44) -#define CEPH_FEATURE_OSD_SET_ALLOC_HINT (1ULL<<45) -#define CEPH_FEATURE_OSD_FADVISE_FLAGS (1ULL<<46) -#define CEPH_FEATURE_OSD_REPOP (1ULL<<46) /* overlap with fadvise */ -#define CEPH_FEATURE_OSD_OBJECT_DIGEST (1ULL<<46) /* overlap with fadvise */ -#define CEPH_FEATURE_OSD_TRANSACTION_MAY_LAYOUT (1ULL<<46) /* overlap w/ fadvise */ -#define CEPH_FEATURE_MDS_QUOTA (1ULL<<47) -#define CEPH_FEATURE_CRUSH_V4 (1ULL<<48) /* straw2 buckets */ -#define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49) -// duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY -#define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49) /* overlap w/ above */ -#define CEPH_FEATURE_MON_METADATA (1ULL<<50) -#define CEPH_FEATURE_OSD_BITWISE_HOBJ_SORT (1ULL<<51) /* can sort objs bitwise */ -#define 
CEPH_FEATURE_OSD_PROXY_WRITE_FEATURES (1ULL<<52) -#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V3 (1ULL<<53) -#define CEPH_FEATURE_OSD_HITSET_GMT (1ULL<<54) -#define CEPH_FEATURE_HAMMER_0_94_4 (1ULL<<55) -#define CEPH_FEATURE_NEW_OSDOP_ENCODING (1ULL<<56) /* New, v7 encoding */ -#define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */ -#define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */ -#define CEPH_FEATURE_CRUSH_TUNABLES5 (1ULL<<58) /* chooseleaf stable mode */ -// duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5 -#define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING (1ULL<<58) /* New, v7 encoding */ -#define CEPH_FEATURE_FS_FILE_LAYOUT_V2 (1ULL<<58) /* file_layout_t */ - -#define CEPH_FEATURE_RESERVED (1ULL<<63) +#define CEPH_FEATURE_INCARNATION_1 (0ull) +#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL + +#define DEFINE_CEPH_FEATURE(bit, incarnation, name) \ + const static uint64_t CEPH_FEATURE_##name = (1ULL< Date: Mon, 5 Jun 2017 14:45:00 +0200 Subject: libceph: advertise support for OSD_POOLRESEND The code has been in place since commit 63244fa123a7 ("libceph: introduce ceph_osd_request_target, calc_target()"), and, with the ceph_{oloc,oid}_copy() issue fixed in the previous commit, is now in working order. 
Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_features.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 7fce9ed44af0..89c68af48539 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -197,6 +197,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin CEPH_FEATURE_CRUSH_TUNABLES3 | \ CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \ CEPH_FEATURE_MSGR_KEEPALIVE2 | \ + CEPH_FEATURE_OSD_POOLRESEND | \ CEPH_FEATURE_CRUSH_V4 | \ CEPH_FEATURE_CRUSH_TUNABLES5 | \ CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING) -- cgit v1.2.3 From 220abf5aa7ba5f544f1b589bde33761c60bbf9a0 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 5 Jun 2017 14:45:00 +0200 Subject: libceph: support SERVER_JEWEL feature bits Only MON_STATEFUL_SUB, really. MON_ROUTE_OSDMAP and OSDSUBOP_NO_SNAPCONTEXT are irrelevant. Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_features.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 89c68af48539..78a58770e6e9 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -199,6 +199,8 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin CEPH_FEATURE_MSGR_KEEPALIVE2 | \ CEPH_FEATURE_OSD_POOLRESEND | \ CEPH_FEATURE_CRUSH_V4 | \ + CEPH_FEATURE_SERVER_JEWEL | \ + CEPH_FEATURE_MON_STATEFUL_SUB | \ CEPH_FEATURE_CRUSH_TUNABLES5 | \ CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING) -- cgit v1.2.3 From dc93e0e2831de2f80817b89aae4864b88332fcce Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 5 Jun 2017 14:45:00 +0200 Subject: libceph: fold [l]req->last_force_resend into ceph_osd_request_target Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 
'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 85650b415e73..ef630ebd1169 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -148,6 +148,8 @@ struct ceph_osd_request_target { unsigned int flags; /* CEPH_OSD_FLAG_* */ bool paused; + u32 last_force_resend; + int osd; }; @@ -193,7 +195,6 @@ struct ceph_osd_request { unsigned long r_stamp; /* jiffies, send or check time */ unsigned long r_start_stamp; /* jiffies */ int r_attempts; - u32 r_last_force_resend; u32 r_map_dne_bound; struct ceph_osd_req_op r_ops[]; @@ -221,7 +222,6 @@ struct ceph_osd_linger_request { struct list_head pending_lworks; struct ceph_osd_request_target t; - u32 last_force_resend; u32 map_dne_bound; struct timespec mtime; -- cgit v1.2.3 From dc98ff7230e5ccf11c621dff0d590e83574a7184 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 15 Jun 2017 16:30:53 +0200 Subject: libceph: introduce ceph_spg, ceph_pg_to_primary_shard() Store both raw pgid and actual spgid in ceph_osd_request_target. 
Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 3 ++- include/linux/ceph/osdmap.h | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index ef630ebd1169..6114f7b02135 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -136,7 +136,8 @@ struct ceph_osd_request_target { struct ceph_object_id target_oid; struct ceph_object_locator target_oloc; - struct ceph_pg pgid; + struct ceph_pg pgid; /* last raw pg we mapped to */ + struct ceph_spg spgid; /* last actual spg we mapped to */ u32 pg_num; u32 pg_num_mask; struct ceph_osds acting; diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 938656f70807..7ae5b416b4b6 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -24,6 +24,13 @@ struct ceph_pg { uint32_t seed; }; +#define CEPH_SPG_NOSHARD -1 + +struct ceph_spg { + struct ceph_pg pgid; + s8 shard; +}; + int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs); #define CEPH_POOL_FLAG_HASHPSPOOL (1ULL << 0) /* hash pg seed and pool id @@ -271,6 +278,9 @@ void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap, const struct ceph_pg *raw_pgid, struct ceph_osds *up, struct ceph_osds *acting); +bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap, + const struct ceph_pg *raw_pgid, + struct ceph_spg *spgid); int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap, const struct ceph_pg *raw_pgid); -- cgit v1.2.3 From 98ad5ebd1505eb903ae8bc27e94c1ab0d1c3e651 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 15 Jun 2017 16:30:54 +0200 Subject: libceph: ceph_connection_operations::reencode_message() method Give upper layers a chance to reencode the message after the connection is negotiated and ->peer_features is set. 
OSD client will use this to support both luminous and pre-luminous OSDs (in a single cluster): the former need MOSDOp v8; the latter will continue to be sent MOSDOp v4. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index c5c4c713e00f..fbd94d9fa5dd 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -44,6 +44,8 @@ struct ceph_connection_operations { struct ceph_msg_header *hdr, int *skip); + void (*reencode_message) (struct ceph_msg *msg); + int (*sign_message) (struct ceph_msg *msg); int (*check_message_signature) (struct ceph_msg *msg); }; -- cgit v1.2.3 From 8cb441c0545dfd4dafeedc1e2d7157e1072413ac Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 15 Jun 2017 16:30:54 +0200 Subject: libceph: MOSDOp v8 encoding (actual spgid + full hash) Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 17 +++++++++++++++++ include/linux/ceph/osdmap.h | 4 +++- 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 6114f7b02135..bca2718ac253 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -205,6 +205,23 @@ struct ceph_request_redirect { struct ceph_object_locator oloc; }; +/* + * osd request identifier + * + * caller name + incarnation# + tid to unique identify this request + */ +struct ceph_osd_reqid { + struct ceph_entity_name name; + __le64 tid; + __le32 inc; +} __packed; + +struct ceph_blkin_trace_info { + __le64 trace_id; + __le64 span_id; + __le64 parent_span_id; +} __packed; + typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie, u64 notifier_id, void *data, size_t data_len); typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err); diff --git a/include/linux/ceph/osdmap.h 
b/include/linux/ceph/osdmap.h index 7ae5b416b4b6..66447fc7f334 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -205,11 +205,13 @@ static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map, return &map->osd_addr[osd]; } +#define CEPH_PGID_ENCODING_LEN (1 + 8 + 4 + 4) + static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid) { __u8 version; - if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) { + if (!ceph_has_room(p, end, CEPH_PGID_ENCODING_LEN)) { pr_warn("incomplete pg encoding\n"); return -EINVAL; } -- cgit v1.2.3 From 7de030d6b10a56e991312a978ace6be3c090097c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 15 Jun 2017 16:30:54 +0200 Subject: libceph: resend on PG splits if OSD has RESEND_ON_SPLIT Note that ceph_osd_request_target fields are updated regardless of RESEND_ON_SPLIT. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 66447fc7f334..63fb073a3355 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -249,6 +249,8 @@ static inline void ceph_osds_init(struct ceph_osds *set) void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src); +bool ceph_pg_is_split(const struct ceph_pg *pgid, u32 old_pg_num, + u32 new_pg_num); bool ceph_is_new_interval(const struct ceph_osds *old_acting, const struct ceph_osds *new_acting, const struct ceph_osds *old_up, -- cgit v1.2.3 From 04c7d789e269c2b82bbd08106049a5a979cdb3fd Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 15 Jun 2017 16:30:55 +0200 Subject: libceph: make sure need_resend targets reflect latest map Otherwise we may miss events like PG splits, pool deletions, etc when we get multiple incremental maps at once. Because check_pool_dne() can now be fed an unlinked request, finish_request() needed to be taught to handle unlinked requests. 
Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index bca2718ac253..62c672bcbb31 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -149,6 +149,7 @@ struct ceph_osd_request_target { unsigned int flags; /* CEPH_OSD_FLAG_* */ bool paused; + u32 epoch; u32 last_force_resend; int osd; -- cgit v1.2.3 From df28152d53b449a72258000f592472215fc9371e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 15 Jun 2017 16:30:56 +0200 Subject: libceph: avoid unnecessary pi lookups in calc_target() Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 63fb073a3355..060d059acbf8 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -273,16 +273,22 @@ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, u64 off, u64 len, u64 *bno, u64 *oxoff, u64 *oxlen); +int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi, + const struct ceph_object_id *oid, + const struct ceph_object_locator *oloc, + struct ceph_pg *raw_pgid); int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap, - struct ceph_object_id *oid, - struct ceph_object_locator *oloc, + const struct ceph_object_id *oid, + const struct ceph_object_locator *oloc, struct ceph_pg *raw_pgid); void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap, + struct ceph_pg_pool_info *pi, const struct ceph_pg *raw_pgid, struct ceph_osds *up, struct ceph_osds *acting); bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap, + struct ceph_pg_pool_info *pi, const struct ceph_pg *raw_pgid, struct ceph_spg *spgid); int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap, -- cgit v1.2.3 From 
76f827a7b1faaaebc53f89d184e95ea3a0b8dd71 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 19 Jun 2017 12:18:05 +0200 Subject: libceph: make DEFINE_RB_* helpers more general Initially for ceph_pg_mapping, ceph_spg_mapping and ceph_hobject_id, compared with ceph_pg_compare(), ceph_spg_compare() and hoid_compare() respectively. Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 49 +++++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 3229ae6c7846..8a79587e1317 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -184,10 +184,11 @@ static inline int calc_pages_for(u64 off, u64 len) (off >> PAGE_SHIFT); } -/* - * These are not meant to be generic - an integer key is assumed. - */ -#define DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld) \ +#define RB_BYVAL(a) (a) +#define RB_BYPTR(a) (&(a)) +#define RB_CMP3WAY(a, b) ((a) < (b) ? 
-1 : (a) > (b)) + +#define DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, cmpexp, keyexp, nodefld) \ static void insert_##name(struct rb_root *root, type *t) \ { \ struct rb_node **n = &root->rb_node; \ @@ -197,11 +198,13 @@ static void insert_##name(struct rb_root *root, type *t) \ \ while (*n) { \ type *cur = rb_entry(*n, type, nodefld); \ + int cmp; \ \ parent = *n; \ - if (t->keyfld < cur->keyfld) \ + cmp = cmpexp(keyexp(t->keyfld), keyexp(cur->keyfld)); \ + if (cmp < 0) \ n = &(*n)->rb_left; \ - else if (t->keyfld > cur->keyfld) \ + else if (cmp > 0) \ n = &(*n)->rb_right; \ else \ BUG(); \ @@ -217,19 +220,24 @@ static void erase_##name(struct rb_root *root, type *t) \ RB_CLEAR_NODE(&t->nodefld); \ } -#define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) \ -extern type __lookup_##name##_key; \ -static type *lookup_##name(struct rb_root *root, \ - typeof(__lookup_##name##_key.keyfld) key) \ +/* + * @lookup_param_type is a parameter and not constructed from (@type, + * @keyfld) with typeof() because adding const is too unwieldy. + */ +#define DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, cmpexp, keyexp, \ + lookup_param_type, nodefld) \ +static type *lookup_##name(struct rb_root *root, lookup_param_type key) \ { \ struct rb_node *n = root->rb_node; \ \ while (n) { \ type *cur = rb_entry(n, type, nodefld); \ + int cmp; \ \ - if (key < cur->keyfld) \ + cmp = cmpexp(key, keyexp(cur->keyfld)); \ + if (cmp < 0) \ n = n->rb_left; \ - else if (key > cur->keyfld) \ + else if (cmp > 0) \ n = n->rb_right; \ else \ return cur; \ @@ -238,6 +246,23 @@ static type *lookup_##name(struct rb_root *root, \ return NULL; \ } +#define DEFINE_RB_FUNCS2(name, type, keyfld, cmpexp, keyexp, \ + lookup_param_type, nodefld) \ +DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, cmpexp, keyexp, nodefld) \ +DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, cmpexp, keyexp, \ + lookup_param_type, nodefld) + +/* + * Shorthands for integer keys. 
+ */ +#define DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld) \ +DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, RB_CMP3WAY, RB_BYVAL, nodefld) + +#define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) \ +extern type __lookup_##name##_key; \ +DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, RB_CMP3WAY, RB_BYVAL, \ + typeof(__lookup_##name##_key.keyfld), nodefld) + #define DEFINE_RB_FUNCS(name, type, keyfld, nodefld) \ DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld) \ DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) -- cgit v1.2.3 From a02a946dfe9633d7e0202359836f6b5217a62824 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 19 Jun 2017 12:18:05 +0200 Subject: libceph: respect RADOS_BACKOFF backoffs Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 1 + include/linux/ceph/osd_client.h | 45 +++++++++++++++++++++++++++++++++++++++++ include/linux/ceph/osdmap.h | 1 + include/linux/ceph/rados.h | 6 ++++++ 4 files changed, 53 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index ad078ebe25d6..edf5b04b918a 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -147,6 +147,7 @@ struct ceph_dir_layout { #define CEPH_MSG_OSD_OP 42 #define CEPH_MSG_OSD_OPREPLY 43 #define CEPH_MSG_WATCH_NOTIFY 44 +#define CEPH_MSG_OSD_BACKOFF 61 /* watch-notify operations */ diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 62c672bcbb31..c6d96a5f46fd 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -1,6 +1,7 @@ #ifndef _FS_CEPH_OSD_CLIENT_H #define _FS_CEPH_OSD_CLIENT_H +#include #include #include #include @@ -36,6 +37,8 @@ struct ceph_osd { struct ceph_connection o_con; struct rb_root o_requests; struct rb_root o_linger_requests; + struct rb_root o_backoff_mappings; + struct rb_root o_backoffs_by_id; struct list_head o_osd_lru; struct ceph_auth_handshake o_auth; unsigned long lru_ttl; @@ -275,6 +278,48 @@ 
struct ceph_watch_item { struct ceph_entity_addr addr; }; +struct ceph_spg_mapping { + struct rb_node node; + struct ceph_spg spgid; + + struct rb_root backoffs; +}; + +struct ceph_hobject_id { + void *key; + size_t key_len; + void *oid; + size_t oid_len; + u64 snapid; + u32 hash; + u8 is_max; + void *nspace; + size_t nspace_len; + s64 pool; + + /* cache */ + u32 hash_reverse_bits; +}; + +static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid) +{ + hoid->hash_reverse_bits = bitrev32(hoid->hash); +} + +/* + * PG-wide backoff: [begin, end) + * per-object backoff: begin == end + */ +struct ceph_osd_backoff { + struct rb_node spg_node; + struct rb_node id_node; + + struct ceph_spg spgid; + u64 id; + struct ceph_hobject_id *begin; + struct ceph_hobject_id *end; +}; + #define CEPH_LINGER_ID_START 0xffff000000000000ULL struct ceph_osd_client { diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 060d059acbf8..fe6d189bdd30 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -32,6 +32,7 @@ struct ceph_spg { }; int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs); +int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs); #define CEPH_POOL_FLAG_HASHPSPOOL (1ULL << 0) /* hash pg seed and pool id together */ diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 5d0018782d50..385db08bb8b2 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -439,6 +439,12 @@ enum { const char *ceph_osd_watch_op_name(int o); +enum { + CEPH_OSD_BACKOFF_OP_BLOCK = 1, + CEPH_OSD_BACKOFF_OP_ACK_BLOCK = 2, + CEPH_OSD_BACKOFF_OP_UNBLOCK = 3, +}; + /* * an individual object operation. 
each may be accompanied by some data * payload -- cgit v1.2.3 From 278b1d709c6acc6f7d138fed775c76695b068e43 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 21 Jun 2017 17:27:17 +0200 Subject: libceph: ceph_decode_skip_* helpers Some of these won't be as efficient as they could be (e.g. ceph_decode_skip_set(... 32 ...) could advance by len * sizeof(u32) once instead of advancing by sizeof(u32) len times), but that's fine and not worth a bunch of extra macro code. Replace skip_name_map() with ceph_decode_skip_map as an example. Signed-off-by: Ilya Dryomov --- include/linux/ceph/decode.h | 60 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index f990f2cc907a..14af9b70d301 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -132,6 +132,66 @@ bad: return ERR_PTR(-ERANGE); } +/* + * skip helpers + */ +#define ceph_decode_skip_n(p, end, n, bad) \ + do { \ + ceph_decode_need(p, end, n, bad); \ + *p += n; \ + } while (0) + +#define ceph_decode_skip_64(p, end, bad) \ +ceph_decode_skip_n(p, end, sizeof(u64), bad) + +#define ceph_decode_skip_32(p, end, bad) \ +ceph_decode_skip_n(p, end, sizeof(u32), bad) + +#define ceph_decode_skip_16(p, end, bad) \ +ceph_decode_skip_n(p, end, sizeof(u16), bad) + +#define ceph_decode_skip_8(p, end, bad) \ +ceph_decode_skip_n(p, end, sizeof(u8), bad) + +#define ceph_decode_skip_string(p, end, bad) \ + do { \ + u32 len; \ + \ + ceph_decode_32_safe(p, end, len, bad); \ + ceph_decode_skip_n(p, end, len, bad); \ + } while (0) + +#define ceph_decode_skip_set(p, end, type, bad) \ + do { \ + u32 len; \ + \ + ceph_decode_32_safe(p, end, len, bad); \ + while (len--) \ + ceph_decode_skip_##type(p, end, bad); \ + } while (0) + +#define ceph_decode_skip_map(p, end, ktype, vtype, bad) \ + do { \ + u32 len; \ + \ + ceph_decode_32_safe(p, end, len, bad); \ + while (len--) { \ + 
ceph_decode_skip_##ktype(p, end, bad); \ + ceph_decode_skip_##vtype(p, end, bad); \ + } \ + } while (0) + +#define ceph_decode_skip_map_of_map(p, end, ktype1, ktype2, vtype2, bad) \ + do { \ + u32 len; \ + \ + ceph_decode_32_safe(p, end, len, bad); \ + while (len--) { \ + ceph_decode_skip_##ktype1(p, end, bad); \ + ceph_decode_skip_map(p, end, ktype2, vtype2, bad); \ + } \ + } while (0) + /* * struct ceph_timespec <-> struct timespec */ -- cgit v1.2.3 From 6f428df47dae2c8ea31fd4c0c74a12a8a5ac2d1d Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 21 Jun 2017 17:27:18 +0200 Subject: libceph: pg_upmap[_items] infrastructure pg_temp and pg_upmap encodings are the same (PG -> array of osds), except for the incremental remove: it's an empty mapping in new_pg_temp for pg_temp and a separate old_pg_upmap set for pg_upmap. (This isn't to allow for empty pg_upmap mappings -- apparently, pg_temp just wasn't looked at as an example for pg_upmap encoding.) Reuse __decode_pg_temp() for decoding pg_upmap and new_pg_upmap. __decode_pg_temp() stores into pg_temp union member, but since pg_upmap union member is identical, reading through pg_upmap later is OK. 
Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index fe6d189bdd30..c612cff81f5c 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -143,10 +143,14 @@ struct ceph_pg_mapping { struct { int len; int osds[]; - } pg_temp; + } pg_temp, pg_upmap; struct { int osd; } primary_temp; + struct { + int len; + int from_to[][2]; + } pg_upmap_items; }; }; @@ -165,6 +169,10 @@ struct ceph_osdmap { struct rb_root pg_temp; struct rb_root primary_temp; + /* remap (post-CRUSH, pre-up) */ + struct rb_root pg_upmap; /* PG := raw set */ + struct rb_root pg_upmap_items; /* from -> to within raw set */ + u32 *osd_primary_affinity; struct rb_root pg_pools; -- cgit v1.2.3 From 069f3222ca96acfe8c59937e98c401bda5475b48 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 22 Jun 2017 19:44:05 +0200 Subject: crush: implement weight and id overrides for straw2 bucket_straw2_choose needs to use weights that may be different from weight_items. For instance to compensate for an uneven distribution caused by a low number of values. Or to fix the probability bias introduced by conditional probabilities (see http://tracker.ceph.com/issues/15653 for more information). We introduce a weight_set for each straw2 bucket to set the desired weight for a given item at a given position. The weight of a given item when picking the first replica (first position) may be different from the weight for the second replica (second position). For instance the weight matrix for a given bucket containing items 3, 7 and 13 could be as follows: position 0 position 1 item 3 0x10000 0x100000 item 7 0x40000 0x10000 item 13 0x40000 0x10000 When crush_do_rule picks the first of two replicas (position 0), item 7, 3 are four times more likely to be chosen by bucket_straw2_choose than item 13.
When choosing the second replica (position 1), item 3 is ten times more likely to be chosen than item 7, 13. By default the weight_set of each bucket exactly matches the content of item_weights for each position to ensure backward compatibility. bucket_straw2_choose compares items by using their id. The same ids are also used to index buckets and they must be unique. For each item in a bucket an array of ids can be provided for placement purposes and they are used instead of the ids. If no replacement ids are provided, the legacy behavior is preserved. Reflects ceph.git commit 19537a450fd5c5a0bb8b7830947507a76db2ceca. Signed-off-by: Ilya Dryomov --- include/linux/crush/crush.h | 58 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/crush/mapper.h | 9 +++---- 2 files changed, 62 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index fbecbd089d75..d8676e56fa23 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -137,6 +137,64 @@ struct crush_bucket { }; +/** @ingroup API + * + * Replacement weights for each item in a bucket. The size of the + * array must be exactly the size of the straw2 bucket, just as the + * item_weights array. + * + */ +struct crush_weight_set { + __u32 *weights; /*!< 16.16 fixed point weights + in the same order as items */ + __u32 size; /*!< size of the __weights__ array */ +}; + +/** @ingroup API + * + * Replacement weights and ids for a given straw2 bucket, for + * placement purposes. + * + * When crush_do_rule() chooses the Nth item from a straw2 bucket, the + * replacement weights found at __weight_set[N]__ are used instead of + * the weights from __item_weights__. If __N__ is greater than + * __weight_set_size__, the weights found at __weight_set_size-1__ are + * used instead.
For instance if __weight_set__ is: + * + * [ [ 0x10000, 0x20000 ], // position 0 + * [ 0x20000, 0x40000 ] ] // position 1 + * + * choosing the 0th item will use position 0 weights [ 0x10000, 0x20000 ] + * choosing the 1th item will use position 1 weights [ 0x20000, 0x40000 ] + * choosing the 2th item will use position 1 weights [ 0x20000, 0x40000 ] + * etc. + * + */ +struct crush_choose_arg { + __s32 *ids; /*!< values to use instead of items */ + __u32 ids_size; /*!< size of the __ids__ array */ + struct crush_weight_set *weight_set; /*!< weight replacements for + a given position */ + __u32 weight_set_size; /*!< size of the __weight_set__ array */ +}; + +/** @ingroup API + * + * Replacement weights and ids for each bucket in the crushmap. The + * __size__ of the __args__ array must be exactly the same as the + * __map->max_buckets__. + * + * The __crush_choose_arg__ at index N will be used when choosing + * an item from the bucket __map->buckets[N]__ bucket, provided it + * is a straw2 bucket. 
+ * + */ +struct crush_choose_arg_map { + struct crush_choose_arg *args; /*!< replacement for each bucket + in the crushmap */ + __u32 size; /*!< size of the __args__ array */ +}; + struct crush_bucket_uniform { struct crush_bucket h; __u32 item_weight; /* 16-bit fixed point; all items equally weighted */ diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h index c95e19e1ff11..141edabb947e 100644 --- a/include/linux/crush/mapper.h +++ b/include/linux/crush/mapper.h @@ -11,11 +11,10 @@ #include "crush.h" extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size); -extern int crush_do_rule(const struct crush_map *map, - int ruleno, - int x, int *result, int result_max, - const __u32 *weights, int weight_max, - void *cwin); +int crush_do_rule(const struct crush_map *map, + int ruleno, int x, int *result, int result_max, + const __u32 *weight, int weight_max, + void *cwin, const struct crush_choose_arg *choose_args); /* * Returns the exact amount of workspace that will need to be used -- cgit v1.2.3 From 5cf9c4a9959b6273675310d14a834ef14fbca37c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 22 Jun 2017 19:44:05 +0200 Subject: libceph, crush: per-pool crush_choose_arg_map for crush_do_rule() If there is no crush_choose_arg_map for a given pool, a NULL pointer is passed to preserve existing crush_do_rule() behavior. Reflects ceph.git commits 55fb91d64071552ea1bc65ab4ea84d3c8b73ab4b, dbe36e08be00c6519a8c89718dd47b0219c20516. 
Signed-off-by: Ilya Dryomov --- include/linux/crush/crush.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index d8676e56fa23..92e165d417a6 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -2,6 +2,7 @@ #define CEPH_CRUSH_CRUSH_H #ifdef __KERNEL__ +# include # include #else # include "crush_compat.h" @@ -190,6 +191,10 @@ struct crush_choose_arg { * */ struct crush_choose_arg_map { +#ifdef __KERNEL__ + struct rb_node node; + u64 choose_args_index; +#endif struct crush_choose_arg *args; /*!< replacement for each bucket in the crushmap */ __u32 size; /*!< size of the __args__ array */ @@ -294,6 +299,9 @@ struct crush_map { __u32 allowed_bucket_algs; __u32 *choose_tries; +#else + /* CrushWrapper::choose_args */ + struct rb_root choose_args; #endif }; -- cgit v1.2.3 From 0bb05da2ec57163b7a25efef001ed8f52b18b070 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 22 Jun 2017 19:44:06 +0200 Subject: libceph: osd_state is 32 bits wide in luminous Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index c612cff81f5c..a0996cb9faed 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -162,7 +162,7 @@ struct ceph_osdmap { u32 flags; /* CEPH_OSDMAP_* */ u32 max_osd; /* size of osd_state, _offload, _addr arrays */ - u8 *osd_state; /* CEPH_OSD_* */ + u32 *osd_state; /* CEPH_OSD_* */ u32 *osd_weight; /* 0 = failed, 0x10000 = 100% normal */ struct ceph_entity_addr *osd_addr; @@ -203,7 +203,7 @@ static inline bool ceph_osd_is_down(struct ceph_osdmap *map, int osd) return !ceph_osd_is_up(map, osd); } -extern char *ceph_osdmap_state_str(char *str, int len, int state); +char *ceph_osdmap_state_str(char *str, int len, u32 state); extern u32 ceph_get_primary_affinity(struct 
ceph_osdmap *map, int osd); static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map, -- cgit v1.2.3 From 33e9c8dbfbcef8e4cda8e43a445e692ab7e0d8c0 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 26 Jun 2017 12:05:55 +0200 Subject: libceph: advertise support for NEW_OSDOP_ENCODING and SERVER_LUMINOUS All four SERVER_LUMINOUS feature bits are implemented, switch it on! NEW_OSDOP_ENCODING doesn't mean much for the client (it signifies support for MOSDOp v6) but needs to be enabled in order to get the latest (currently v25) pg_pool_t. Signed-off-by: Ilya Dryomov Acked-by: Sage Weil --- include/linux/ceph/ceph_features.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 78a58770e6e9..f0f6c537b64c 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -184,6 +184,11 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin CEPH_FEATURE_PGPOOL3 | \ CEPH_FEATURE_OSDENC | \ CEPH_FEATURE_CRUSH_TUNABLES | \ + CEPH_FEATURE_SERVER_LUMINOUS | \ + CEPH_FEATURE_RESEND_ON_SPLIT | \ + CEPH_FEATURE_RADOS_BACKOFF | \ + CEPH_FEATURE_OSDMAP_PG_UPMAP | \ + CEPH_FEATURE_CRUSH_CHOOSE_ARGS | \ CEPH_FEATURE_MSG_AUTH | \ CEPH_FEATURE_CRUSH_TUNABLES2 | \ CEPH_FEATURE_REPLY_CREATE_INODE | \ @@ -199,6 +204,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin CEPH_FEATURE_MSGR_KEEPALIVE2 | \ CEPH_FEATURE_OSD_POOLRESEND | \ CEPH_FEATURE_CRUSH_V4 | \ + CEPH_FEATURE_NEW_OSDOP_ENCODING | \ CEPH_FEATURE_SERVER_JEWEL | \ CEPH_FEATURE_MON_STATEFUL_SUB | \ CEPH_FEATURE_CRUSH_TUNABLES5 | \ -- cgit v1.2.3 From 5e14e9fac308daf5607362f879e6de67e0b8dd5b Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Tue, 20 Jun 2017 10:17:40 +0200 Subject: PCI: tango: Add Sigma Designs Tango SMP8759 PCIe host bridge support This driver is required to work around several hardware bugs in the 
PCIe controller. The SMP8759 does not support legacy interrupts or IO space. Signed-off-by: Marc Gonzalez [bhelgaas: add CONFIG_BROKEN dependency, various cleanups] Signed-off-by: Bjorn Helgaas --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 5f6b71d15393..c71e532da458 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1373,6 +1373,8 @@ #define PCI_DEVICE_ID_TTI_HPT374 0x0008 #define PCI_DEVICE_ID_TTI_HPT372N 0x0009 /* apparently a 372N variant? */ +#define PCI_VENDOR_ID_SIGMA 0x1105 + #define PCI_VENDOR_ID_VIA 0x1106 #define PCI_DEVICE_ID_VIA_8763_0 0x0198 #define PCI_DEVICE_ID_VIA_8380_0 0x0204 -- cgit v1.2.3 From 49d31c2f389acfe83417083e1208422b4091cd9e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 7 Jul 2017 14:51:19 -0400 Subject: dentry name snapshots take_dentry_name_snapshot() takes a safe snapshot of dentry name; if the name is a short one, it gets copied into caller-supplied structure, otherwise an extra reference to external name is grabbed (those are never modified). In either case the pointer to stable string is stored into the same structure. dentry must be held by the caller of take_dentry_name_snapshot(), but may be freely dropped afterwards - the snapshot will stay until destroyed by release_dentry_name_snapshot(). Intended use: struct name_snapshot s; take_dentry_name_snapshot(&s, dentry); ... access s.name ... release_dentry_name_snapshot(&s); Replaces fsnotify_oldname_...(), gets used in fsnotify to obtain the name to pass down with event. 
Signed-off-by: Al Viro --- include/linux/dcache.h | 6 ++++++ include/linux/fsnotify.h | 31 ------------------------------- 2 files changed, 6 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d2e38dc6172c..025727bf6797 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -591,5 +591,11 @@ static inline struct inode *d_real_inode(const struct dentry *dentry) return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0)); } +struct name_snapshot { + const char *name; + char inline_name[DNAME_INLINE_LEN]; +}; +void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *); +void release_dentry_name_snapshot(struct name_snapshot *); #endif /* __LINUX_DCACHE_H */ diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index b43d3f5bd9ea..b78aa7ac77ce 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -293,35 +293,4 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) } } -#if defined(CONFIG_FSNOTIFY) /* notify helpers */ - -/* - * fsnotify_oldname_init - save off the old filename before we change it - */ -static inline const unsigned char *fsnotify_oldname_init(const unsigned char *name) -{ - return kstrdup(name, GFP_KERNEL); -} - -/* - * fsnotify_oldname_free - free the name we got from fsnotify_oldname_init - */ -static inline void fsnotify_oldname_free(const unsigned char *old_name) -{ - kfree(old_name); -} - -#else /* CONFIG_FSNOTIFY */ - -static inline const char *fsnotify_oldname_init(const unsigned char *name) -{ - return NULL; -} - -static inline void fsnotify_oldname_free(const unsigned char *old_name) -{ -} - -#endif /* CONFIG_FSNOTIFY */ - #endif /* _LINUX_FS_NOTIFY_H */ -- cgit v1.2.3 From 659b957f20c78fd470083c80af5e79eedfb39e5b Mon Sep 17 00:00:00 2001 From: "Naveen N. 
Rao" Date: Fri, 7 Jul 2017 22:37:24 +0530 Subject: kprobes: Rename [arch_]function_offset_within_entry() to [arch_]kprobe_on_func_entry() Rename function_offset_within_entry() to scope it to kprobe namespace by using kprobe_ prefix, and to also simplify it. Suggested-by: Ingo Molnar Suggested-by: Masami Hiramatsu Signed-off-by: Naveen N. Rao Cc: Ananth N Mavinakayanahalli Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/3aa6c7e2e4fb6e00f3c24fa306496a66edb558ea.1499443367.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 541df0b5b815..bd2684700b74 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -267,8 +267,8 @@ extern int arch_init_kprobes(void); extern void show_registers(struct pt_regs *regs); extern void kprobes_inc_nmissed_count(struct kprobe *p); extern bool arch_within_kprobe_blacklist(unsigned long addr); -extern bool arch_function_offset_within_entry(unsigned long offset); -extern bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); +extern bool arch_kprobe_on_func_entry(unsigned long offset); +extern bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); extern bool within_kprobe_blacklist(unsigned long addr); -- cgit v1.2.3 From f51048c3e07b68c90b21a77541fc4b208f9244d7 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 6 Jul 2017 15:01:57 -0700 Subject: bonding: avoid NETDEV_CHANGEMTU event when unregistering slave As Hongjun/Nicolas summarized in their original patch: " When a device changes from one netns to another, it's first unregistered, then the netns reference is updated and the dev is registered in the new netns. Thus, when a slave moves to another netns, it is first unregistered. 
This triggers a NETDEV_UNREGISTER event which is caught by the bonding driver. The driver calls bond_release(), which calls dev_set_mtu() and thus triggers NETDEV_CHANGEMTU (the device is still in the old netns). " This is a very special case, because the device is being unregistered no one should still care about the NETDEV_CHANGEMTU event triggered at this point, we can avoid broadcasting this event on this path, and avoid touching inetdev_event()/addrconf_notify() path. It requires to export __dev_set_mtu() to bonding driver. Reported-by: Hongjun Li Reported-by: Nicolas Dichtel Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e48ee2eaaa3e..779b23595596 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3284,6 +3284,7 @@ void __dev_notify_flags(struct net_device *, unsigned int old_flags, int dev_change_name(struct net_device *, const char *); int dev_set_alias(struct net_device *, const char *, size_t); int dev_change_net_namespace(struct net_device *, struct net *, const char *); +int __dev_set_mtu(struct net_device *, int); int dev_set_mtu(struct net_device *, int); void dev_set_group(struct net_device *, int); int dev_set_mac_address(struct net_device *, struct sockaddr *); -- cgit v1.2.3 From 8f1a357d41a22009150cf404b5aa5876efdb59b1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 6 Jul 2017 20:26:17 +0300 Subject: i2c: Provide a stub for i2c_detect_slave_mode() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drivers would like to call i2c_detect_slave_mode() even if !I2C_SLAVE. 
Give them what they want; otherwise the kernel will not compile: drivers/i2c/busses/i2c-designware-platdrv.c: In function ‘dw_i2c_plat_probe’: drivers/i2c/busses/i2c-designware-platdrv.c:331:6: error: implicit declaration of function ‘i2c_detect_slave_mode’ [-Werror=implicit-function-declaration] if (i2c_detect_slave_mode(&pdev->dev)) ^~~~~~~~~~~~~~~~~~~~~ cc1: some warnings being treated as errors Fixes: 6e38cf3b4421 ("i2c: designware: Let slave adapter support be optional") Reported-by: Abdul Haleem Signed-off-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 72d0ece70ed3..00ca5b86a753 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -295,6 +295,8 @@ static inline int i2c_slave_event(struct i2c_client *client, { return client->slave_cb(client, event, val); } +#else +static inline bool i2c_detect_slave_mode(struct device *dev) { return false; } #endif /** -- cgit v1.2.3 From d1438ad8f3eec7207618b8e01f9f3eec7b6f67c4 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 7 Jul 2017 18:08:25 -0700 Subject: nvme_fc/nvmet_fc: revise Create Association descriptor length Revises the Create Association LS for the amount of pad expected in 1.16. Add defines for the minimum lengths that a target can accept (e.g. variable pad lengths) Signed-off-by: James Smart Signed-off-by: Sagi Grimberg --- include/linux/nvme-fc.h | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h index bc711a10be05..21c37e39e41a 100644 --- a/include/linux/nvme-fc.h +++ b/include/linux/nvme-fc.h @@ -17,6 +17,7 @@ /* * This file contains definitions relative to FC-NVME r1.14 (16-020vB). + * The fcnvme_lsdesc_cr_assoc_cmd struct reflects expected r1.16 content.
*/ #ifndef _NVME_FC_H @@ -193,9 +194,21 @@ struct fcnvme_lsdesc_cr_assoc_cmd { uuid_t hostid; u8 hostnqn[FCNVME_ASSOC_HOSTNQN_LEN]; u8 subnqn[FCNVME_ASSOC_SUBNQN_LEN]; - u8 rsvd632[384]; + __be32 rsvd584[108]; /* pad to 1016 bytes, + * which makes overall LS rqst + * payload 1024 bytes + */ }; +#define FCNVME_LSDESC_CRA_CMD_DESC_MINLEN \ + offsetof(struct fcnvme_lsdesc_cr_assoc_cmd, rsvd584) + +#define FCNVME_LSDESC_CRA_CMD_DESC_MIN_DESCLEN \ + (FCNVME_LSDESC_CRA_CMD_DESC_MINLEN - \ + offsetof(struct fcnvme_lsdesc_cr_assoc_cmd, ersp_ratio)) + + + /* FCNVME_LSDESC_CREATE_CONN_CMD */ struct fcnvme_lsdesc_cr_conn_cmd { __be32 desc_tag; /* FCNVME_LSDESC_xxx */ @@ -273,6 +286,14 @@ struct fcnvme_ls_cr_assoc_rqst { struct fcnvme_lsdesc_cr_assoc_cmd assoc_cmd; }; +#define FCNVME_LSDESC_CRA_RQST_MINLEN \ + (offsetof(struct fcnvme_ls_cr_assoc_rqst, assoc_cmd) + \ + FCNVME_LSDESC_CRA_CMD_DESC_MINLEN) + +#define FCNVME_LSDESC_CRA_RQST_MIN_LISTLEN \ + FCNVME_LSDESC_CRA_CMD_DESC_MINLEN + + struct fcnvme_ls_cr_assoc_acc { struct fcnvme_ls_acc_hdr hdr; struct fcnvme_lsdesc_assoc_id associd; -- cgit v1.2.3 From 7e988b103d0d52190244517edc76e649071284bb Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 7 Jul 2017 15:49:00 +0200 Subject: KVM: use correct accessor function for __kvm_memslots kvm memslots are protected by srcu and not by rcu. We must use srcu_dereference_check instead of rcu_dereference_check. 
Signed-off-by: Christian Borntraeger Suggested-by: Paolo Bonzini --- include/linux/kvm_host.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b3ca77a96b2d..648b34cabb38 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -568,9 +568,8 @@ void kvm_put_kvm(struct kvm *kvm); static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) { - return rcu_dereference_check(kvm->memslots[as_id], - srcu_read_lock_held(&kvm->srcu) - || lockdep_is_held(&kvm->slots_lock)); + return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu, + lockdep_is_held(&kvm->slots_lock)); } static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) -- cgit v1.2.3 From c43aeb198048f64abda8655fdcdebe71cf1877ba Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 10 Jul 2017 07:40:49 -0400 Subject: fix brown paperbag bug in inlined copy_..._iter() "copied nothing" == "return 0", not "return full size". 
Fixes: aa28de275a24 "iov_iter/hardening: move object size checks to inlined part" Spotted-by: Arnd Bergmann Signed-off-by: Al Viro --- include/linux/uio.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 342d2dc225b9..8a642cda641c 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -103,7 +103,7 @@ static __always_inline __must_check size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { if (unlikely(!check_copy_size(addr, bytes, true))) - return bytes; + return 0; else return _copy_to_iter(addr, bytes, i); } @@ -112,7 +112,7 @@ static __always_inline __must_check size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { if (unlikely(!check_copy_size(addr, bytes, false))) - return bytes; + return 0; else return _copy_from_iter(addr, bytes, i); } @@ -130,7 +130,7 @@ static __always_inline __must_check size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) { if (unlikely(!check_copy_size(addr, bytes, false))) - return bytes; + return 0; else return _copy_from_iter_nocache(addr, bytes, i); } @@ -160,7 +160,7 @@ static __always_inline __must_check size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) { if (unlikely(!check_copy_size(addr, bytes, false))) - return bytes; + return 0; else return _copy_from_iter_flushcache(addr, bytes, i); } -- cgit v1.2.3 From 42a6e0996084972574e0a2b23e7326b78b0f64c5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 10 Jul 2017 13:22:50 +0200 Subject: nvmem: include linux/err.h from header The new support for nvmem devices from the rtc layer caused a build error in some configurations: include/linux/nvmem-provider.h: In function 'nvmem_register': include/linux/nvmem-provider.h:51:9: error: implicit declaration of function 'ERR_PTR' [-Werror=implicit-function-declaration] This adds the missing include to ensure we can always include the header. 
Fixes: 697e5a47aa12 ("rtc: add generic nvmem support") Signed-off-by: Arnd Bergmann Acked-by: Srinivas Kandagatla Signed-off-by: Alexandre Belloni --- include/linux/nvmem-provider.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index cd93416d762e..497706f5adca 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -12,6 +12,9 @@ #ifndef _LINUX_NVMEM_PROVIDER_H #define _LINUX_NVMEM_PROVIDER_H +#include +#include + struct nvmem_device; struct nvmem_cell_info; typedef int (*nvmem_reg_read_t)(void *priv, unsigned int offset, -- cgit v1.2.3 From 23955622ff8d231bcc9650b3d06583f117a6e3ba Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 10 Jul 2017 15:47:11 -0700 Subject: swap: add block io poll in swapin path For fast flash disk, async IO could introduce overhead because of context switch. block-mq now supports IO poll, which improves performance and latency a lot. swapin is a good place to use this technique, because the task is waiting for the swapin page to continue execution. In my virtual machine, directly read 4k data from a NVMe with iopoll is about 60% better than that without poll. With iopoll support in swapin patch, my microbenchmark (a task does random memory write) is about 10%~25% faster. CPU utilization increases a lot though, 2x and even 3x CPU utilization. This will depend on disk speed. While iopoll in swapin isn't intended for all usage cases, it's a win for latency sensitive workloads with high speed swap disk. block layer has knob to control poll in runtime. If poll isn't enabled in block layer, there should be no noticeable change in swapin. I got a chance to run the same test in a NVMe with DRAM as the media. In simple fio IO test, blkpoll boosts 50% performance in single thread test and ~20% in 8 threads test. So this is the base line. In above swap test, blkpoll boosts ~27% performance in single thread test.
blkpoll uses 2x CPU time though. If we enable hybrid polling, the performance gain has very slight drop but CPU time is only 50% worse than that without blkpoll. Also we can adjust parameter of hybrid poll, with it, the CPU time penalty is reduced further. In 8 threads test, blkpoll doesn't help though. The performance is similar to that without blkpoll, but cpu utilization is similar too. There is lock contention in swap path. The cpu time spending on blkpoll isn't high. So overall, blkpoll swapin isn't worse than that without it. The swapin readahead might read several pages in at the same time and form a big IO request. Since the IO will take longer time, it doesn't make sense to do poll, so the patch only does iopoll for single page swapin. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/070c3c3e40b711e7b1390002c991e86a-b5408f0@7511894063d3764ff01ea8111f5a004d7dd700ed078797c204a24e620ddb965c Signed-off-by: Shaohua Li Cc: Tim Chen Cc: Huang Ying Cc: Jens Axboe Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 5ab1c98c7d27..61e7180cee21 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -331,7 +331,7 @@ extern void kswapd_stop(int nid); #include /* for bio_end_io_t */ /* linux/mm/page_io.c */ -extern int swap_readpage(struct page *); +extern int swap_readpage(struct page *page, bool do_poll); extern int swap_writepage(struct page *page, struct writeback_control *wbc); extern void end_swap_bio_write(struct bio *bio); extern int __swap_writepage(struct page *page, struct writeback_control *wbc, @@ -362,7 +362,8 @@ extern void free_page_and_swap_cache(struct page *); extern void free_pages_and_swap_cache(struct page **, int); extern struct page *lookup_swap_cache(swp_entry_t); extern struct page
*read_swap_cache_async(swp_entry_t, gfp_t, - struct vm_area_struct *vma, unsigned long addr); + struct vm_area_struct *vma, unsigned long addr, + bool do_poll); extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t, struct vm_area_struct *vma, unsigned long addr, bool *new_page_allocated); -- cgit v1.2.3 From b37ff71cc626a0c1b5e098ff9a0b723815f6aaeb Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Mon, 10 Jul 2017 15:47:38 -0700 Subject: mm: hwpoison: change PageHWPoison behavior on hugetlb pages We'd like to narrow down the error region in memory error on hugetlb pages. However, currently we set PageHWPoison flags on all subpages in the error hugepage and add # of subpages to num_hwpoison_pages, which doesn't fit our purpose. So this patch changes the behavior and we only set PageHWPoison on the head page then increase num_hwpoison_pages only by 1. This is a preparation for narrow-down part which comes in later patches. Link: http://lkml.kernel.org/r/1496305019-5493-4-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Cc: Michal Hocko Cc: "Aneesh Kumar K.V" Cc: Anshuman Khandual Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swapops.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 5c3a5f3e7eec..c5ff7b217ee6 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -196,15 +196,6 @@ static inline void num_poisoned_pages_dec(void) atomic_long_dec(&num_poisoned_pages); } -static inline void num_poisoned_pages_add(long num) -{ - atomic_long_add(num, &num_poisoned_pages); -} - -static inline void num_poisoned_pages_sub(long num) -{ - atomic_long_sub(num, &num_poisoned_pages); -} #else static inline swp_entry_t make_hwpoison_entry(struct page *page) -- cgit v1.2.3 From c3114a84f7f96c9d5c73c8bfa7e21ff42fda97e2 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 10 Jul 2017 15:47:41 -0700 
Subject: mm: hugetlb: soft-offline: dissolve source hugepage after successful migration Currently hugepage migrated by soft-offline (i.e. due to correctable memory errors) is contained as a hugepage, which means many non-error pages in it are unreusable, i.e. wasted. This patch solves this issue by dissolving source hugepages into buddy. As done in previous patch, PageHWPoison is set only on a head page of the error hugepage. Then in dissolving we move the PageHWPoison flag to the raw error page so that all healthy subpages return back to buddy. [arnd@arndb.de: fix warnings: replace some macros with inline functions] Link: http://lkml.kernel.org/r/20170609102544.2947326-1-arnd@arndb.de Link: http://lkml.kernel.org/r/1496305019-5493-5-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Anshuman Khandual Signed-off-by: Naoya Horiguchi Signed-off-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 46bfb702e7d6..668ab1742ef6 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -472,6 +472,7 @@ static inline pgoff_t basepage_index(struct page *page) return __basepage_index(page); } +extern int dissolve_free_huge_page(struct page *page); extern int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn); static inline bool hugepage_migration_supported(struct hstate *h) @@ -550,15 +551,37 @@ static inline unsigned int pages_per_huge_page(struct hstate *h) { return 1; } -#define hstate_index_to_shift(index) 0 -#define hstate_index(h) 0 + +static inline unsigned hstate_index_to_shift(unsigned index) +{ + return 0; +} + +static inline int hstate_index(struct hstate *h) +{ + return 0; +} static inline pgoff_t basepage_index(struct page *page) { return page->index; } -#define dissolve_free_huge_pages(s, e)
0 -#define hugepage_migration_supported(h) false + +static inline int dissolve_free_huge_page(struct page *page) +{ + return 0; +} + +static inline int dissolve_free_huge_pages(unsigned long start_pfn, + unsigned long end_pfn) +{ + return 0; +} + +static inline bool hugepage_migration_supported(struct hstate *h) +{ + return false; +} static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) -- cgit v1.2.3 From ddd40d8a2c4ef8f2152ea6d227e11475cf7e5bfa Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Mon, 10 Jul 2017 15:47:53 -0700 Subject: mm: hugetlb: delete dequeue_hwpoisoned_huge_page() dequeue_hwpoisoned_huge_page() is no longer used, so let's remove it. Link: http://lkml.kernel.org/r/1496305019-5493-9-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 668ab1742ef6..57f700ac127e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -116,7 +116,6 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to, vm_flags_t vm_flags); long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); -int dequeue_hwpoisoned_huge_page(struct page *page); bool isolate_huge_page(struct page *page, struct list_head *list); void putback_active_hugepage(struct page *page); void free_huge_page(struct page *page); @@ -192,10 +191,6 @@ static inline void hugetlb_show_meminfo(void) #define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \ src_addr, pagep) ({ BUG(); 0; }) #define huge_pte_offset(mm, address, sz) 0 -static inline int dequeue_hwpoisoned_huge_page(struct page *page) -{ - return 0; -} static inline bool isolate_huge_page(struct page *page, struct list_head *list) { -- cgit v1.2.3 From 1860033237d4be09c5d7382585f0c7229367a534 Mon 
Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 10 Jul 2017 15:48:02 -0700 Subject: mm: make PR_SET_THP_DISABLE immediately active PR_SET_THP_DISABLE has a rather subtle semantic. It doesn't affect any existing mapping because it only updated mm->def_flags which is a template for new mappings. The mappings created after prctl(PR_SET_THP_DISABLE) have VM_NOHUGEPAGE flag set. This can be quite surprising for all those applications which do not do prctl(); fork() & exec() and want to control their own THP behavior. Another usecase when the immediate semantic of the prctl might be useful is a combination of pre- and post-copy migration of containers with CRIU. In this case CRIU populates a part of a memory region with data that was saved during the pre-copy stage. Afterwards, the region is registered with userfaultfd and CRIU expects to get page faults for the parts of the region that were not yet populated. However, khugepaged collapses the pages and the expected page faults do not occur. In more general case, the prctl(PR_SET_THP_DISABLE) could be used as a temporary mechanism for enabling/disabling THP process wide. Implementation wise, a new MMF_DISABLE_THP flag is added. This flag is tested when decision whether to use huge pages is taken either during page fault or at the time of THP collapse. It should be noted, that the new implementation makes PR_SET_THP_DISABLE master override to any per-VMA setting, which was not the case previously. Fixes: a0715cc22601 ("mm, thp: add VM_INIT_DEF_MASK and PRCTL_THP_DISABLE") Link: http://lkml.kernel.org/r/1496415802-30944-1-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Michal Hocko Signed-off-by: Mike Rapoport Cc: Vlastimil Babka Cc: Andrea Arcangeli Cc: Arnd Bergmann Cc: "Kirill A.
Shutemov" Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 1 + include/linux/khugepaged.h | 3 ++- include/linux/sched/coredump.h | 5 ++++- 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index d3b3e8fcc717..40d7b7dd2653 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -92,6 +92,7 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma); (1<vm_flags & VM_HUGEPAGE))) && \ !((__vma)->vm_flags & VM_NOHUGEPAGE) && \ + !test_bit(MMF_DISABLE_THP, &(__vma)->vm_mm->flags) && \ !is_vma_temporary_stack(__vma)) #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 5d9a400af509..f0d7335336cd 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -48,7 +48,8 @@ static inline int khugepaged_enter(struct vm_area_struct *vma, if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags)) if ((khugepaged_always() || (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) && - !(vm_flags & VM_NOHUGEPAGE)) + !(vm_flags & VM_NOHUGEPAGE) && + !test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) if (__khugepaged_enter(vma->vm_mm)) return -ENOMEM; return 0; diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 69eedcef8f03..98ae0d05aa32 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -68,7 +68,10 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ #define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ +#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ +#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) -#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) +#define 
MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ + MMF_DISABLE_THP_MASK) #endif /* _LINUX_SCHED_COREDUMP_H */ -- cgit v1.2.3 From 16981d763501c0e06e434cf6b59f964c520e0ccc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 10 Jul 2017 15:48:22 -0700 Subject: mm: improve readability of transparent_hugepage_enabled() Turn the macro into a static inline and rewrite the condition checks for better readability in preparation for adding another condition. [ross.zwisler@linux.intel.com: fix logic to make conversion equivalent] [akpm@linux-foundation.org: resolve vs mm-make-pr_set_thp_disable-immediately-active.patch] [akpm@linux-foundation.org: include coredump.h for MMF_DISABLE_THP] Link: http://lkml.kernel.org/r/149739530612.20686.14760671150202647861.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Reviewed-by: Ross Zwisler Acked-by: "Kirill A. Shutemov" Acked-by: Michal Hocko Cc: Christoph Hellwig Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 40d7b7dd2653..f4239d3c9c73 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -1,6 +1,8 @@ #ifndef _LINUX_HUGE_MM_H #define _LINUX_HUGE_MM_H +#include + extern int do_huge_pmd_anonymous_page(struct vm_fault *vmf); extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, @@ -85,15 +87,29 @@ extern struct kobj_attribute shmem_enabled_attr; extern bool is_vma_temporary_stack(struct vm_area_struct *vma); -#define transparent_hugepage_enabled(__vma) \ - ((transparent_hugepage_flags & \ - (1<vm_flags & VM_HUGEPAGE))) && \ - !((__vma)->vm_flags & VM_NOHUGEPAGE) && \ - !test_bit(MMF_DISABLE_THP, &(__vma)->vm_mm->flags) && \ - !is_vma_temporary_stack(__vma)) +extern 
unsigned long transparent_hugepage_flags; + +static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_NOHUGEPAGE) + return false; + + if (is_vma_temporary_stack(vma)) + return false; + + if (test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) + return false; + + if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG)) + return true; + + if (transparent_hugepage_flags & + (1 << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)) + return !!(vma->vm_flags & VM_HUGEPAGE); + + return false; +} + #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ (1< Date: Mon, 10 Jul 2017 15:48:25 -0700 Subject: mm: always enable thp for dax mappings The madvise policy for transparent huge pages is meant to avoid unwanted allocations of transparent huge pages. It allows a policy of disabling the extra memory pressure and effort to arrange for a huge page when it is not needed. DAX by definition never incurs this overhead since it is statically allocated. The policy choice makes even less sense for device-dax which tries to guarantee a given tlb-fault size. Specifically, the following setting: echo never > /sys/kernel/mm/transparent_hugepage/enabled ...violates that guarantee and silently disables all device-dax instances with a 2M or 1G alignment. So, let's avoid that non-obvious side effect by force enabling thp for dax mappings in all cases. It is worth noting that the reason this uses vma_is_dax(), and the resulting header include changes, is that previous attempts to add a VM_DAX flag were NAKd. Link: http://lkml.kernel.org/r/149739531127.20686.15813586620597484283.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Reviewed-by: Ross Zwisler Cc: Jan Kara Cc: Christoph Hellwig Cc: "Kirill A. 
Shutemov" Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dax.h | 5 ----- include/linux/fs.h | 6 ++++++ include/linux/huge_mm.h | 5 +++++ 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 8f39db7439c3..794811875732 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -154,11 +154,6 @@ static inline unsigned int dax_radix_order(void *entry) #endif int dax_pfn_mkwrite(struct vm_fault *vmf); -static inline bool vma_is_dax(struct vm_area_struct *vma) -{ - return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); -} - static inline bool dax_mapping(struct address_space *mapping) { return mapping->host && IS_DAX(mapping->host); diff --git a/include/linux/fs.h b/include/linux/fs.h index 0cfa47125d52..78e1dbbe4cfd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -3127,6 +3128,11 @@ static inline bool io_is_direct(struct file *filp) return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host); } +static inline bool vma_is_dax(struct vm_area_struct *vma) +{ + return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); +} + static inline int iocb_flags(struct file *file) { int res = 0; diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f4239d3c9c73..ee696347f928 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -3,6 +3,8 @@ #include +#include /* only for vma_is_dax() */ + extern int do_huge_pmd_anonymous_page(struct vm_fault *vmf); extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, @@ -103,6 +105,9 @@ static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma) if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG)) return true; + if (vma_is_dax(vma)) + return true; + if 
(transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)) return !!(vma->vm_flags & VM_HUGEPAGE); -- cgit v1.2.3 From 108a7ac448caff8e35e8c3f92f65faad893e5aca Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 10 Jul 2017 15:48:28 -0700 Subject: include/linux/page_ref.h: ensure page_ref_unfreeze is ordered against prior accesses page_ref_freeze and page_ref_unfreeze are designed to be used as a pair, wrapping a critical section where struct pages can be modified without having to worry about consistency for a concurrent fast-GUP. Whilst page_ref_freeze has full barrier semantics due to its use of atomic_cmpxchg, page_ref_unfreeze is implemented using atomic_set, which doesn't provide any barrier semantics and allows the operation to be reordered with respect to page modifications in the critical section. This patch ensures that page_ref_unfreeze is ordered after any critical section updates, by invoking smp_mb() prior to the atomic_set. Link: http://lkml.kernel.org/r/1497349722-6731-3-git-send-email-will.deacon@arm.com Signed-off-by: Will Deacon Acked-by: Steve Capper Acked-by: Kirill A. 
Shutemov Cc: Mark Rutland Cc: Mel Gorman Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_ref.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 610e13271918..1fd71733aa68 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -174,6 +174,7 @@ static inline void page_ref_unfreeze(struct page *page, int count) VM_BUG_ON_PAGE(page_count(page) != 0, page); VM_BUG_ON(count == 0); + smp_mb(); atomic_set(&page->_refcount, count); if (page_ref_tracepoint_active(__tracepoint_page_ref_unfreeze)) __page_ref_unfreeze(page, count); -- cgit v1.2.3 From 4db9b2efe94967be34e3b136a93251a3c1736dd5 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 10 Jul 2017 15:48:44 -0700 Subject: hugetlb, memory_hotplug: prefer to use reserved pages for migration new_node_page will try to use the origin's next NUMA node as the migration destination for hugetlb pages. If such a node doesn't have any preallocated pool it falls back to __alloc_buddy_huge_page_no_mpol to allocate a surplus page instead. This is quite suboptimal for any configuration when hugetlb pages are not distributed to all NUMA nodes evenly. Say we have a hotpluggable node 4 and spare hugetlb pages are on node 0 /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages:10000 /sys/devices/system/node/node1/hugepages/hugepages-2048kB/nr_hugepages:0 /sys/devices/system/node/node2/hugepages/hugepages-2048kB/nr_hugepages:0 /sys/devices/system/node/node3/hugepages/hugepages-2048kB/nr_hugepages:0 /sys/devices/system/node/node4/hugepages/hugepages-2048kB/nr_hugepages:10000 /sys/devices/system/node/node5/hugepages/hugepages-2048kB/nr_hugepages:0 /sys/devices/system/node/node6/hugepages/hugepages-2048kB/nr_hugepages:0 /sys/devices/system/node/node7/hugepages/hugepages-2048kB/nr_hugepages:0 Now we consume the whole pool on node 4 and try to offline this node.
All the allocated pages should be moved to node0 which has enough preallocated pages to hold them. With the current implementation offlining very likely fails because hugetlb allocations during runtime are much less reliable. Fix this by reusing the nodemask which excludes migration source and try to find a first node which has a page in the preallocated pool first and fall back to __alloc_buddy_huge_page_no_mpol only when the whole pool is consumed. [akpm@linux-foundation.org: remove bogus arg from alloc_huge_page_nodemask() stub] Link: http://lkml.kernel.org/r/20170608074553.22152-3-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Naoya Horiguchi Cc: Xishi Qiu Cc: zhong jiang Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 57f700ac127e..8fd0725d3f30 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -349,6 +349,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, struct page *alloc_huge_page_node(struct hstate *h, int nid); struct page *alloc_huge_page_noerr(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); +struct page *alloc_huge_page_nodemask(struct hstate *h, const nodemask_t *nmask); int huge_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx); @@ -524,6 +525,7 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr struct hstate {}; #define alloc_huge_page(v, a, r) NULL #define alloc_huge_page_node(h, nid) NULL +#define alloc_huge_page_nodemask(h, nmask) NULL #define alloc_huge_page_noerr(v, a, r) NULL #define alloc_bootmem_huge_page(h) NULL #define hstate_file(f) NULL -- cgit v1.2.3 From 8b9132388964df2cfe151a88fd1dd8219dabf23c Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 10 Jul 2017 15:48:47 -0700 Subject: mm: unify new_node_page 
and alloc_migrate_target Commit 394e31d2ceb4 ("mem-hotplug: alloc new page from a nearest neighbor node when mem-offline") has duplicated a large part of alloc_migrate_target with some hotplug specific special casing. To be more precise it tried to enforce the allocation from a different node than the original page. As a result the two functions diverged in their shared logic, e.g. the hugetlb allocation strategy. Let's unify the two and express different NUMA requirements by the given nodemask. new_node_page will simply exclude the node it doesn't care about and alloc_migrate_target will use all the available nodes. alloc_migrate_target will then learn to migrate hugetlb pages more sanely and use preallocated pool when possible. Please note that alloc_migrate_target used to call alloc_page resp. alloc_pages_current so the memory policy of the current context was applied, which is quite strange when we consider that it is used in the context of alloc_contig_range which just tries to migrate pages which stand in the way.
Link: http://lkml.kernel.org/r/20170608074553.22152-4-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Naoya Horiguchi Cc: Xishi Qiu Cc: zhong jiang Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 48e24844b3c5..d9675b665cc4 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -4,6 +4,7 @@ #include #include #include +#include typedef struct page *new_page_t(struct page *page, unsigned long private, int **reason); @@ -30,6 +31,21 @@ enum migrate_reason { /* In mm/debug.c; also keep sync with include/trace/events/migrate.h */ extern char *migrate_reason_names[MR_TYPES]; +static inline struct page *new_page_nodemask(struct page *page, + int preferred_nid, nodemask_t *nodemask) +{ + gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE; + + if (PageHuge(page)) + return alloc_huge_page_nodemask(page_hstate(compound_head(page)), + nodemask); + + if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) + gfp_mask |= __GFP_HIGHMEM; + + return __alloc_pages_nodemask(gfp_mask, 0, preferred_nid, nodemask); +} + #ifdef CONFIG_MIGRATION extern void putback_movable_pages(struct list_head *l); -- cgit v1.2.3 From aaf14e40a33a2c9350471387031ca40c00f5a006 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 10 Jul 2017 15:49:08 -0700 Subject: mm, hugetlb: unclutter hugetlb allocation layers Patch series "mm, hugetlb: allow proper node fallback dequeue". While working on a hugetlb migration issue addressed in a separate patchset[1] I have noticed that the hugetlb allocations from the preallocated pool are quite suboptimal. [1] //lkml.kernel.org/r/20170608074553.22152-1-mhocko@kernel.org There is no fallback mechanism implemented and no notion of preferred node.
I have tried to work around it but Vlastimil was right to push back for a more robust solution. It seems that such a solution is to reuse zonelist approach we use for the page allocator. This series has 3 patches. The first one tries to make hugetlb allocation layers more clear. The second one implements the zonelist hugetlb pool allocation and introduces a preferred node semantic which is used by the migration callbacks. The last patch is a clean up. This patch (of 3): Hugetlb allocation path for fresh huge pages is unnecessarily complex and it mixes different interfaces between layers. __alloc_buddy_huge_page is the central place to perform a new allocation. It checks for the hugetlb overcommit and then relies on __hugetlb_alloc_buddy_huge_page to invoke the page allocator. This is all good except that __alloc_buddy_huge_page pushes vma and address down the callchain and so __hugetlb_alloc_buddy_huge_page has to deal with two different allocation modes - one for memory policy and other node specific (or to make it more obscure node non-specific) requests. This just screams for a reorganization. This patch pulls out all the vma specific handling up to __alloc_buddy_huge_page_with_mpol where it belongs. __alloc_buddy_huge_page will get nodemask argument and __hugetlb_alloc_buddy_huge_page will become a trivial wrapper over the page allocator. In short: __alloc_buddy_huge_page_with_mpol - memory policy handling __alloc_buddy_huge_page - overcommit handling and accounting __hugetlb_alloc_buddy_huge_page - page allocator layer Also note that __hugetlb_alloc_buddy_huge_page and its cpuset retry loop is not really needed because the page allocator already handles the cpusets update. Finally __hugetlb_alloc_buddy_huge_page had a special case for node specific allocations (when no policy is applied and there is a node given). This has relied on __GFP_THISNODE to not fall back to a different node.
alloc_huge_page_node is the only caller which relies on this behavior so move the __GFP_THISNODE there. Not only does this remove quite some code it also should make those layers easier to follow and clear wrt responsibilities. Link: http://lkml.kernel.org/r/20170622193034.28972-2-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Reviewed-by: Mike Kravetz Tested-by: Mike Kravetz Cc: Naoya Horiguchi Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 8fd0725d3f30..66b621469f52 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -349,7 +349,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, struct page *alloc_huge_page_node(struct hstate *h, int nid); struct page *alloc_huge_page_noerr(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); -struct page *alloc_huge_page_nodemask(struct hstate *h, const nodemask_t *nmask); +struct page *alloc_huge_page_nodemask(struct hstate *h, nodemask_t *nmask); int huge_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx); -- cgit v1.2.3 From 3e59fcb0e8c1c40aecb60fa4c2d1543d6a097184 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 10 Jul 2017 15:49:11 -0700 Subject: hugetlb: add support for preferred node to alloc_huge_page_nodemask alloc_huge_page_nodemask tries to allocate from any numa node in the allowed node mask starting from lower numa nodes. This might lead to filling up those low NUMA nodes while others are not used. We can reduce this risk by introducing a concept of the preferred node similar to what we have in the regular page allocator. We will start allocating from the preferred nid and then iterate over all allowed nodes in the zonelist order until we try them all. 
This is mimicking the page allocator logic except it operates on per-node mempools. dequeue_huge_page_vma already does this so distill the zonelist logic into a more generic dequeue_huge_page_nodemask and use it in alloc_huge_page_nodemask. This will allow us to use proper per numa distance fallback also for alloc_huge_page_node which can use alloc_huge_page_nodemask now and we can get rid of alloc_huge_page_node helper which doesn't have any user anymore. Link: http://lkml.kernel.org/r/20170622193034.28972-3-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Reviewed-by: Mike Kravetz Tested-by: Mike Kravetz Cc: Naoya Horiguchi Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 5 +++-- include/linux/migrate.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 66b621469f52..8d9fe131a240 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -349,7 +349,8 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, struct page *alloc_huge_page_node(struct hstate *h, int nid); struct page *alloc_huge_page_noerr(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); -struct page *alloc_huge_page_nodemask(struct hstate *h, nodemask_t *nmask); +struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, + nodemask_t *nmask); int huge_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx); @@ -525,7 +526,7 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr struct hstate {}; #define alloc_huge_page(v, a, r) NULL #define alloc_huge_page_node(h, nid) NULL -#define alloc_huge_page_nodemask(h, nmask) NULL +#define alloc_huge_page_nodemask(h, preferred_nid, nmask) NULL #define alloc_huge_page_noerr(v, a, r) NULL #define alloc_bootmem_huge_page(h) NULL #define hstate_file(f) NULL diff --git
a/include/linux/migrate.h b/include/linux/migrate.h index d9675b665cc4..4634da521238 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -38,7 +38,7 @@ static inline struct page *new_page_nodemask(struct page *page, if (PageHuge(page)) return alloc_huge_page_nodemask(page_hstate(compound_head(page)), - nodemask); + preferred_nid, nodemask); if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) gfp_mask |= __GFP_HIGHMEM; -- cgit v1.2.3 From 618b8c20d03c9ea06711bd36d906322ba35c0add Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 10 Jul 2017 15:49:32 -0700 Subject: include/linux/mmzone.h: remove ancient/ambiguous comment Currently pg_data_t is just a struct which describes a NUMA node memory layout. Let's keep the comment simple and remove ambiguity. Link: http://lkml.kernel.org/r/1498220534-22717-1-git-send-email-nborisov@suse.com Signed-off-by: Nikolay Borisov Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7e8f100cb56d..16532fa0bb64 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -603,12 +603,9 @@ extern struct page *mem_map; #endif /* - * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM - * (mostly NUMA machines?) to denote a higher-level memory zone than the - * zone denotes. - * * On NUMA machines, each NUMA node would have a pg_data_t to describe - * it's memory layout. + * it's memory layout. On UMA machines there is a single pglist_data which + * describes the whole memory. * * Memory statistics and page replacement data structures are maintained on a * per-zone basis. 
-- cgit v1.2.3 From e3d3910a57ab9c70cddb2522ae711ff9bff89e7c Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 10 Jul 2017 15:49:35 -0700 Subject: include/linux/backing-dev.h: simplify wb_stat_sum wb_stat_sum() disables interrupts and calls __wb_stat_sum() which eventually calls __percpu_counter_sum(). However, the percpu routine is already irq-safe. Simplify the code a bit by making wb_stat_sum() directly call percpu_counter_sum_positive() and not disable interrupts. Also remove the now-unneeded __wb_stat_sum() which was just a wrapper over percpu_counter_sum_positive(). Link: http://lkml.kernel.org/r/1498230681-29103-1-git-send-email-nborisov@suse.com Signed-off-by: Nikolay Borisov Acked-by: Peter Zijlstra Cc: Tejun Heo Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/backing-dev.h | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index ace73f96eb1e..334165c911f0 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -104,22 +104,9 @@ static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) return percpu_counter_read_positive(&wb->stat[item]); } -static inline s64 __wb_stat_sum(struct bdi_writeback *wb, - enum wb_stat_item item) -{ - return percpu_counter_sum_positive(&wb->stat[item]); -} - static inline s64 wb_stat_sum(struct bdi_writeback *wb, enum wb_stat_item item) { - s64 sum; - unsigned long flags; - - local_irq_save(flags); - sum = __wb_stat_sum(wb, item); - local_irq_restore(flags); - - return sum; + return percpu_counter_sum_positive(&wb->stat[item]); } extern void wb_writeout_inc(struct bdi_writeback *wb); -- cgit v1.2.3 From 2c80cd57c74339889a8752b20862a16c28929c3a Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Mon, 10 Jul 2017 15:49:57 -0700 Subject: mm/list_lru.c: fix list_lru_count_node() to be race free
list_lru_count_node() iterates over all memcgs to get the total number of entries on the node but it can race with memcg_drain_all_list_lrus(), which migrates the entries from a dead cgroup to another. This can return incorrect number of entries from list_lru_count_node(). Fix this by keeping track of entries per node and simply return it in list_lru_count_node(). Link: http://lkml.kernel.org/r/1498707555-30525-1-git-send-email-stummala@codeaurora.org Signed-off-by: Sahitya Tummala Acked-by: Vladimir Davydov Cc: Jan Kara Cc: Alexander Polakov Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list_lru.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index cb0ba9f2a9a2..fa7fd03cb5f9 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -44,6 +44,7 @@ struct list_lru_node { /* for cgroup aware lrus points to per cgroup lists, otherwise NULL */ struct list_lru_memcg *memcg_lrus; #endif + long nr_items; } ____cacheline_aligned_in_smp; struct list_lru { -- cgit v1.2.3 From a47fed5b5b014f5a13878b90ef2c3a7dc294189f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jul 2017 15:50:06 -0700 Subject: mm: swap: provide lru_add_drain_all_cpuslocked() The rework of the cpu hotplug locking unearthed potential deadlocks with the memory hotplug locking code. The solution for these is to rework the memory hotplug locking code as well and take the cpu hotplug lock before the memory hotplug lock in mem_hotplug_begin(), but this will cause a recursive locking of the cpu hotplug lock when the memory hotplug code calls lru_add_drain_all(). Split out the inner workings of lru_add_drain_all() into lru_add_drain_all_cpuslocked() so this function can be invoked from the memory hotplug code with the cpu hotplug lock held. 
Link: http://lkml.kernel.org/r/20170704093421.419329357@linutronix.de Signed-off-by: Thomas Gleixner Reported-by: Andrey Ryabinin Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Vladimir Davydov Cc: Peter Zijlstra Cc: Davidlohr Bueso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 61e7180cee21..d83d28e53e62 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -277,6 +277,7 @@ extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_all(void); +extern void lru_add_drain_all_cpuslocked(void); extern void rotate_reclaimable_page(struct page *page); extern void deactivate_file_page(struct page *page); extern void mark_page_lazyfree(struct page *page); -- cgit v1.2.3 From 9d1f4b3f5b29bea431525e528a3ff2dc806ad904 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 10 Jul 2017 15:50:12 -0700 Subject: mm: disallow early_pfn_to_nid on configurations which do not implement it early_pfn_to_nid will return node 0 if both HAVE_ARCH_EARLY_PFN_TO_NID and HAVE_MEMBLOCK_NODE_MAP are disabled. It seems we are safe now because all architectures which support NUMA define one of them (with an exception of alpha which however has CONFIG_NUMA marked as broken) so this works as expected. It can get silently and subtly broken too easily, though. Make sure we fail the compilation if NUMA is enabled and there is no proper implementation for this function. If that ever happens we know that either the specific configuration is invalid and the fix should either disable NUMA or enable one of the above configs. 
Link: http://lkml.kernel.org/r/20170704075803.15979-1-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Joonsoo Kim Cc: Yang Shi Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 16532fa0bb64..fc14b8b3f6ce 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1055,6 +1055,7 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) static inline unsigned long early_pfn_to_nid(unsigned long pfn) { + BUILD_BUG_ON(IS_ENABLED(CONFIG_NUMA)); return 0; } #endif -- cgit v1.2.3 From e9d5a48499391fe5b0615610858665ba8149e255 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 10 Jul 2017 15:50:58 -0700 Subject: linux/bug.h: correct formatting of block comment Correct these checkpatch.pl warnings: |WARNING: Block comments use * on subsequent lines |#34: FILE: include/linux/bug.h:34: |+/* Force a compilation error if condition is true, but also produce a |+ result (of value 0 and type size_t), so the expression can be used |WARNING: Block comments use a trailing */ on a separate line |#36: FILE: include/linux/bug.h:36: |+ aren't permitted). 
*/ Link: http://lkml.kernel.org/r/20170525120316.24473-3-abbotti@mev.co.uk Signed-off-by: Ian Abbott Acked-by: Michal Nazarewicz Cc: Kees Cook Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Jakub Kicinski Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bug.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bug.h b/include/linux/bug.h index 687b557fc5eb..ca24007e2dc3 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -30,10 +30,12 @@ struct pt_regs; #define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) -/* Force a compilation error if condition is true, but also produce a - result (of value 0 and type size_t), so the expression can be used - e.g. in a structure initializer (or where-ever else comma expressions - aren't permitted). */ +/* + * Force a compilation error if condition is true, but also produce a + * result (of value 0 and type size_t), so the expression can be used + * e.g. in a structure initializer (or where-ever else comma expressions + * aren't permitted). 
+ */ #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) -- cgit v1.2.3 From 8cdd7cca9287abf4c849c01e2a4e8207ad3e3a82 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 10 Jul 2017 15:51:01 -0700 Subject: linux/bug.h: correct "(foo*)" should be "(foo *)" Correct this checkpatch.pl error: |ERROR: "(foo*)" should be "(foo *)" |#19: FILE: include/linux/bug.h:19: |+#define BUILD_BUG_ON_NULL(e) ((void*)0) Link: http://lkml.kernel.org/r/20170525120316.24473-4-abbotti@mev.co.uk Signed-off-by: Ian Abbott Acked-by: Michal Nazarewicz Cc: Kees Cook Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Jakub Kicinski Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bug.h b/include/linux/bug.h index ca24007e2dc3..216a1b79653d 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -16,7 +16,7 @@ struct pt_regs; #define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) #define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) #define BUILD_BUG_ON_ZERO(e) (0) -#define BUILD_BUG_ON_NULL(e) ((void*)0) +#define BUILD_BUG_ON_NULL(e) ((void *)0) #define BUILD_BUG_ON_INVALID(e) (0) #define BUILD_BUG_ON_MSG(cond, msg) (0) #define BUILD_BUG_ON(condition) (0) -- cgit v1.2.3 From 47e81e59d98b90727a02ceb486407eeed5eb8727 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 10 Jul 2017 15:51:04 -0700 Subject: linux/bug.h: correct "space required before that '-'" Correct these checkpatch.pl errors: |ERROR: space required before that '-' (ctx:OxO) |#37: FILE: include/linux/bug.h:37: |+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) |ERROR: space required before that '-' (ctx:OxO) |#38: FILE: include/linux/bug.h:38: |+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) I decided to wrap the bitfield expressions that begin with minus signs in parentheses 
rather than insert spaces before the minus signs. Link: http://lkml.kernel.org/r/20170525120316.24473-5-abbotti@mev.co.uk Signed-off-by: Ian Abbott Acked-by: Michal Nazarewicz Cc: Kees Cook Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Jakub Kicinski Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bug.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bug.h b/include/linux/bug.h index 216a1b79653d..483207cb99fb 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -36,8 +36,8 @@ struct pt_regs; * e.g. in a structure initializer (or where-ever else comma expressions * aren't permitted). */ -#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) -#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) +#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:(-!!(e)); })) +#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:(-!!(e)); })) /* * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the -- cgit v1.2.3 From bc6245e5efd70c41eaf9334b1b5e646745cb0fb3 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 10 Jul 2017 15:51:07 -0700 Subject: bug: split BUILD_BUG stuff out into Including pulls in a lot of bloat from and that is not needed to call the BUILD_BUG() family of macros. Split them out into their own header, . Also correct some checkpatch.pl errors for the BUILD_BUG_ON_ZERO() and BUILD_BUG_ON_NULL() macros by adding parentheses around the bitfield widths that begin with a minus sign. 
Link: http://lkml.kernel.org/r/20170525120316.24473-6-abbotti@mev.co.uk Signed-off-by: Ian Abbott Acked-by: Michal Nazarewicz Acked-by: Kees Cook Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Jakub Kicinski Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bug.h | 74 +---------------------------------------- include/linux/build_bug.h | 84 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 73 deletions(-) create mode 100644 include/linux/build_bug.h (limited to 'include/linux') diff --git a/include/linux/bug.h b/include/linux/bug.h index 483207cb99fb..5d5554c874fd 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -3,6 +3,7 @@ #include #include +#include enum bug_trap_type { BUG_TRAP_TYPE_NONE = 0, @@ -13,82 +14,9 @@ enum bug_trap_type { struct pt_regs; #ifdef __CHECKER__ -#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) -#define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) -#define BUILD_BUG_ON_ZERO(e) (0) -#define BUILD_BUG_ON_NULL(e) ((void *)0) -#define BUILD_BUG_ON_INVALID(e) (0) -#define BUILD_BUG_ON_MSG(cond, msg) (0) -#define BUILD_BUG_ON(condition) (0) -#define BUILD_BUG() (0) #define MAYBE_BUILD_BUG_ON(cond) (0) #else /* __CHECKER__ */ -/* Force a compilation error if a constant expression is not a power of 2 */ -#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \ - BUILD_BUG_ON(((n) & ((n) - 1)) != 0) -#define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ - BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) - -/* - * Force a compilation error if condition is true, but also produce a - * result (of value 0 and type size_t), so the expression can be used - * e.g. in a structure initializer (or where-ever else comma expressions - * aren't permitted). 
- */ -#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:(-!!(e)); })) -#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:(-!!(e)); })) - -/* - * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the - * expression but avoids the generation of any code, even if that expression - * has side-effects. - */ -#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e)))) - -/** - * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied - * error message. - * @condition: the condition which the compiler should know is false. - * - * See BUILD_BUG_ON for description. - */ -#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) - -/** - * BUILD_BUG_ON - break compile if a condition is true. - * @condition: the condition which the compiler should know is false. - * - * If you have some code which relies on certain constants being equal, or - * some other compile-time-evaluated condition, you should use BUILD_BUG_ON to - * detect if someone changes it. - * - * The implementation uses gcc's reluctance to create a negative array, but gcc - * (as of 4.4) only emits that error for obvious cases (e.g. not arguments to - * inline functions). Luckily, in 4.3 they added the "error" function - * attribute just for this type of case. Thus, we use a negative sized array - * (should always create an error on gcc versions older than 4.4) and then call - * an undefined function with the error attribute (should always create an - * error on gcc 4.3 and later). If for some reason, neither creates a - * compile-time error, we'll still have a link-time error, which is harder to - * track down. - */ -#ifndef __OPTIMIZE__ -#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) -#else -#define BUILD_BUG_ON(condition) \ - BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition) -#endif - -/** - * BUILD_BUG - break compile if used. 
- * - * If you have some code that you expect the compiler to eliminate at - * build time, you should use BUILD_BUG to detect if it is - * unexpectedly used. - */ -#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed") - #define MAYBE_BUILD_BUG_ON(cond) \ do { \ if (__builtin_constant_p((cond))) \ diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h new file mode 100644 index 000000000000..b7d22d60008a --- /dev/null +++ b/include/linux/build_bug.h @@ -0,0 +1,84 @@ +#ifndef _LINUX_BUILD_BUG_H +#define _LINUX_BUILD_BUG_H + +#include + +#ifdef __CHECKER__ +#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) +#define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) +#define BUILD_BUG_ON_ZERO(e) (0) +#define BUILD_BUG_ON_NULL(e) ((void *)0) +#define BUILD_BUG_ON_INVALID(e) (0) +#define BUILD_BUG_ON_MSG(cond, msg) (0) +#define BUILD_BUG_ON(condition) (0) +#define BUILD_BUG() (0) +#else /* __CHECKER__ */ + +/* Force a compilation error if a constant expression is not a power of 2 */ +#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \ + BUILD_BUG_ON(((n) & ((n) - 1)) != 0) +#define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ + BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) + +/* + * Force a compilation error if condition is true, but also produce a + * result (of value 0 and type size_t), so the expression can be used + * e.g. in a structure initializer (or where-ever else comma expressions + * aren't permitted). + */ +#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:(-!!(e)); })) +#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:(-!!(e)); })) + +/* + * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the + * expression but avoids the generation of any code, even if that expression + * has side-effects. + */ +#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e)))) + +/** + * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied + * error message. + * @condition: the condition which the compiler should know is false. 
+ * + * See BUILD_BUG_ON for description. + */ +#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) + +/** + * BUILD_BUG_ON - break compile if a condition is true. + * @condition: the condition which the compiler should know is false. + * + * If you have some code which relies on certain constants being equal, or + * some other compile-time-evaluated condition, you should use BUILD_BUG_ON to + * detect if someone changes it. + * + * The implementation uses gcc's reluctance to create a negative array, but gcc + * (as of 4.4) only emits that error for obvious cases (e.g. not arguments to + * inline functions). Luckily, in 4.3 they added the "error" function + * attribute just for this type of case. Thus, we use a negative sized array + * (should always create an error on gcc versions older than 4.4) and then call + * an undefined function with the error attribute (should always create an + * error on gcc 4.3 and later). If for some reason, neither creates a + * compile-time error, we'll still have a link-time error, which is harder to + * track down. + */ +#ifndef __OPTIMIZE__ +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) +#else +#define BUILD_BUG_ON(condition) \ + BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition) +#endif + +/** + * BUILD_BUG - break compile if used. + * + * If you have some code that you expect the compiler to eliminate at + * build time, you should use BUILD_BUG to detect if it is + * unexpectedly used. + */ +#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed") + +#endif /* __CHECKER__ */ + +#endif /* _LINUX_BUILD_BUG_H */ -- cgit v1.2.3 From 287f3ca563d8ba0ede4ac0cec84218a1ea5e848f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 10 Jul 2017 15:51:10 -0700 Subject: ARM: fix rd_size declaration The global variable 'rd_size' is declared as 'int' in source file arch/arm/kernel/atags_parse.c and as 'unsigned long' in drivers/block/brd.c. Fix this inconsistency. 
Additionally, remove the declarations of rd_image_start, rd_prompt and rd_doload from parse_tag_ramdisk() since these duplicate existing declarations in . Link: http://lkml.kernel.org/r/20170627065024.12347-1-bart.vanassche@wdc.com Signed-off-by: Bart Van Assche Acked-by: Russell King Cc: Jens Axboe Cc: Jan Kara Cc: Jason Yan Cc: Zhaohongjiang Cc: Miao Xie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/initrd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/initrd.h b/include/linux/initrd.h index 55289d261b4f..bc67b767f9ce 100644 --- a/include/linux/initrd.h +++ b/include/linux/initrd.h @@ -10,6 +10,9 @@ extern int rd_prompt; /* starting block # of image */ extern int rd_image_start; +/* size of a single RAM disk */ +extern unsigned long rd_size; + /* 1 if it is not an error if initrd_start < memory_start */ extern int initrd_below_start_ok; -- cgit v1.2.3 From e5af323c9badd5dc09af7ccf9d45616ebffc623c Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 10 Jul 2017 15:51:29 -0700 Subject: bitmap: optimise bitmap_set and bitmap_clear of a single bit We have eight users calling bitmap_clear for a single bit and seventeen calling bitmap_set for a single bit. Rather than fix all of them to call __clear_bit or __set_bit, turn bitmap_clear and bitmap_set into inline functions and make this special case efficient. 
Link: http://lkml.kernel.org/r/20170628153221.11322-3-willy@infradead.org Signed-off-by: Matthew Wilcox Acked-by: Rasmus Villemoes Cc: Martin Schwidefsky Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 3b77588a9360..4e0f0c8167af 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -112,9 +112,8 @@ extern int __bitmap_intersects(const unsigned long *bitmap1, extern int __bitmap_subset(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); - -extern void bitmap_set(unsigned long *map, unsigned int start, int len); -extern void bitmap_clear(unsigned long *map, unsigned int start, int len); +extern void __bitmap_set(unsigned long *map, unsigned int start, int len); +extern void __bitmap_clear(unsigned long *map, unsigned int start, int len); extern unsigned long bitmap_find_next_zero_area_off(unsigned long *map, unsigned long size, @@ -315,6 +314,24 @@ static __always_inline int bitmap_weight(const unsigned long *src, unsigned int return __bitmap_weight(src, nbits); } +static __always_inline void bitmap_set(unsigned long *map, unsigned int start, + unsigned int nbits) +{ + if (__builtin_constant_p(nbits) && nbits == 1) + __set_bit(start, map); + else + __bitmap_set(map, start, nbits); +} + +static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, + unsigned int nbits) +{ + if (__builtin_constant_p(nbits) && nbits == 1) + __clear_bit(start, map); + else + __bitmap_clear(map, start, nbits); +} + static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src, unsigned int shift, int nbits) { -- cgit v1.2.3 From 2a98dc028f911a7c59c87d11d4eed6626be1605b Mon Sep 17 00:00:00 2001 From: 
Matthew Wilcox Date: Mon, 10 Jul 2017 15:51:32 -0700 Subject: include/linux/bitmap.h: turn bitmap_set and bitmap_clear into memset when possible Several callers have constant 'start' and an 'nbits' that is a multiple of 8, so we can turn them into calls to memset. We don't need the entirety of 'start' and 'nbits' to be constant, we just need to know whether they're divisible by 8. Link: http://lkml.kernel.org/r/20170628153221.11322-4-willy@infradead.org Signed-off-by: Matthew Wilcox Acked-by: Rasmus Villemoes Cc: Martin Schwidefsky Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 4e0f0c8167af..c04c9d155e59 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -319,6 +319,9 @@ static __always_inline void bitmap_set(unsigned long *map, unsigned int start, { if (__builtin_constant_p(nbits) && nbits == 1) __set_bit(start, map); + else if (__builtin_constant_p(start & 7) && IS_ALIGNED(start, 8) && + __builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8)) + memset((char *)map + start / 8, 0xff, nbits / 8); else __bitmap_set(map, start, nbits); } @@ -328,6 +331,9 @@ static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, { if (__builtin_constant_p(nbits) && nbits == 1) __clear_bit(start, map); + else if (__builtin_constant_p(start & 7) && IS_ALIGNED(start, 8) && + __builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8)) + memset((char *)map + start / 8, 0, nbits / 8); else __bitmap_clear(map, start, nbits); } -- cgit v1.2.3 From 2c6deb01525ac11cc03c44fe31e3f45ce2cadaf9 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 10 Jul 2017 15:51:35 -0700 Subject: bitmap: use memcmp optimisation in more situations Commit 7dd968163f7c ("bitmap: bitmap_equal memcmp optimization") was rather more restrictive than necessary; we can use memcmp() to 
implement bitmap_equal() as long as the number of bits can be proved to be a multiple of 8. And architectures other than s390 may be able to make good use of this optimisation. [arnd@arndb.de: fix build: add a memcmp() declaration] Link: http://lkml.kernel.org/r/20170630153908.3439707-1-arnd@arndb.de Link: http://lkml.kernel.org/r/20170628153221.11322-5-willy@infradead.org Signed-off-by: Matthew Wilcox Signed-off-by: Arnd Bergmann Acked-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index c04c9d155e59..5797ca6fdfe2 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -266,10 +266,8 @@ static inline int bitmap_equal(const unsigned long *src1, { if (small_const_nbits(nbits)) return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); -#ifdef CONFIG_S390 - if (__builtin_constant_p(nbits) && (nbits % BITS_PER_LONG) == 0) + if (__builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8)) return !memcmp(src1, src2, nbits / 8); -#endif return __bitmap_equal(src1, src2, nbits); } -- cgit v1.2.3 From a94c33dd1f677d16c4f1a162b4b3e9eba1b07c24 Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Mon, 10 Jul 2017 15:51:58 -0700 Subject: lib/extable.c: use bsearch() library function in search_extable() [thomas@m3y3r.de: v3: fix arch specific implementations] Link: http://lkml.kernel.org/r/1497890858.12931.7.camel@m3y3r.de Signed-off-by: Thomas Meyer Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/extable.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/extable.h b/include/linux/extable.h index 7effea4b257d..28addad0dda7 100644 --- a/include/linux/extable.h +++ b/include/linux/extable.h @@ -2,13 +2,14 @@ #define _LINUX_EXTABLE_H #include /* for NULL */ 
+#include struct module; struct exception_table_entry; const struct exception_table_entry * -search_extable(const struct exception_table_entry *first, - const struct exception_table_entry *last, +search_extable(const struct exception_table_entry *base, + const size_t num, unsigned long value); void sort_extable(struct exception_table_entry *start, struct exception_table_entry *finish); -- cgit v1.2.3 From 1d278a879081ddc40286500e58868aaee47de257 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Jul 2017 16:25:53 +0100 Subject: VFS: Kill off s_options and helpers Kill off s_options, save/replace_mount_options() and generic_show_options() as all filesystems now implement ->show_options() for themselves. This should make it easier to implement a context-based mount where the mount options can be passed individually over a file descriptor. Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/fs.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index bc0c054894b9..e265b2ea72c6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1351,11 +1351,6 @@ struct super_block { */ char *s_subtype; - /* - * Saved mount options for lazy filesystems using - * generic_show_options() - */ - char __rcu *s_options; const struct dentry_operations *s_d_op; /* default d_op for dentries */ /* @@ -3033,10 +3028,6 @@ extern void setattr_copy(struct inode *inode, const struct iattr *attr); extern int file_update_time(struct file *file); -extern int generic_show_options(struct seq_file *m, struct dentry *root); -extern void save_mount_options(struct super_block *sb, char *options); -extern void replace_mount_options(struct super_block *sb, char *options); - static inline bool io_is_direct(struct file *filp) { return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host); -- cgit v1.2.3 From 2fd1d2c4ceb2248a727696962cf3370dc9f5a0a4 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Thu, 6 Jul 2017 08:41:06 -0500 Subject: proc: Fix proc_sys_prune_dcache to hold a sb reference Andrei Vagin writes: FYI: This bug has been reproduced on 4.11.7 > BUG: Dentry ffff895a3dd01240{i=4e7c09a,n=lo} still in use (1) [unmount of proc proc] > ------------[ cut here ]------------ > WARNING: CPU: 1 PID: 13588 at fs/dcache.c:1445 umount_check+0x6e/0x80 > CPU: 1 PID: 13588 Comm: kworker/1:1 Not tainted 4.11.7-200.fc25.x86_64 #1 > Hardware name: CompuLab sbc-flt1/fitlet, BIOS SBCFLT_0.08.04 06/27/2015 > Workqueue: events proc_cleanup_work > Call Trace: > dump_stack+0x63/0x86 > __warn+0xcb/0xf0 > warn_slowpath_null+0x1d/0x20 > umount_check+0x6e/0x80 > d_walk+0xc6/0x270 > ? dentry_free+0x80/0x80 > do_one_tree+0x26/0x40 > shrink_dcache_for_umount+0x2d/0x90 > generic_shutdown_super+0x1f/0xf0 > kill_anon_super+0x12/0x20 > proc_kill_sb+0x40/0x50 > deactivate_locked_super+0x43/0x70 > deactivate_super+0x5a/0x60 > cleanup_mnt+0x3f/0x90 > mntput_no_expire+0x13b/0x190 > kern_unmount+0x3e/0x50 > pid_ns_release_proc+0x15/0x20 > proc_cleanup_work+0x15/0x20 > process_one_work+0x197/0x450 > worker_thread+0x4e/0x4a0 > kthread+0x109/0x140 > ? process_one_work+0x450/0x450 > ? kthread_park+0x90/0x90 > ret_from_fork+0x2c/0x40 > ---[ end trace e1c109611e5d0b41 ]--- > VFS: Busy inodes after unmount of proc. Self-destruct in 5 seconds. Have a nice day... > BUG: unable to handle kernel NULL pointer dereference at (null) > IP: _raw_spin_lock+0xc/0x30 > PGD 0 Fix this by taking a reference to the super block in proc_sys_prune_dcache. The superblock reference is the core of the fix however the sysctl_inodes list is converted to a hlist so that hlist_del_init_rcu may be used. This allows proc_sys_prune_dcache to remove inodes from the sysctl_inodes list, while not causing problems for proc_sys_evict_inode if it later chooses to remove the inode from the sysctl_inodes list. 
Removing inodes from the sysctl_inodes list allows proc_sys_prune_dcache to have a progress guarantee, while still being able to drop all locks. The fact that head->unregistering is set in start_unregistering ensures that no more inodes will be added to the sysctl_inodes list. Previously the code did a dance where it delayed calling iput until the next entry in the list was being considered to ensure the inode remained on the sysctl_inodes list until the next entry was walked to. The structure of the loop in this patch does not need that so is much easier to understand and maintain. Cc: stable@vger.kernel.org Reported-by: Andrei Vagin Tested-by: Andrei Vagin Fixes: ace0c791e6c3 ("proc/sysctl: Don't grab i_lock under sysctl_lock.") Fixes: d6cffbbe9a7e ("proc/sysctl: prune stale dentries during unregistering") Signed-off-by: "Eric W. Biederman" --- include/linux/sysctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 80d07816def0..1c04a26bfd2f 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -143,7 +143,7 @@ struct ctl_table_header struct ctl_table_set *set; struct ctl_dir *parent; struct ctl_node *node; - struct list_head inodes; /* head for proc_inode->sysctl_inodes */ + struct hlist_head inodes; /* head for proc_inode->sysctl_inodes */ }; struct ctl_dir { -- cgit v1.2.3 From e8158b486d5f3f55cf372c5a32b42f263bf7f123 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 11 Jul 2017 18:20:20 +0300 Subject: device property: Introduce fwnode_call_bool_op() for ops that return bool fwnode_call_int_op() isn't suitable for calling ops that return bool since it effectively causes the result returned to the user to be true when an op hasn't been defined or the fwnode is NULL. Address this by introducing fwnode_call_bool_op() for calling ops that return bool. 
Fixes: 3708184afc77 "device property: Move FW type specific functionality to FW specific files" Fixes: 2294b3af05e9 "device property: Introduce fwnode_device_is_available()" Reported-by: Dan Carpenter Signed-off-by: Sakari Ailus Signed-off-by: Rafael J. Wysocki --- include/linux/fwnode.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 9ab375419189..50893a1646cf 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -99,6 +99,10 @@ struct fwnode_operations { (fwnode ? (fwnode_has_op(fwnode, op) ? \ (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : -ENXIO) : \ -EINVAL) +#define fwnode_call_bool_op(fwnode, op, ...) \ + (fwnode ? (fwnode_has_op(fwnode, op) ? \ + (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : false) : \ + false) #define fwnode_call_ptr_op(fwnode, op, ...) \ (fwnode_has_op(fwnode, op) ? \ (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : NULL) -- cgit v1.2.3 From 8c6ae4980e70395cbdfdf605c29673c5a6a89d9a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 30 Jun 2017 12:03:54 -0400 Subject: sunrpc: Allocate up to RPCSVC_MAXPAGES per svc_rqst svcrdma needs 259 pages allocated to receive 1MB NFSv4.0 WRITE requests: - 1 page for the transport header and head iovec - 256 pages for the data payload - 1 page for the trailing GETATTR request (since NFSD XDR decoding does not look for a tail iovec, the GETATTR is stuck at the end of the rqstp->rq_arg.pages list) - 1 page for building the reply xdr_buf But RPCSVC_MAXPAGES is already 259 (on x86_64). The problem is that svc_alloc_arg never allocates that many pages. To address this: 1. The final element of rq_pages always points to NULL. To accommodate up to 259 pages in rq_pages, add an extra element to rq_pages for the array termination sentinel. 2. Adjust the calculation of "pages" to match how RPCSVC_MAXPAGES is calculated, so it can go up to 259. 
Bruce noted that the calculation assumes sv_max_mesg is a multiple of PAGE_SIZE, which might not always be true. I didn't change this assumption. 3. Change the loop boundaries to allow 259 pages to be allocated. Additional clean-up: WARN_ON_ONCE adds an extra conditional branch, which is basically never taken. And there's no need to dump the stack here because svc_alloc_arg has only one caller. Keeping that NULL "array termination sentinel"; there doesn't appear to be any code that depends on it, only code in nfsd_splice_actor() which needs the 259th element to be initialized to *something*. So it's possible we could just keep the array at 259 elements and drop that final NULL, but we're being conservative for now. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index eec04982a7ea..a3f8af9bd543 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -246,7 +246,7 @@ struct svc_rqst { size_t rq_xprt_hlen; /* xprt header len */ struct xdr_buf rq_arg; struct xdr_buf rq_res; - struct page * rq_pages[RPCSVC_MAXPAGES]; + struct page *rq_pages[RPCSVC_MAXPAGES + 1]; struct page * *rq_respages; /* points into rq_pages */ struct page * *rq_next_page; /* next reply page to use */ struct page * *rq_page_end; /* one past the last page */ -- cgit v1.2.3 From 026d958b38c628a1b4ced534808945365e2747a5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Jun 2017 17:18:24 -0400 Subject: svcrdma: Add recvfrom helpers to svc_rdma_rw.c svc_rdma_rw.c already contains helpers for the sendto path. Introduce helpers for the recvfrom path. The plan is to replace the local NFSD bespoke code that constructs and posts RDMA Read Work Requests with calls to the rdma_rw API. 
This shares code with other RDMA-enabled ULPs that manages the gory details of buffer registration and posting Work Requests. This new code also puts all RDMA_NOMSG-specific logic in one place. Lastly, the use of rqstp->rq_arg.pages is deprecated in favor of using rqstp->rq_pages directly, for clarity. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 3ca991657889..cf5d5412298b 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -196,6 +196,9 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, /* svc_rdma_rw.c */ extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); +extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, __be32 *p); extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch, struct xdr_buf *xdr); extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, -- cgit v1.2.3 From cafc739892f34b9090413179ca259409fc43bfae Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Jun 2017 17:18:33 -0400 Subject: svcrdma: Use generic RDMA R/W API in RPC Call path The current svcrdma recvfrom code path has a lot of detail about registration mode and the type of port (iWARP, IB, etc). Instead, use the RDMA core's generic R/W API. This shares code with other RDMA-enabled ULPs that manages the gory details of buffer registration and the posting of RDMA Read Work Requests. Since the Read list marshaling code is being replaced, I took the opportunity to replace C structure-based XDR encoding code with more portable code that uses pointer arithmetic. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index cf5d5412298b..b1ba19ba1071 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -82,10 +82,7 @@ struct svc_rdma_op_ctxt { int hdr_count; struct xdr_buf arg; struct ib_cqe cqe; - struct ib_cqe reg_cqe; - struct ib_cqe inv_cqe; u32 byte_len; - u32 position; struct svcxprt_rdma *xprt; unsigned long flags; enum dma_data_direction direction; @@ -116,7 +113,6 @@ struct svcxprt_rdma { struct list_head sc_accept_q; /* Conn. waiting accept */ int sc_ord; /* RDMA read limit */ int sc_max_sge; - int sc_max_sge_rd; /* max sge for read target */ bool sc_snd_w_inv; /* OK to use Send With Invalidate */ atomic_t sc_sq_avail; /* SQEs ready to be consumed */ @@ -141,10 +137,6 @@ struct svcxprt_rdma { struct ib_qp *sc_qp; struct ib_cq *sc_rq_cq; struct ib_cq *sc_sq_cq; - int (*sc_reader)(struct svcxprt_rdma *, - struct svc_rqst *, - struct svc_rdma_op_ctxt *, - int *, u32 *, u32, u32, u64, bool); u32 sc_dev_caps; /* distilled device caps */ unsigned int sc_frmr_pg_list_len; struct list_head sc_frmr_q; @@ -187,12 +179,6 @@ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, /* svc_rdma_recvfrom.c */ extern int svc_rdma_recvfrom(struct svc_rqst *); -extern int rdma_read_chunk_lcl(struct svcxprt_rdma *, struct svc_rqst *, - struct svc_rdma_op_ctxt *, int *, u32 *, - u32, u32, u64, bool); -extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, - struct svc_rdma_op_ctxt *, int *, u32 *, - u32, u32, u64, bool); /* svc_rdma_rw.c */ extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); -- cgit v1.2.3 From c84dc900d737a8d8f08768622226980ee863403b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Jun 2017 17:18:49 -0400 Subject: svcrdma: Remove unused Read completion handlers Clean up: The generic RDMA R/W 
API conversion of svc_rdma_recvfrom replaced the Register, Read, and Invalidate completion handlers. Remove the old ones, which are no longer used. These handlers shared some helper code with svc_rdma_wc_send. Fold the wc_common helper back into the one remaining completion handler. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index b1ba19ba1071..06d58a3f74bc 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -77,17 +77,15 @@ extern atomic_t rdma_stat_sq_prod; */ struct svc_rdma_op_ctxt { struct list_head list; - struct svc_rdma_op_ctxt *read_hdr; struct svc_rdma_fastreg_mr *frmr; - int hdr_count; struct xdr_buf arg; struct ib_cqe cqe; u32 byte_len; struct svcxprt_rdma *xprt; - unsigned long flags; enum dma_data_direction direction; int count; unsigned int mapped_sges; + int hdr_count; struct ib_send_wr send_wr; struct ib_sge sge[1 + RPCRDMA_MAX_INLINE_THRESH / PAGE_SIZE]; struct page *pages[RPCSVC_MAXPAGES]; -- cgit v1.2.3 From 463e63d7014442002399903af027b63ae38f6e77 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Jun 2017 17:18:57 -0400 Subject: svcrdma: Remove frmr cache Clean up: Now that the svc_rdma_recvfrom path uses the rdma_rw API, the details of Read sink buffer registration are dealt with by the kernel's RDMA core. This cache is no longer used, and can be removed. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 06d58a3f74bc..fd7775f70bb5 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -77,7 +77,6 @@ extern atomic_t rdma_stat_sq_prod; */ struct svc_rdma_op_ctxt { struct list_head list; - struct svc_rdma_fastreg_mr *frmr; struct xdr_buf arg; struct ib_cqe cqe; u32 byte_len; @@ -91,17 +90,6 @@ struct svc_rdma_op_ctxt { struct page *pages[RPCSVC_MAXPAGES]; }; -struct svc_rdma_fastreg_mr { - struct ib_mr *mr; - struct scatterlist *sg; - int sg_nents; - unsigned long access_flags; - enum dma_data_direction direction; - struct list_head frmr_list; -}; - -#define RDMACTXT_F_LAST_CTXT 2 - #define SVCRDMA_DEVCAP_FAST_REG 1 /* fast mr registration */ #define SVCRDMA_DEVCAP_READ_W_INV 2 /* read w/ invalidate */ @@ -136,9 +124,6 @@ struct svcxprt_rdma { struct ib_cq *sc_rq_cq; struct ib_cq *sc_sq_cq; u32 sc_dev_caps; /* distilled device caps */ - unsigned int sc_frmr_pg_list_len; - struct list_head sc_frmr_q; - spinlock_t sc_frmr_q_lock; spinlock_t sc_lock; /* transport lock */ @@ -210,9 +195,6 @@ extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt); -extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); -extern void svc_rdma_put_frmr(struct svcxprt_rdma *, - struct svc_rdma_fastreg_mr *); extern void svc_sq_reap(struct svcxprt_rdma *); extern void svc_rq_reap(struct svcxprt_rdma *); extern void svc_rdma_prep_reply_hdr(struct svc_rqst *); -- cgit v1.2.3 From 9450ca8e2febb0000a5efd4f5870915d59ae62bc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Jun 2017 17:19:13 -0400 
Subject: svcrdma: Clean up after converting svc_rdma_recvfrom to rdma_rw API Clean up: Registration mode details are now handled by the rdma_rw API, and thus can be removed from svcrdma. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index fd7775f70bb5..995c6fe9ee90 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -90,9 +90,6 @@ struct svc_rdma_op_ctxt { struct page *pages[RPCSVC_MAXPAGES]; }; -#define SVCRDMA_DEVCAP_FAST_REG 1 /* fast mr registration */ -#define SVCRDMA_DEVCAP_READ_W_INV 2 /* read w/ invalidate */ - struct svcxprt_rdma { struct svc_xprt sc_xprt; /* SVC transport structure */ struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ @@ -123,7 +120,6 @@ struct svcxprt_rdma { struct ib_qp *sc_qp; struct ib_cq *sc_rq_cq; struct ib_cq *sc_sq_cq; - u32 sc_dev_caps; /* distilled device caps */ spinlock_t sc_lock; /* transport lock */ -- cgit v1.2.3 From 40bf6a35483ee25271ce2a90d8976cf1409a033a Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 12 Jul 2017 10:23:13 +0200 Subject: rtc: Remove wrong deprecation comment rtc_time_to_tm and rtc_tm_to_time are not deprecated and make perfect sense for RTCs that are simple 32bit counters. Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index d53ecdc060cf..0a0f0d14a5fb 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -33,17 +33,11 @@ static inline time64_t rtc_tm_sub(struct rtc_time *lhs, struct rtc_time *rhs) return rtc_tm_to_time64(lhs) - rtc_tm_to_time64(rhs); } -/** - * Deprecated. Use rtc_time64_to_tm(). 
- */ static inline void rtc_time_to_tm(unsigned long time, struct rtc_time *tm) { rtc_time64_to_tm(time, tm); } -/** - * Deprecated. Use rtc_tm_to_time64(). - */ static inline int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time) { *time = rtc_tm_to_time64(tm); -- cgit v1.2.3 From 0a2c13d9cd76c84f2520f573ff83f777eb7464aa Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 12 Jul 2017 14:33:01 -0700 Subject: include/linux/dcache.h: use unsigned chars in struct name_snapshot "kernel.h: handle pointers to arrays better in container_of()" triggers: In file included from include/uapi/linux/stddef.h:1:0, from include/linux/stddef.h:4, from include/uapi/linux/posix_types.h:4, from include/uapi/linux/types.h:13, from include/linux/types.h:5, from include/linux/syscalls.h:71, from fs/dcache.c:17: fs/dcache.c: In function 'release_dentry_name_snapshot': include/linux/compiler.h:542:38: error: call to '__compiletime_assert_305' declared with attribute error: pointer type mismatch in container_of() _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) ^ include/linux/compiler.h:525:4: note: in definition of macro '__compiletime_assert' prefix ## suffix(); \ ^ include/linux/compiler.h:542:2: note: in expansion of macro '_compiletime_assert' _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) ^ include/linux/build_bug.h:46:37: note: in expansion of macro 'compiletime_assert' #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) ^ include/linux/kernel.h:860:2: note: in expansion of macro 'BUILD_BUG_ON_MSG' BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \ ^ fs/dcache.c:305:7: note: in expansion of macro 'container_of' p = container_of(name->name, struct external_name, name[0]); Switch name_snapshot to use unsigned chars, matching struct qstr and struct external_name. 
Link: http://lkml.kernel.org/r/20170710152134.0f78c1e6@canb.auug.org.au Signed-off-by: Stephen Rothwell Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dcache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 025727bf6797..c706eaac692e 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -592,8 +592,8 @@ static inline struct inode *d_real_inode(const struct dentry *dentry) } struct name_snapshot { - const char *name; - char inline_name[DNAME_INLINE_LEN]; + const unsigned char *name; + unsigned char inline_name[DNAME_INLINE_LEN]; }; void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *); void release_dentry_name_snapshot(struct name_snapshot *); -- cgit v1.2.3 From c7acec713d14c6ce8a20154f9dfda258d6bcad3b Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 12 Jul 2017 14:33:04 -0700 Subject: kernel.h: handle pointers to arrays better in container_of() If the first parameter of container_of() is a pointer to a non-const-qualified array type (and the third parameter names a non-const-qualified array member), the local variable __mptr will be defined with a const-qualified array type. In ISO C, these types are incompatible. They work as expected in GNU C, but some versions will issue warnings. For example, GCC 4.9 produces the warning "initialization from incompatible pointer type". 
Here is an example of where the problem occurs: ------------------------------------------------------- #include #include MODULE_LICENSE("GPL"); struct st { int a; char b[16]; }; static int __init example_init(void) { struct st t = { .a = 101, .b = "hello" }; char (*p)[16] = &t.b; struct st *x = container_of(p, struct st, b); printk(KERN_DEBUG "%p %p\n", (void *)&t, (void *)x); return 0; } static void __exit example_exit(void) { } module_init(example_init); module_exit(example_exit); ------------------------------------------------------- Building the module with gcc-4.9 results in these warnings (where '{m}' is the module source and '{k}' is the kernel source): ------------------------------------------------------- In file included from {m}/example.c:1:0: {m}/example.c: In function `example_init': {k}/include/linux/kernel.h:854:48: warning: initialization from incompatible pointer type const typeof( ((type *)0)->member ) *__mptr = (ptr); \ ^ {m}/example.c:14:17: note: in expansion of macro `container_of' struct st *x = container_of(p, struct st, b); ^ {k}/include/linux/kernel.h:854:48: warning: (near initialization for `x') const typeof( ((type *)0)->member ) *__mptr = (ptr); \ ^ {m}/example.c:14:17: note: in expansion of macro `container_of' struct st *x = container_of(p, struct st, b); ^ ------------------------------------------------------- Replace the type checking performed by the macro to avoid these warnings. Make sure `*(ptr)` either has type compatible with the member, or has type compatible with `void`, ignoring qualifiers. Raise compiler errors if this is not true. This is stronger than the previous behaviour, which only resulted in compiler warnings for a type mismatch. 
[arnd@arndb.de: fix new warnings for container_of()] Link: http://lkml.kernel.org/r/20170620200940.90557-1-arnd@arndb.de Link: http://lkml.kernel.org/r/20170525120316.24473-7-abbotti@mev.co.uk Signed-off-by: Ian Abbott Signed-off-by: Arnd Bergmann Acked-by: Michal Nazarewicz Acked-by: Kees Cook Cc: Hidehiro Kawai Cc: Borislav Petkov Cc: Rasmus Villemoes Cc: Johannes Berg Cc: Peter Zijlstra Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 1c91f26e2996..bd6d96cf80b1 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -854,9 +855,12 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * @member: the name of the member within the struct. * */ -#define container_of(ptr, type, member) ({ \ - const typeof( ((type *)0)->member ) *__mptr = (ptr); \ - (type *)( (char *)__mptr - offsetof(type,member) );}) +#define container_of(ptr, type, member) ({ \ + void *__mptr = (void *)(ptr); \ + BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \ + !__same_type(*(ptr), void), \ + "pointer type mismatch in container_of()"); \ + ((type *)(__mptr - offsetof(type, member))); }) /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */ #ifdef CONFIG_FTRACE_MCOUNT_RECORD -- cgit v1.2.3 From 203e9e41219b4e7357104e525e91ac609fba2c6c Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 12 Jul 2017 14:33:14 -0700 Subject: kexec: move vmcoreinfo out of the kernel's .bss section As Eric said, "what we need to do is move the variable vmcoreinfo_note out of the kernel's .bss section. And modify the code to regenerate and keep this information in something like the control page. 
Definitely something like this needs a page all to itself, and ideally far away from any other kernel data structures. I clearly was not watching closely the data someone decided to keep this silly thing in the kernel's .bss section." This patch allocates extra pages for these vmcoreinfo_XXX variables, one advantage is that it enhances some safety of vmcoreinfo, because vmcoreinfo now is kept far away from other kernel data structures. Link: http://lkml.kernel.org/r/1493281021-20737-1-git-send-email-xlpang@redhat.com Signed-off-by: Xunlei Pang Tested-by: Michael Holzheu Reviewed-by: Juergen Gross Suggested-by: Eric Biederman Cc: Benjamin Herrenschmidt Cc: Dave Young Cc: Hari Bathini Cc: Mahesh Salgaonkar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/crash_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 4090a42578a8..87506a02e914 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -19,7 +19,7 @@ CRASH_CORE_NOTE_NAME_BYTES + \ CRASH_CORE_NOTE_DESC_BYTES) -#define VMCOREINFO_BYTES (4096) +#define VMCOREINFO_BYTES PAGE_SIZE #define VMCOREINFO_NOTE_NAME "VMCOREINFO" #define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4) #define VMCOREINFO_NOTE_SIZE ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ @@ -56,7 +56,7 @@ phys_addr_t paddr_vmcoreinfo_note(void); #define VMCOREINFO_CONFIG(name) \ vmcoreinfo_append_str("CONFIG_%s=y\n", #name) -extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; +extern u32 *vmcoreinfo_note; extern size_t vmcoreinfo_size; extern size_t vmcoreinfo_max_size; -- cgit v1.2.3 From 5203f4995d9a87952a83c2ce7866adbbe8f97bb5 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 12 Jul 2017 14:33:17 -0700 Subject: powerpc/fadump: use the correct VMCOREINFO_NOTE_SIZE for phdr vmcoreinfo_max_size stands for the vmcoreinfo_data, the correct one we should use is vmcoreinfo_note 
whose total size is VMCOREINFO_NOTE_SIZE. Like explained in commit 77019967f06b ("kdump: fix exported size of vmcoreinfo note"), it should not affect the actual function, but we better fix it, also this change should be safe and backward compatible. After this, we can get rid of variable vmcoreinfo_max_size, let's use the corresponding macros directly, fewer variables means more safety for vmcoreinfo operation. [xlpang@redhat.com: fix build warning] Link: http://lkml.kernel.org/r/1494830606-27736-1-git-send-email-xlpang@redhat.com Link: http://lkml.kernel.org/r/1493281021-20737-2-git-send-email-xlpang@redhat.com Signed-off-by: Xunlei Pang Reviewed-by: Mahesh Salgaonkar Reviewed-by: Dave Young Cc: Hari Bathini Cc: Benjamin Herrenschmidt Cc: Eric Biederman Cc: Juergen Gross Cc: Michael Holzheu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/crash_core.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 87506a02e914..e5df1b3cf072 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -58,7 +58,6 @@ phys_addr_t paddr_vmcoreinfo_note(void); extern u32 *vmcoreinfo_note; extern size_t vmcoreinfo_size; -extern size_t vmcoreinfo_max_size; Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len); -- cgit v1.2.3 From 1229384f5b856d83698c38f9dedfd836e26711cb Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 12 Jul 2017 14:33:21 -0700 Subject: kdump: protect vmcoreinfo data under the crash memory Currently vmcoreinfo data is updated at boot time subsys_initcall(), it has the risk of being modified by some wrong code during system is running. As a result, vmcore dumped may contain the wrong vmcoreinfo. Later on, when using "crash", "makedumpfile", etc utility to parse this vmcore, we probably will get "Segmentation fault" or other unexpected errors. E.g. 
1) wrong code overwrites vmcoreinfo_data; 2) further crashes the system; 3) trigger kdump, then we obviously will fail to recognize the crash context correctly due to the corrupted vmcoreinfo. Now except for vmcoreinfo, all the crash data is well protected(including the cpu note which is fully updated in the crash path, thus its correctness is guaranteed). Given that vmcoreinfo data is a large chunk prepared for kdump, we better protect it as well. To solve this, we relocate and copy vmcoreinfo_data to the crash memory when kdump is loading via kexec syscalls. Because the whole crash memory will be protected by existing arch_kexec_protect_crashkres() mechanism, we naturally protect vmcoreinfo_data from write(even read) access under kernel direct mapping after kdump is loaded. Since kdump is usually loaded at the very early stage after boot, we can trust the correctness of the vmcoreinfo data copied. On the other hand, we still need to operate the vmcoreinfo safe copy when crash happens to generate vmcoreinfo_note again, we rely on vmap() to map out a new kernel virtual address and update to use this new one instead in the following crash_save_vmcoreinfo(). BTW, we do not touch vmcoreinfo_note, because it will be fully updated using the protected vmcoreinfo_data after crash which is surely correct just like the cpu crash note. 
Link: http://lkml.kernel.org/r/1493281021-20737-3-git-send-email-xlpang@redhat.com Signed-off-by: Xunlei Pang Tested-by: Michael Holzheu Cc: Benjamin Herrenschmidt Cc: Dave Young Cc: Eric Biederman Cc: Hari Bathini Cc: Juergen Gross Cc: Mahesh Salgaonkar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/crash_core.h | 2 +- include/linux/kexec.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index e5df1b3cf072..2df2118fbe13 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -28,6 +28,7 @@ typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4]; +void crash_update_vmcoreinfo_safecopy(void *ptr); void crash_save_vmcoreinfo(void); void arch_crash_save_vmcoreinfo(void); __printf(1, 2) @@ -57,7 +58,6 @@ phys_addr_t paddr_vmcoreinfo_note(void); vmcoreinfo_append_str("CONFIG_%s=y\n", #name) extern u32 *vmcoreinfo_note; -extern size_t vmcoreinfo_size; Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len); diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 65888418fb69..dd056fab9e35 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -172,6 +172,7 @@ struct kimage { unsigned long start; struct page *control_code_page; struct page *swap_page; + void *vmcoreinfo_data_copy; /* locates in the crash memory */ unsigned long nr_segments; struct kexec_segment segment[KEXEC_SEGMENT_MAX]; @@ -241,6 +242,7 @@ extern void crash_kexec(struct pt_regs *); int kexec_should_crash(struct task_struct *); int kexec_crash_loaded(void); void crash_save_cpu(struct pt_regs *regs, int cpu); +extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; -- cgit v1.2.3 From 61d9b56a89208d8cccd0b4cfec7e6959717e16e3 Mon Sep 17 00:00:00 2001 From: "Luis R. 
Rodriguez" Date: Wed, 12 Jul 2017 14:33:40 -0700 Subject: sysctl: add unsigned int range support To keep parity with regular int interfaces provide an unsigned int proc_douintvec_minmax() which allows you to specify a range of allowed valid numbers. Adding proc_douintvec_minmax_sysadmin() is easy but we can wait for an actual user for that. Link: http://lkml.kernel.org/r/20170519033554.18592-6-mcgrof@kernel.org Signed-off-by: Luis R. Rodriguez Acked-by: Kees Cook Cc: Subash Abhinov Kasiviswanathan Cc: Heinrich Schuchardt Cc: Kees Cook Cc: "David S. Miller" Cc: Ingo Molnar Cc: Al Viro Cc: "Eric W. Biederman" Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 80d07816def0..225001d437ae 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -47,6 +47,9 @@ extern int proc_douintvec(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int proc_dointvec_minmax(struct ctl_table *, int, void __user *, size_t *, loff_t *); +extern int proc_douintvec_minmax(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); extern int proc_dointvec_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, -- cgit v1.2.3 From 0791e3644e5ef21646fe565b9061788d05ec71d4 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 12 Jul 2017 14:34:28 -0700 Subject: kcmp: add KCMP_EPOLL_TFD mode to compare epoll target files With current epoll architecture target files are addressed with file_struct and file descriptor number, where the last is not unique. Moreover files can be transferred from another process via unix socket, added into queue and then closed, so we won't find this descriptor in the task fdinfo list. 
Thus to checkpoint and restore such processes CRIU needs to find out where exactly the target file is present to add it into epoll queue. For this sake one can use kcmp call where some particular target file from the queue is compared with arbitrary file passed as an argument. Because epoll target files can have same file descriptor number but different file_struct a caller should explicitly specify the offset within. To test if some particular file is matching entry inside epoll one have to - fill kcmp_epoll_slot structure with epoll file descriptor, target file number and target file offset (in case if only one target is present then it should be 0) - call kcmp as kcmp(pid1, pid2, KCMP_EPOLL_TFD, fd, &kcmp_epoll_slot) - the kernel fetch file pointer matching file descriptor @fd of pid1 - lookups for file struct in epoll queue of pid2 and returns traditional 0,1,2 result for sorting purpose Link: http://lkml.kernel.org/r/20170424154423.511592110@gmail.com Signed-off-by: Cyrill Gorcunov Acked-by: Andrey Vagin Cc: Al Viro Cc: Pavel Emelyanov Cc: Michael Kerrisk Cc: Jason Baron Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/eventpoll.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 6daf6d4971f6..d8625d214ea7 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -14,6 +14,7 @@ #define _LINUX_EVENTPOLL_H #include +#include /* Forward declarations to avoid compiler errors */ @@ -22,6 +23,8 @@ struct file; #ifdef CONFIG_EPOLL +struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff); + /* Used to initialize the epoll bits inside the "struct file" */ static inline void eventpoll_init_file(struct file *file) { -- cgit v1.2.3 From 92ef6da3d06ff551a86de41ae37df9cc4b58d7a0 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 12 Jul 2017 14:34:31 -0700 Subject: kcmp: fs/epoll: wrap 
kcmp code with CONFIG_CHECKPOINT_RESTORE kcmp syscall is built iff CONFIG_CHECKPOINT_RESTORE is selected, so wrap appropriate helpers in epoll code with the config to build it conditionally. Link: http://lkml.kernel.org/r/20170513083456.GG1881@uranus.lan Signed-off-by: Cyrill Gorcunov Reported-by: Andrew Morton Cc: Andrey Vagin Cc: Al Viro Cc: Pavel Emelyanov Cc: Michael Kerrisk Cc: Jason Baron Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/eventpoll.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index d8625d214ea7..2f14ac73d01d 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -23,7 +23,9 @@ struct file; #ifdef CONFIG_EPOLL +#ifdef CONFIG_CHECKPOINT_RESTORE struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff); +#endif /* Used to initialize the epoll bits inside the "struct file" */ static inline void eventpoll_init_file(struct file *file) -- cgit v1.2.3 From e41d58185f1444368873d4d7422f7664a68be61d Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Wed, 12 Jul 2017 14:34:35 -0700 Subject: fault-inject: support systematic fault injection Add /proc/self/task/<tid>/fail-nth file that allows failing 0-th, 1-st, 2-nd and so on calls systematically. Excerpt from the added documentation: "Write to this file of integer N makes N-th call in the current task fail (N is 0-based). Read from this file returns a single char 'Y' or 'N' that says if the fault setup with a previous write to this file was injected or not, and disables the fault if it wasn't yet injected. Note that this file enables all types of faults (slab, futex, etc). This setting takes precedence over all other generic settings like probability, interval, times, etc. But per-capability settings (e.g. fail_futex/ignore-private) take precedence over it. 
This feature is intended for systematic testing of faults in a single system call. See an example below" Why add a new setting: 1. Existing settings are global rather than per-task. So parallel testing is not possible. 2. attr->interval is close but it depends on attr->count which is not reset to 0, so interval does not work as expected. 3. Trying to model this with existing settings requires manipulations of all of probability, interval, times, space, task-filter and unexposed count and per-task make-it-fail files. 4. Existing settings are per-failure-type, and the set of failure types is potentially expanding. 5. make-it-fail can't be changed by an unprivileged user and aggressive stress testing is better done from an unprivileged user. Similarly, this would require opening the debugfs files to the unprivileged user, as he would need to reopen at least the times file (not possible to pre-open before dropping privs). The proposed interface solves all of the above (see the example). We want to integrate this into syzkaller fuzzer. A prototype has found 10 bugs in the kernel in the first day of usage: https://groups.google.com/forum/#!searchin/syzkaller/%22FAULT_INJECTION%22%7Csort:relevance I've made the current interface work with all types of our sandboxes. For setuid the secret sauce was prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) to make /proc entries non-root owned. So I am fine with the current version of the code. 
[akpm@linux-foundation.org: fix build] Link: http://lkml.kernel.org/r/20170328130128.101773-1-dvyukov@google.com Signed-off-by: Dmitry Vyukov Cc: Akinobu Mita Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 20814b7d7d70..3822d749fc9e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -974,6 +974,7 @@ struct task_struct { #ifdef CONFIG_FAULT_INJECTION int make_it_fail; + int fail_nth; #endif /* * When (nr_dirtied >= nr_dirtied_pause), it's time to call -- cgit v1.2.3 From 1a23395672658969a4035dcc518ea6cab835c579 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Wed, 12 Jul 2017 14:34:38 -0700 Subject: ipc/sem.c: remove sem_base, embed struct sem sma->sem_base is initialized with sma->sem_base = (struct sem *) &sma[1]; The current code has four problems: - There is an unnecessary pointer dereference - sem_base is not needed. - Alignment for struct sem only works by chance. - The current code causes false positive for static code analysis. - This is a cast between different non-void types, which the future randstruct GCC plugin warns on. 
And, as bonus, the code size gets smaller: Before: 0 .text 00003770 After: 0 .text 0000374e [manfred@colorfullife.com: s/[0]/[]/, per hch] Link: http://lkml.kernel.org/r/20170525185107.12869-2-manfred@colorfullife.com Link: http://lkml.kernel.org/r/20170515171912.6298-2-manfred@colorfullife.com Signed-off-by: Manfred Spraul Acked-by: Kees Cook Cc: Kees Cook Cc: <1vier1@web.de> Cc: Davidlohr Bueso Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Fabian Frederick Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sem.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sem.h b/include/linux/sem.h index 9edec926e9d9..9db14093b73c 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -8,11 +8,29 @@ struct task_struct; +/* One semaphore structure for each semaphore in the system. */ +struct sem { + int semval; /* current value */ + /* + * PID of the process that last modified the semaphore. For + * Linux, specifically these are: + * - semop + * - semctl, via SETVAL and SETALL. + * - at task exit when performing undo adjustments (see exit_sem). + */ + int sempid; + spinlock_t lock; /* spinlock for fine-grained semtimedop */ + struct list_head pending_alter; /* pending single-sop operations */ + /* that alter the semaphore */ + struct list_head pending_const; /* pending single-sop operations */ + /* that do not alter the semaphore*/ + time_t sem_otime; /* candidate for sem_otime */ +} ____cacheline_aligned_in_smp; + /* One sem_array data structure for each set of semaphores in the system. */ struct sem_array { struct kern_ipc_perm sem_perm; /* permissions .. 
see ipc.h */ time_t sem_ctime; /* last change time */ - struct sem *sem_base; /* ptr to first semaphore in array */ struct list_head pending_alter; /* pending operations */ /* that alter the array */ struct list_head pending_const; /* pending complex operations */ @@ -21,6 +39,8 @@ struct sem_array { int sem_nsems; /* no. of semaphores in array */ int complex_count; /* pending complex operations */ unsigned int use_global_lock;/* >0: global lock required */ + + struct sem sems[]; }; #ifdef CONFIG_SYSVIPC -- cgit v1.2.3 From dba4cdd39e698d8dcdad0656825423052ac90ccd Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Wed, 12 Jul 2017 14:34:41 -0700 Subject: ipc: merge ipc_rcu and kern_ipc_perm ipc has two management structures that exist for every id: - struct kern_ipc_perm, it contains e.g. the permissions. - struct ipc_rcu, it contains the rcu head for rcu handling and the refcount. The patch merges both structures. As a bonus, we may save one cacheline, because both structures are cacheline aligned. In addition, it reduces the number of casts, instead most codepaths can use container_of. To simplify code, the ipc_rcu_alloc initializes the allocation to 0. 
[manfred@colorfullife.com: really include the memset() into ipc_alloc_rcu()] Link: http://lkml.kernel.org/r/564f8612-0601-b267-514f-a9f650ec9b32@colorfullife.com Link: http://lkml.kernel.org/r/20170525185107.12869-3-manfred@colorfullife.com Signed-off-by: Manfred Spraul Cc: Davidlohr Bueso Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ipc.h b/include/linux/ipc.h index 71fd92d81b26..5591f055e13f 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -20,6 +20,9 @@ struct kern_ipc_perm { umode_t mode; unsigned long seq; void *security; + + struct rcu_head rcu; + atomic_t refcount; } ____cacheline_aligned_in_smp; #endif /* _LINUX_IPC_H */ -- cgit v1.2.3 From 2cd648c110b5570c3280bd645797658cabbe5f5c Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Wed, 12 Jul 2017 14:34:44 -0700 Subject: include/linux/sem.h: correctly document sem_ctime sem_ctime is initialized to the semget() time and then updated at every semctl() that changes the array. Thus it does not represent the time of the last change. Especially, semop() calls are only stored in sem_otime, not in sem_ctime. This is already described in ipc/sem.c, I just overlooked that there is a comment in include/linux/sem.h and man semctl(2) as well. So: Correct wrong comments. 
Link: http://lkml.kernel.org/r/20170515171912.6298-4-manfred@colorfullife.com Signed-off-by: Manfred Spraul Cc: Kees Cook Cc: <1vier1@web.de> Cc: Davidlohr Bueso Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Fabian Frederick Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sem.h b/include/linux/sem.h index 9db14093b73c..be5cf2ea14ad 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -30,7 +30,7 @@ struct sem { /* One sem_array data structure for each set of semaphores in the system. */ struct sem_array { struct kern_ipc_perm sem_perm; /* permissions .. see ipc.h */ - time_t sem_ctime; /* last change time */ + time_t sem_ctime; /* create/last semctl() time */ struct list_head pending_alter; /* pending operations */ /* that alter the array */ struct list_head pending_const; /* pending complex operations */ -- cgit v1.2.3 From 24bb44612c5f93a1dff1f7e71b7b7b109a988791 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 12 Jul 2017 14:35:40 -0700 Subject: kernel/watchdog: remove unused declaration Patch series "Improve watchdog config for arch watchdogs", v4. A series to make the hardlockup watchdog more easily replaceable by arch code. The last patch provides some justification for why we want to do this (existing sparc watchdog is another that could benefit). This patch (of 5): Remove unused declaration. 
Link: http://lkml.kernel.org/r/20170616065715.18390-2-npiggin@gmail.com Signed-off-by: Nicholas Piggin Reviewed-by: Don Zickus Reviewed-by: Babu Moger Tested-by: Babu Moger [sparc] Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nmi.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index aa3cd0878270..5e2e57536d98 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -12,9 +12,6 @@ extern void touch_softlockup_watchdog_sched(void); extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); -extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; extern unsigned int hardlockup_panic; void lockup_detector_init(void); -- cgit v1.2.3 From f2e0cff85ed111a3cf24d894c3fa11697dfae628 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 12 Jul 2017 14:35:43 -0700 Subject: kernel/watchdog: introduce arch_touch_nmi_watchdog() For architectures that define HAVE_NMI_WATCHDOG, instead of having them provide the complete touch_nmi_watchdog() function, just have them provide arch_touch_nmi_watchdog(). This gives the generic code more flexibility in implementing this function, and arch implementations don't miss out on touching the softlockup watchdog or other generic details. 
Link: http://lkml.kernel.org/r/20170616065715.18390-3-npiggin@gmail.com Signed-off-by: Nicholas Piggin Reviewed-by: Don Zickus Reviewed-by: Babu Moger Tested-by: Babu Moger [sparc] Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nmi.h | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 5e2e57536d98..bd387ef8bccd 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -6,6 +6,9 @@ #include #include +#if defined(CONFIG_HAVE_NMI_WATCHDOG) +#include +#endif #ifdef CONFIG_LOCKUP_DETECTOR extern void touch_softlockup_watchdog_sched(void); @@ -58,6 +61,18 @@ static inline void reset_hung_task_detector(void) #define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) #define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) +#if defined(CONFIG_HARDLOCKUP_DETECTOR) +extern void hardlockup_detector_disable(void); +#else +static inline void hardlockup_detector_disable(void) {} +#endif + +#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) +extern void arch_touch_nmi_watchdog(void); +#else +static inline void arch_touch_nmi_watchdog(void) {} +#endif + /** * touch_nmi_watchdog - restart NMI watchdog timeout. * @@ -65,21 +80,11 @@ static inline void reset_hung_task_detector(void) * may be used to reset the timeout - for code which intentionally * disables interrupts for a long time. This call is stateless. 
*/ -#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) -#include -extern void touch_nmi_watchdog(void); -#else static inline void touch_nmi_watchdog(void) { + arch_touch_nmi_watchdog(); touch_softlockup_watchdog(); } -#endif - -#if defined(CONFIG_HARDLOCKUP_DETECTOR) -extern void hardlockup_detector_disable(void); -#else -static inline void hardlockup_detector_disable(void) {} -#endif /* * Create trigger_all_cpu_backtrace() out of the arch-provided -- cgit v1.2.3 From 05a4a95279311c3a4633b4277a5d21cfd616c6c7 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 12 Jul 2017 14:35:46 -0700 Subject: kernel/watchdog: split up config options Split SOFTLOCKUP_DETECTOR from LOCKUP_DETECTOR, and split HARDLOCKUP_DETECTOR_PERF from HARDLOCKUP_DETECTOR. LOCKUP_DETECTOR implies the general boot, sysctl, and programming interfaces for the lockup detectors. An architecture that wants to use a hard lockup detector must define HAVE_HARDLOCKUP_DETECTOR_PERF or HAVE_HARDLOCKUP_DETECTOR_ARCH. Alternatively an arch can define HAVE_NMI_WATCHDOG, which provides the minimum arch_touch_nmi_watchdog, and it otherwise does its own thing and does not implement the LOCKUP_DETECTOR interfaces. sparc is unusual in that it has started to implement some of the interfaces, but not fully yet. It should probably be converted to a full HAVE_HARDLOCKUP_DETECTOR_ARCH. 
[npiggin@gmail.com: fix] Link: http://lkml.kernel.org/r/20170617223522.66c0ad88@roar.ozlabs.ibm.com Link: http://lkml.kernel.org/r/20170616065715.18390-4-npiggin@gmail.com Signed-off-by: Nicholas Piggin Reviewed-by: Don Zickus Reviewed-by: Babu Moger Tested-by: Babu Moger [sparc] Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nmi.h | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index bd387ef8bccd..8aa01fd859fb 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -11,13 +11,21 @@ #endif #ifdef CONFIG_LOCKUP_DETECTOR +void lockup_detector_init(void); +#else +static inline void lockup_detector_init(void) +{ +} +#endif + +#ifdef CONFIG_SOFTLOCKUP_DETECTOR extern void touch_softlockup_watchdog_sched(void); extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); extern unsigned int softlockup_panic; -extern unsigned int hardlockup_panic; -void lockup_detector_init(void); +extern int soft_watchdog_enabled; +extern atomic_t watchdog_park_in_progress; #else static inline void touch_softlockup_watchdog_sched(void) { @@ -31,9 +39,6 @@ static inline void touch_softlockup_watchdog_sync(void) static inline void touch_all_softlockup_watchdogs(void) { } -static inline void lockup_detector_init(void) -{ -} #endif #ifdef CONFIG_DETECT_HUNG_TASK @@ -63,15 +68,18 @@ static inline void reset_hung_task_detector(void) #if defined(CONFIG_HARDLOCKUP_DETECTOR) extern void hardlockup_detector_disable(void); +extern unsigned int hardlockup_panic; #else static inline void hardlockup_detector_disable(void) {} #endif -#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) +#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) extern void arch_touch_nmi_watchdog(void); 
#else +#if !defined(CONFIG_HAVE_NMI_WATCHDOG) static inline void arch_touch_nmi_watchdog(void) {} #endif +#endif /** * touch_nmi_watchdog - restart NMI watchdog timeout. @@ -141,15 +149,18 @@ static inline bool trigger_single_cpu_backtrace(int cpu) } #endif -#ifdef CONFIG_LOCKUP_DETECTOR +#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF u64 hw_nmi_get_sample_period(int watchdog_thresh); +#endif + +#ifdef CONFIG_LOCKUP_DETECTOR extern int nmi_watchdog_enabled; -extern int soft_watchdog_enabled; extern int watchdog_user_enabled; extern int watchdog_thresh; extern unsigned long watchdog_enabled; +extern struct cpumask watchdog_cpumask; extern unsigned long *watchdog_cpumask_bits; -extern atomic_t watchdog_park_in_progress; +extern int __read_mostly watchdog_suspended; #ifdef CONFIG_SMP extern int sysctl_softlockup_all_cpu_backtrace; extern int sysctl_hardlockup_all_cpu_backtrace; -- cgit v1.2.3 From 6974f0c4555e285ab217cee58b6e874f776ff409 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 12 Jul 2017 14:36:10 -0700 Subject: include/linux/string.h: add the option of fortified string.h functions This adds support for compiling with a rough equivalent to the glibc _FORTIFY_SOURCE=1 feature, providing compile-time and runtime buffer overflow checks for string.h functions when the compiler determines the size of the source or destination buffer at compile-time. Unlike glibc, it covers buffer reads in addition to writes. GNU C __builtin_*_chk intrinsics are avoided because they would force a much more complex implementation. They aren't designed to detect read overflows and offer no real benefit when using an implementation based on inline checks. Inline checks don't add up to much code size and allow full use of the regular string intrinsics while avoiding the need for a bunch of _chk functions and per-arch assembly to avoid wrapper overhead. This detects various overflows at compile-time in various drivers and some non-x86 core kernel code. 
There will likely be issues caught in regular use at runtime too. Future improvements left out of initial implementation for simplicity, as it's all quite optional and can be done incrementally: * Some of the fortified string functions (strncpy, strcat), don't yet place a limit on reads from the source based on __builtin_object_size of the source buffer. * Extending coverage to more string functions like strlcat. * It should be possible to optionally use __builtin_object_size(x, 1) for some functions (C strings) to detect intra-object overflows (like glibc's _FORTIFY_SOURCE=2), but for now this takes the conservative approach to avoid likely compatibility issues. * The compile-time checks should be made available via a separate config option which can be enabled by default (or always enabled) once enough time has passed to get the issues it catches fixed. Kees said: "This is great to have. While it was out-of-tree code, it would have blocked at least CVE-2016-3858 from being exploitable (improper size argument to strlcpy()). I've sent a number of fixes for out-of-bounds-reads that this detected upstream already" [arnd@arndb.de: x86: fix fortified memcpy] Link: http://lkml.kernel.org/r/20170627150047.660360-1-arnd@arndb.de [keescook@chromium.org: avoid panic() in favor of BUG()] Link: http://lkml.kernel.org/r/20170626235122.GA25261@beast [keescook@chromium.org: move from -mm, add ARCH_HAS_FORTIFY_SOURCE, tweak Kconfig help] Link: http://lkml.kernel.org/r/20170526095404.20439-1-danielmicay@gmail.com Link: http://lkml.kernel.org/r/1497903987-21002-8-git-send-email-keescook@chromium.org Signed-off-by: Daniel Micay Signed-off-by: Kees Cook Signed-off-by: Arnd Bergmann Acked-by: Kees Cook Cc: Mark Rutland Cc: Daniel Axtens Cc: Rasmus Villemoes Cc: Andy Shevchenko Cc: Chris Metcalf Cc: Thomas Gleixner Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 200 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index 7439d83eaa33..96f5a5fd0377 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -193,4 +193,204 @@ static inline const char *kbasename(const char *path) return tail ? tail + 1 : path; } +#define __FORTIFY_INLINE extern __always_inline __attribute__((gnu_inline)) +#define __RENAME(x) __asm__(#x) + +void fortify_panic(const char *name) __noreturn __cold; +void __read_overflow(void) __compiletime_error("detected read beyond size of object passed as 1st parameter"); +void __read_overflow2(void) __compiletime_error("detected read beyond size of object passed as 2nd parameter"); +void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter"); + +#if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) +__FORTIFY_INLINE char *strcpy(char *p, const char *q) +{ + size_t p_size = __builtin_object_size(p, 0); + size_t q_size = __builtin_object_size(q, 0); + if (p_size == (size_t)-1 && q_size == (size_t)-1) + return __builtin_strcpy(p, q); + if (strscpy(p, q, p_size < q_size ? 
p_size : q_size) < 0) + fortify_panic(__func__); + return p; +} + +__FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + if (__builtin_constant_p(size) && p_size < size) + __write_overflow(); + if (p_size < size) + fortify_panic(__func__); + return __builtin_strncpy(p, q, size); +} + +__FORTIFY_INLINE char *strcat(char *p, const char *q) +{ + size_t p_size = __builtin_object_size(p, 0); + if (p_size == (size_t)-1) + return __builtin_strcat(p, q); + if (strlcat(p, q, p_size) >= p_size) + fortify_panic(__func__); + return p; +} + +__FORTIFY_INLINE __kernel_size_t strlen(const char *p) +{ + __kernel_size_t ret; + size_t p_size = __builtin_object_size(p, 0); + if (p_size == (size_t)-1) + return __builtin_strlen(p); + ret = strnlen(p, p_size); + if (p_size <= ret) + fortify_panic(__func__); + return ret; +} + +extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); +__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen) +{ + size_t p_size = __builtin_object_size(p, 0); + __kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size); + if (p_size <= ret && maxlen != ret) + fortify_panic(__func__); + return ret; +} + +/* defined after fortified strlen to reuse it */ +extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); +__FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) +{ + size_t ret; + size_t p_size = __builtin_object_size(p, 0); + size_t q_size = __builtin_object_size(q, 0); + if (p_size == (size_t)-1 && q_size == (size_t)-1) + return __real_strlcpy(p, q, size); + ret = strlen(q); + if (size) { + size_t len = (ret >= size) ? 
size - 1 : ret; + if (__builtin_constant_p(len) && len >= p_size) + __write_overflow(); + if (len >= p_size) + fortify_panic(__func__); + __builtin_memcpy(p, q, len); + p[len] = '\0'; + } + return ret; +} + +/* defined after fortified strlen and strnlen to reuse them */ +__FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count) +{ + size_t p_len, copy_len; + size_t p_size = __builtin_object_size(p, 0); + size_t q_size = __builtin_object_size(q, 0); + if (p_size == (size_t)-1 && q_size == (size_t)-1) + return __builtin_strncat(p, q, count); + p_len = strlen(p); + copy_len = strnlen(q, count); + if (p_size < p_len + copy_len + 1) + fortify_panic(__func__); + __builtin_memcpy(p + p_len, q, copy_len); + p[p_len + copy_len] = '\0'; + return p; +} + +__FORTIFY_INLINE void *memset(void *p, int c, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + if (__builtin_constant_p(size) && p_size < size) + __write_overflow(); + if (p_size < size) + fortify_panic(__func__); + return __builtin_memset(p, c, size); +} + +__FORTIFY_INLINE void *memcpy(void *p, const void *q, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + size_t q_size = __builtin_object_size(q, 0); + if (__builtin_constant_p(size)) { + if (p_size < size) + __write_overflow(); + if (q_size < size) + __read_overflow2(); + } + if (p_size < size || q_size < size) + fortify_panic(__func__); + return __builtin_memcpy(p, q, size); +} + +__FORTIFY_INLINE void *memmove(void *p, const void *q, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + size_t q_size = __builtin_object_size(q, 0); + if (__builtin_constant_p(size)) { + if (p_size < size) + __write_overflow(); + if (q_size < size) + __read_overflow2(); + } + if (p_size < size || q_size < size) + fortify_panic(__func__); + return __builtin_memmove(p, q, size); +} + +extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan); +__FORTIFY_INLINE void *memscan(void *p, 
int c, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + if (__builtin_constant_p(size) && p_size < size) + __read_overflow(); + if (p_size < size) + fortify_panic(__func__); + return __real_memscan(p, c, size); +} + +__FORTIFY_INLINE int memcmp(const void *p, const void *q, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + size_t q_size = __builtin_object_size(q, 0); + if (__builtin_constant_p(size)) { + if (p_size < size) + __read_overflow(); + if (q_size < size) + __read_overflow2(); + } + if (p_size < size || q_size < size) + fortify_panic(__func__); + return __builtin_memcmp(p, q, size); +} + +__FORTIFY_INLINE void *memchr(const void *p, int c, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + if (__builtin_constant_p(size) && p_size < size) + __read_overflow(); + if (p_size < size) + fortify_panic(__func__); + return __builtin_memchr(p, c, size); +} + +void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv); +__FORTIFY_INLINE void *memchr_inv(const void *p, int c, size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + if (__builtin_constant_p(size) && p_size < size) + __read_overflow(); + if (p_size < size) + fortify_panic(__func__); + return __real_memchr_inv(p, c, size); +} + +extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup); +__FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp) +{ + size_t p_size = __builtin_object_size(p, 0); + if (__builtin_constant_p(size) && p_size < size) + __read_overflow(); + if (p_size < size) + fortify_panic(__func__); + return __real_kmemdup(p, size, gfp); +} +#endif + #endif /* _LINUX_STRING_H_ */ -- cgit v1.2.3 From 022c204040f3fd22d6445bc35517786195b7ae80 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 12 Jul 2017 14:36:17 -0700 Subject: random,stackprotect: introduce get_random_canary function Patch series "stackprotector: ascii armor the stack canary", v2. 
Zero out the first byte of the stack canary value on 64 bit systems, in order to mitigate unterminated C string overflows. The null byte both prevents C string functions from reading the canary, and from writing it if the canary value were guessed or obtained through some other means. Reducing the entropy by 8 bits is acceptable on 64-bit systems, which will still have 56 bits of entropy left, but not on 32 bit systems, so the "ascii armor" canary is only implemented on 64-bit systems. Inspired by the "ascii armor" code in execshield and Daniel Micay's linux-hardened tree. Also see https://github.com/thestinger/linux-hardened/ This patch (of 5): Introduce get_random_canary(), which provides a random unsigned long canary value with the first byte zeroed out on 64 bit architectures, in order to mitigate non-terminated C string overflows. The null byte both prevents C string functions from reading the canary, and from writing it if the canary value were guessed or obtained through some other means. Reducing the entropy by 8 bits is acceptable on 64-bit systems, which will still have 56 bits of entropy left, but not on 32 bit systems, so the "ascii armor" canary is only implemented on 64-bit systems. Inspired by the "ascii armor" code in the old execshield patches, and Daniel Micay's linux-hardened tree. Link: http://lkml.kernel.org/r/20170524155751.424-2-riel@redhat.com Signed-off-by: Rik van Riel Acked-by: Kees Cook Cc: Daniel Micay Cc: "Theodore Ts'o" Cc: H. 
Peter Anvin Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Catalin Marinas Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/random.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index ed5c3838780d..1fa0dc880bd7 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -57,6 +57,27 @@ static inline unsigned long get_random_long(void) #endif } +/* + * On 64-bit architectures, protect against non-terminated C string overflows + * by zeroing out the first byte of the canary; this leaves 56 bits of entropy. + */ +#ifdef CONFIG_64BIT +# ifdef __LITTLE_ENDIAN +# define CANARY_MASK 0xffffffffffffff00UL +# else /* big endian, 64 bits: */ +# define CANARY_MASK 0x00ffffffffffffffUL +# endif +#else /* 32 bits: */ +# define CANARY_MASK 0xffffffffUL +#endif + +static inline unsigned long get_random_canary(void) +{ + unsigned long val = get_random_long(); + + return val & CANARY_MASK; +} + unsigned long randomize_page(unsigned long start, unsigned long range); u32 prandom_u32(void); -- cgit v1.2.3 From dcda9b04713c3f6ff0875652924844fae28286ea Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 12 Jul 2017 14:36:45 -0700 Subject: mm, tree wide: replace __GFP_REPEAT by __GFP_RETRY_MAYFAIL with more useful semantic __GFP_REPEAT was designed to allow retry-but-eventually-fail semantic to the page allocator. This has been true but only for allocations requests larger than PAGE_ALLOC_COSTLY_ORDER. It has been always ignored for smaller sizes. This is a bit unfortunate because there is no way to express the same semantic for those requests and they are considered too important to fail so they might end up looping in the page allocator for ever, similarly to GFP_NOFAIL requests. 
Now that the whole tree has been cleaned up and accidental or misled usage of __GFP_REPEAT flag has been removed for !costly requests we can give the original flag a better name and more importantly a more useful semantic. Let's rename it to __GFP_RETRY_MAYFAIL which tells the user that the allocator would try really hard but there is no promise of a success. This will work independent of the order and overrides the default allocator behavior. Page allocator users have several levels of guarantee vs. cost options (take GFP_KERNEL as an example) - GFP_KERNEL & ~__GFP_RECLAIM - optimistic allocation without _any_ attempt to free memory at all. The most light weight mode which even doesn't kick the background reclaim. Should be used carefully because it might deplete the memory and the next user might hit the more aggressive reclaim - GFP_KERNEL & ~__GFP_DIRECT_RECLAIM (or GFP_NOWAIT)- optimistic allocation without any attempt to free memory from the current context but can wake kswapd to reclaim memory if the zone is below the low watermark. Can be used from either atomic contexts or when the request is a performance optimization and there is another fallback for a slow path. - (GFP_KERNEL|__GFP_HIGH) & ~__GFP_DIRECT_RECLAIM (aka GFP_ATOMIC) - non sleeping allocation with an expensive fallback so it can access some portion of memory reserves. Usually used from interrupt/bh context with an expensive slow path fallback. - GFP_KERNEL - both background and direct reclaim are allowed and the _default_ page allocator behavior is used. That means that !costly allocation requests are basically nofail but there is no guarantee of that behavior so failures have to be checked properly by callers (e.g. OOM killer victim is allowed to fail currently). - GFP_KERNEL | __GFP_NORETRY - overrides the default allocator behavior and all allocation requests fail early rather than cause disruptive reclaim (one round of reclaim in this implementation). The OOM killer is not invoked. 
- GFP_KERNEL | __GFP_RETRY_MAYFAIL - overrides the default allocator behavior and all allocation requests try really hard. The request will fail if the reclaim cannot make any progress. The OOM killer won't be triggered. - GFP_KERNEL | __GFP_NOFAIL - overrides the default allocator behavior and all allocation requests will loop endlessly until they succeed. This might be really dangerous especially for larger orders. Existing users of __GFP_REPEAT are changed to __GFP_RETRY_MAYFAIL because they already had their semantic. No new users are added. __alloc_pages_slowpath is changed to bail out for __GFP_RETRY_MAYFAIL if there is no progress and we have already passed the OOM point. This means that all the reclaim opportunities have been exhausted except the most disruptive one (the OOM killer) and a user defined fallback behavior is more sensible than keep retrying in the page allocator. [akpm@linux-foundation.org: fix arch/sparc/kernel/mdesc.c] [mhocko@suse.com: semantic fix] Link: http://lkml.kernel.org/r/20170626123847.GM11534@dhcp22.suse.cz [mhocko@kernel.org: address other thing spotted by Vlastimil] Link: http://lkml.kernel.org/r/20170626124233.GN11534@dhcp22.suse.cz Link: http://lkml.kernel.org/r/20170623085345.11304-3-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Alex Belits Cc: Chris Wilson Cc: Christoph Hellwig Cc: Darrick J. 
Wong Cc: David Daney Cc: Johannes Weiner Cc: Mel Gorman Cc: NeilBrown Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 56 ++++++++++++++++++++++++++++++++++++++++------------ include/linux/slab.h | 3 ++- 2 files changed, 45 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 4c6656f1fee7..bcfb9f7c46f5 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -25,7 +25,7 @@ struct vm_area_struct; #define ___GFP_FS 0x80u #define ___GFP_COLD 0x100u #define ___GFP_NOWARN 0x200u -#define ___GFP_REPEAT 0x400u +#define ___GFP_RETRY_MAYFAIL 0x400u #define ___GFP_NOFAIL 0x800u #define ___GFP_NORETRY 0x1000u #define ___GFP_MEMALLOC 0x2000u @@ -136,26 +136,56 @@ struct vm_area_struct; * * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim. * - * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt - * _might_ fail. This depends upon the particular VM implementation. + * The default allocator behavior depends on the request size. We have a concept + * of so called costly allocations (with order > PAGE_ALLOC_COSTLY_ORDER). + * !costly allocations are too essential to fail so they are implicitly + * non-failing by default (with some exceptions like OOM victims might fail so + * the caller still has to check for failures) while costly requests try to be + * not disruptive and back off even without invoking the OOM killer. + * The following three modifiers might be used to override some of these + * implicit rules + * + * __GFP_NORETRY: The VM implementation will try only very lightweight + * memory direct reclaim to get some memory under memory pressure (thus + * it can sleep). It will avoid disruptive actions like OOM killer. The + * caller must handle the failure which is quite likely to happen under + * heavy memory pressure. 
The flag is suitable when failure can easily be + * handled at small cost, such as reduced throughput + * + * __GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim + * procedures that have previously failed if there is some indication + * that progress has been made else where. It can wait for other + * tasks to attempt high level approaches to freeing memory such as + * compaction (which removes fragmentation) and page-out. + * There is still a definite limit to the number of retries, but it is + * a larger limit than with __GFP_NORETRY. + * Allocations with this flag may fail, but only when there is + * genuinely little unused memory. While these allocations do not + * directly trigger the OOM killer, their failure indicates that + * the system is likely to need to use the OOM killer soon. The + * caller must handle failure, but can reasonably do so by failing + * a higher-level request, or completing it only in a much less + * efficient manner. + * If the allocation does fail, and the caller is in a position to + * free some non-essential memory, doing so could benefit the system + * as a whole. * * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller - * cannot handle allocation failures. New users should be evaluated carefully - * (and the flag should be used only when there is no reasonable failure - * policy) but it is definitely preferable to use the flag rather than - * opencode endless loop around allocator. - * - * __GFP_NORETRY: The VM implementation must not retry indefinitely and will - * return NULL when direct reclaim and memory compaction have failed to allow - * the allocation to succeed. The OOM killer is not called with the current - * implementation. + * cannot handle allocation failures. The allocation could block + * indefinitely but will never return with failure. Testing for + * failure is pointless. 
+ * New users should be evaluated carefully (and the flag should be + * used only when there is no reasonable failure policy) but it is + * definitely preferable to use the flag rather than opencode endless + * loop around allocator. + * Using this flag for costly allocations is _highly_ discouraged. */ #define __GFP_IO ((__force gfp_t)___GFP_IO) #define __GFP_FS ((__force gfp_t)___GFP_FS) #define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ #define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ #define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) -#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT) +#define __GFP_RETRY_MAYFAIL ((__force gfp_t)___GFP_RETRY_MAYFAIL) #define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) #define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) diff --git a/include/linux/slab.h b/include/linux/slab.h index 04a7f7993e67..41473df6dfb0 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -471,7 +471,8 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) * * %__GFP_NOWARN - If allocation fails, don't issue any warnings. * - * %__GFP_REPEAT - If allocation fails initially, try once more before failing. + * %__GFP_RETRY_MAYFAIL - Try really hard to succeed the allocation but fail + * eventually. * * There are other flags available as well, but these are not intended * for general use, and so are not documented here. For a full list of -- cgit v1.2.3 From 0f55685627d6dd2beda55a82abc02297f0f8e5c2 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 12 Jul 2017 14:36:58 -0700 Subject: mm, migration: do not trigger OOM killer when migrating memory Page migration (for memory hotplug, soft_offline_page or mbind) needs to allocate new memory. This can trigger an oom killer if the target memory is depleted. Although quite unlikely, still possible, especially for the memory hotplug (offlining of memory).
Up to now we didn't really have reasonable means to back off. __GFP_NORETRY can fail just too easily and __GFP_THISNODE sticks to a single node and that is not suitable for all callers. But now that we have __GFP_RETRY_MAYFAIL we should use it. It is preferable to fail the migration than disrupt the system by killing some processes. Link: http://lkml.kernel.org/r/20170623085345.11304-7-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Alex Belits Cc: Chris Wilson Cc: Christoph Hellwig Cc: Darrick J. Wong Cc: David Daney Cc: Johannes Weiner Cc: Mel Gorman Cc: NeilBrown Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 4634da521238..3e0d405dc842 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -34,7 +34,7 @@ extern char *migrate_reason_names[MR_TYPES]; static inline struct page *new_page_nodemask(struct page *page, int preferred_nid, nodemask_t *nodemask) { - gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE; + gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL; if (PageHuge(page)) return alloc_huge_page_nodemask(page_hstate(compound_head(page)), -- cgit v1.2.3 From c945dccc80856107f109c36a7d0e29a371b5d1b5 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 12 Jul 2017 14:37:48 -0700 Subject: ARM: samsung: usb-ohci: move inline before return type Make the code like the rest of the kernel. 
Link: http://lkml.kernel.org/r/667a515b8d0f10f2465d519f8595edd91552fc5e.1499284835.git.joe@perches.com Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/platform_data/usb-ohci-s3c2410.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/usb-ohci-s3c2410.h b/include/linux/platform_data/usb-ohci-s3c2410.h index 7fa1fbefc3f2..cc7554ae6e8b 100644 --- a/include/linux/platform_data/usb-ohci-s3c2410.h +++ b/include/linux/platform_data/usb-ohci-s3c2410.h @@ -31,7 +31,7 @@ struct s3c2410_hcd_info { void (*report_oc)(struct s3c2410_hcd_info *, int ports); }; -static void inline s3c2410_usb_report_oc(struct s3c2410_hcd_info *info, int ports) +static inline void s3c2410_usb_report_oc(struct s3c2410_hcd_info *info, int ports) { if (info->report_oc != NULL) { (info->report_oc)(info, ports); -- cgit v1.2.3 From 3e8f399da490e6ac20a3cfd6aa404c9aa961a9a2 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 12 Jul 2017 14:37:51 -0700 Subject: writeback: rework wb_[dec|inc]_stat family of functions Currently the writeback statistics code uses percpu counters to hold various statistics. Furthermore we have 2 families of functions - those which disable local irq and those which don't and whose names begin with double underscore. However, they both end up calling __add_wb_stat which in turn calls percpu_counter_add_batch which is already irq-safe. Exploiting this fact allows us to eliminate the __wb_* functions since they don't add any further protection than we already have. Furthermore, refactor the wb_* functions to call __add_wb_stat directly without the irq-disabling dance. This will likely result in better runtime of code which deals with modifying the stat counters. While at it also document why percpu_counter_add_batch is in fact preempt and irq-safe since at least 3 people got confused.
Link: http://lkml.kernel.org/r/1498029937-27293-1-git-send-email-nborisov@suse.com Signed-off-by: Nikolay Borisov Acked-by: Tejun Heo Reviewed-by: Jan Kara Cc: Josef Bacik Cc: Mel Gorman Cc: Jeff Layton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/backing-dev.h | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 334165c911f0..854e1bdd0b2a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -69,34 +69,14 @@ static inline void __add_wb_stat(struct bdi_writeback *wb, percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH); } -static inline void __inc_wb_stat(struct bdi_writeback *wb, - enum wb_stat_item item) -{ - __add_wb_stat(wb, item, 1); -} - static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { - unsigned long flags; - - local_irq_save(flags); - __inc_wb_stat(wb, item); - local_irq_restore(flags); -} - -static inline void __dec_wb_stat(struct bdi_writeback *wb, - enum wb_stat_item item) -{ - __add_wb_stat(wb, item, -1); + __add_wb_stat(wb, item, 1); } static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { - unsigned long flags; - - local_irq_save(flags); - __dec_wb_stat(wb, item); - local_irq_restore(flags); + __add_wb_stat(wb, item, -1); } static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) -- cgit v1.2.3 From 3c48d86cc959309ee168fb87737a8cb3f97c5224 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 12 Jul 2017 15:04:16 -0700 Subject: clk: Provide bulk prepare_enable disable_unprepare variants This extends the existing set of bulk helpers with prepare_enable and disable_unprepare variants. 
Cc: Russell King , Cc: Dong Aisheng Signed-off-by: Bjorn Andersson Signed-off-by: Stephen Boyd --- include/linux/clk.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk.h b/include/linux/clk.h index c673f0b91751..690e6a6921e1 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -657,6 +657,28 @@ static inline void clk_disable_unprepare(struct clk *clk) clk_unprepare(clk); } +static inline int clk_bulk_prepare_enable(int num_clks, + struct clk_bulk_data *clks) +{ + int ret; + + ret = clk_bulk_prepare(num_clks, clks); + if (ret) + return ret; + ret = clk_bulk_enable(num_clks, clks); + if (ret) + clk_bulk_unprepare(num_clks, clks); + + return ret; +} + +static inline void clk_bulk_disable_unprepare(int num_clks, + struct clk_bulk_data *clks) +{ + clk_bulk_disable(num_clks, clks); + clk_bulk_unprepare(num_clks, clks); +} + #if defined(CONFIG_OF) && defined(CONFIG_COMMON_CLK) struct clk *of_clk_get(struct device_node *np, int index); struct clk *of_clk_get_by_name(struct device_node *np, const char *name); -- cgit v1.2.3 From 0aebdc52ca824c38837a652548028e45da72628f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 09:31:19 +0200 Subject: sunrpc: properly type argument to kxdreproc_t Pass struct rpc_request as the first argument instead of an untyped blob, and mark the data object as const. Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Layton --- include/linux/sunrpc/xdr.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 054c8cde18f3..290f189de200 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -17,6 +17,8 @@ #include #include +struct rpc_rqst; + /* * Buffer adjustment */ @@ -222,7 +224,8 @@ struct xdr_stream { /* * These are the xdr_stream style generic XDR encode and decode functions. 
*/ -typedef void (*kxdreproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); +typedef void (*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + const void *obj); typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); -- cgit v1.2.3 From 993328e2b31fedc35276a4828039ad7af6d519cb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 14:58:11 +0200 Subject: sunrpc: properly type argument to kxdrdproc_t Pass struct rpc_request as the first argument instead of an untyped blob. Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Layton Acked-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 290f189de200..ed0fbf0d8d0f 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -226,7 +226,8 @@ struct xdr_stream { */ typedef void (*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, const void *obj); -typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); +typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + void *obj); extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); -- cgit v1.2.3 From c551858a884b6d81def3d1528a9002ba97f5d4ad Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 23:27:10 +0200 Subject: sunrpc: move p_count out of struct rpc_procinfo p_count is the only writeable member of struct rpc_procinfo, which is a good candidate to be const-ified as it contains function pointers. This patch moves it out of struct rpc_procinfo and into a separate writable array that is pointed to by struct rpc_version and indexed by p_statidx.
Signed-off-by: Christoph Hellwig --- include/linux/sunrpc/clnt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 6095ecba0dde..c75ba37151fe 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -88,6 +88,7 @@ struct rpc_version { u32 number; /* version number */ unsigned int nrprocs; /* number of procs */ struct rpc_procinfo * procs; /* procedure array */ + unsigned int *counts; /* call counts */ }; /* @@ -99,7 +100,6 @@ struct rpc_procinfo { kxdrdproc_t p_decode; /* XDR decode function */ unsigned int p_arglen; /* argument hdr length (u32) */ unsigned int p_replen; /* reply hdr length (u32) */ - unsigned int p_count; /* call count */ unsigned int p_timer; /* Which RTT timer to use */ u32 p_statidx; /* Which procedure to account */ const char * p_name; /* name of procedure */ -- cgit v1.2.3 From 511e936bf2b3e8be2a3160ace3d86be07962a7a8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 May 2017 15:36:49 +0200 Subject: sunrpc: mark all struct rpc_procinfo instances as const struct rpc_procinfo contains function pointers, and marking it as constant avoids it being able to be used as an attack vector for code injections.
Signed-off-by: Christoph Hellwig Acked-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 4 ++-- include/linux/sunrpc/sched.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index c75ba37151fe..55ef67bea06b 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -39,7 +39,7 @@ struct rpc_clnt { struct list_head cl_tasks; /* List of tasks */ spinlock_t cl_lock; /* spinlock */ struct rpc_xprt __rcu * cl_xprt; /* transport */ - struct rpc_procinfo * cl_procinfo; /* procedure info */ + const struct rpc_procinfo *cl_procinfo; /* procedure info */ u32 cl_prog, /* RPC program number */ cl_vers, /* RPC version number */ cl_maxproc; /* max procedure number */ @@ -87,7 +87,7 @@ struct rpc_program { struct rpc_version { u32 number; /* version number */ unsigned int nrprocs; /* number of procs */ - struct rpc_procinfo * procs; /* procedure array */ + const struct rpc_procinfo *procs; /* procedure array */ unsigned int *counts; /* call counts */ }; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 7ba040c797ec..ed60253abd0a 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -22,7 +22,7 @@ */ struct rpc_procinfo; struct rpc_message { - struct rpc_procinfo * rpc_proc; /* Procedure information */ + const struct rpc_procinfo *rpc_proc; /* Procedure information */ void * rpc_argp; /* Arguments */ void * rpc_resp; /* Result */ struct rpc_cred * rpc_cred; /* Credentials */ -- cgit v1.2.3 From 1c8a5409f3c4748ff42c1721d9578dd03091f378 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 17:35:49 +0200 Subject: sunrpc: properly type pc_func callbacks Drop the argp and resp arguments as they can trivially be derived from the rqstp argument. 
With that all functions now have the same prototype, and we can remove the unsafe casting to svc_procfunc as well as the svc_procfunc typedef itself. Signed-off-by: Christoph Hellwig --- include/linux/sunrpc/svc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 11cef5a7bc87..3b58c55614c7 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -419,9 +419,9 @@ struct svc_version { /* * RPC procedure info */ -typedef __be32 (*svc_procfunc)(struct svc_rqst *, void *argp, void *resp); struct svc_procedure { - svc_procfunc pc_func; /* process the request */ + /* process the request: */ + __be32 (*pc_func)(struct svc_rqst *); kxdrproc_t pc_decode; /* XDR decode args */ kxdrproc_t pc_encode; /* XDR encode result */ kxdrproc_t pc_release; /* XDR free result */ -- cgit v1.2.3 From 1150ded804c2be6d02efef3e39e39e570c9cea21 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 18:48:24 +0200 Subject: sunrpc: properly type pc_release callbacks Drop the p and resp arguments as they are always NULL or can trivially be derived from the rqstp argument. With that all functions now have the same prototype, and we can remove the unsafe casting to kxdrproc_t. 
Signed-off-by: Christoph Hellwig --- include/linux/sunrpc/svc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 3b58c55614c7..c73194e9c2bd 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -424,7 +424,8 @@ struct svc_procedure { __be32 (*pc_func)(struct svc_rqst *); kxdrproc_t pc_decode; /* XDR decode args */ kxdrproc_t pc_encode; /* XDR encode result */ - kxdrproc_t pc_release; /* XDR free result */ + /* XDR free result: */ + void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ unsigned int pc_ressize; /* result struct size */ unsigned int pc_count; /* call count */ -- cgit v1.2.3 From cc6acc20a606f9ca65a6338af8204b7ae9e67b1a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 19:01:48 +0200 Subject: sunrpc: properly type pc_decode callbacks Drop the argp argument as it can trivially be derived from the rqstp argument. With that all functions now have the same prototype, and we can remove the unsafe casting to kxdrproc_t. 
Signed-off-by: Christoph Hellwig --- include/linux/lockd/xdr.h | 18 +++++++++--------- include/linux/lockd/xdr4.h | 18 +++++++++--------- include/linux/sunrpc/svc.h | 3 ++- 3 files changed, 20 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index d39ed1cc5fbf..0416600844ce 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -95,19 +95,19 @@ struct nlm_reboot { */ #define NLMSVC_XDRSIZE sizeof(struct nlm_args) -int nlmsvc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlmsvc_decode_testargs(struct svc_rqst *, __be32 *); int nlmsvc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *); +int nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *); +int nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *); int nlmsvc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlmsvc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlmsvc_decode_res(struct svc_rqst *, __be32 *); int nlmsvc_encode_void(struct svc_rqst *, __be32 *, void *); -int nlmsvc_decode_void(struct svc_rqst *, __be32 *, void *); -int nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlmsvc_decode_void(struct svc_rqst *, __be32 *); +int nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *); int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlmsvc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlmsvc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *); +int nlmsvc_decode_notify(struct svc_rqst *, __be32 *); +int nlmsvc_decode_reboot(struct svc_rqst *, __be32 *); /* int 
nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *); int nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *); diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index e58c88b52ce1..951bbe31fdb8 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -23,19 +23,19 @@ -int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *); int nlm4svc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *); +int nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *); +int nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *); int nlm4svc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlm4svc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlm4svc_decode_res(struct svc_rqst *, __be32 *); int nlm4svc_encode_void(struct svc_rqst *, __be32 *, void *); -int nlm4svc_decode_void(struct svc_rqst *, __be32 *, void *); -int nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlm4svc_decode_void(struct svc_rqst *, __be32 *); +int nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *); int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlm4svc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlm4svc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *); +int nlm4svc_decode_notify(struct svc_rqst *, __be32 *); +int nlm4svc_decode_reboot(struct svc_rqst *, __be32 *); /* int nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *); int nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *); diff --git 
a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index c73194e9c2bd..d8703a5ab81e 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -422,7 +422,8 @@ struct svc_version { struct svc_procedure { /* process the request: */ __be32 (*pc_func)(struct svc_rqst *); - kxdrproc_t pc_decode; /* XDR decode args */ + /* XDR decode args: */ + int (*pc_decode)(struct svc_rqst *, __be32 *data); kxdrproc_t pc_encode; /* XDR encode result */ /* XDR free result: */ void (*pc_release)(struct svc_rqst *); -- cgit v1.2.3 From d16d1867215663907f3212590d1a9d32398a0f47 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 19:42:02 +0200 Subject: sunrpc: properly type pc_encode callbacks Drop the resp argument as it can trivially be derived from the rqstp argument. With that all functions now have the same prototype, and we can remove the unsafe casting to kxdrproc_t. Signed-off-by: Christoph Hellwig Acked-by: Trond Myklebust --- include/linux/lockd/xdr.h | 8 ++++---- include/linux/lockd/xdr4.h | 8 ++++---- include/linux/sunrpc/svc.h | 3 ++- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 0416600844ce..7acbecc21a40 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -96,16 +96,16 @@ struct nlm_reboot { #define NLMSVC_XDRSIZE sizeof(struct nlm_args) int nlmsvc_decode_testargs(struct svc_rqst *, __be32 *); -int nlmsvc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlmsvc_encode_testres(struct svc_rqst *, __be32 *); int nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *); int nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *); int nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *); -int nlmsvc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlmsvc_encode_res(struct svc_rqst *, __be32 *); int nlmsvc_decode_res(struct svc_rqst *, __be32 *); -int nlmsvc_encode_void(struct svc_rqst 
*, __be32 *, void *); +int nlmsvc_encode_void(struct svc_rqst *, __be32 *); int nlmsvc_decode_void(struct svc_rqst *, __be32 *); int nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *); -int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *); int nlmsvc_decode_notify(struct svc_rqst *, __be32 *); int nlmsvc_decode_reboot(struct svc_rqst *, __be32 *); /* diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index 951bbe31fdb8..bf1645609225 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -24,16 +24,16 @@ int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *); -int nlm4svc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlm4svc_encode_testres(struct svc_rqst *, __be32 *); int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *); int nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *); int nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *); -int nlm4svc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlm4svc_encode_res(struct svc_rqst *, __be32 *); int nlm4svc_decode_res(struct svc_rqst *, __be32 *); -int nlm4svc_encode_void(struct svc_rqst *, __be32 *, void *); +int nlm4svc_encode_void(struct svc_rqst *, __be32 *); int nlm4svc_decode_void(struct svc_rqst *, __be32 *); int nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *); -int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *); int nlm4svc_decode_notify(struct svc_rqst *, __be32 *); int nlm4svc_decode_reboot(struct svc_rqst *, __be32 *); /* diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index d8703a5ab81e..bd9e313c444a 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -424,7 +424,8 @@ struct svc_procedure { __be32 (*pc_func)(struct svc_rqst *); /* XDR decode args: */ int (*pc_decode)(struct svc_rqst *, __be32 *data); - 
kxdrproc_t pc_encode; /* XDR encode result */ + /* XDR encode result: */ + int (*pc_encode)(struct svc_rqst *, __be32 *data); /* XDR free result: */ void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ -- cgit v1.2.3 From 408b3d46ae06e1d219f31cbe629789a5e5c862aa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 19:56:10 +0200 Subject: sunrpc: remove kxdrproc_t Remove the now unused typedef. Signed-off-by: Christoph Hellwig --- include/linux/sunrpc/xdr.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index ed0fbf0d8d0f..261b48a2701d 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -34,13 +34,6 @@ struct xdr_netobj { u8 * data; }; -/* - * This is the legacy generic XDR function. rqstp is either a rpc_rqst - * (client side) or svc_rqst pointer (server side). - * Encode functions always assume there's enough room in the buffer. - */ -typedef int (*kxdrproc_t)(void *rqstp, __be32 *data, void *obj); - /* * Basic structure for transmission/reception of a client XDR message. * Features a header (for a linear buffer containing RPC headers -- cgit v1.2.3 From 0becc1181cdba562730be4d4b8a5fcb4368ef527 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 23:40:27 +0200 Subject: sunrpc: move pc_count out of struct svc_procinfo pc_count is the only writeable member of struct svc_procinfo, which is a good candidate to be const-ified as it contains function pointers. This patch moves it out of struct svc_procinfo, and into a separate writable array that is pointed to by struct svc_version.
Signed-off-by: Christoph Hellwig --- include/linux/sunrpc/svc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index bd9e313c444a..bcd114f038ef 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -398,6 +398,7 @@ struct svc_version { u32 vs_vers; /* version number */ u32 vs_nproc; /* number of procedures */ struct svc_procedure * vs_proc; /* per-procedure info */ + unsigned int *vs_count; /* call counts */ u32 vs_xdrsize; /* xdrsize needed for this version */ /* Don't register with rpcbind */ @@ -430,7 +431,6 @@ struct svc_procedure { void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ unsigned int pc_ressize; /* result struct size */ - unsigned int pc_count; /* call count */ unsigned int pc_cachetype; /* cache info (NFS) */ unsigned int pc_xdrressize; /* maximum size of XDR reply */ }; -- cgit v1.2.3 From b9c744c19c441f306239ac3e60a2a95b40d698f8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 May 2017 16:11:49 +0200 Subject: sunrpc: mark all struct svc_procinfo instances as const struct svc_procinfo contains function pointers, and marking it as constant avoids it being able to be used as an attack vector for code injections.
Signed-off-by: Christoph Hellwig --- include/linux/lockd/lockd.h | 4 ++-- include/linux/sunrpc/svc.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 41f7b6a04d69..3eca67728366 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -192,9 +192,9 @@ struct nlm_block { * Global variables */ extern const struct rpc_program nlm_program; -extern struct svc_procedure nlmsvc_procedures[]; +extern const struct svc_procedure nlmsvc_procedures[]; #ifdef CONFIG_LOCKD_V4 -extern struct svc_procedure nlmsvc_procedures4[]; +extern const struct svc_procedure nlmsvc_procedures4[]; #endif extern int nlmsvc_grace_period; extern unsigned long nlmsvc_timeout; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index bcd114f038ef..992ea3419795 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -237,7 +237,7 @@ struct svc_rqst { struct svc_serv * rq_server; /* RPC service definition */ struct svc_pool * rq_pool; /* thread pool */ - struct svc_procedure * rq_procinfo; /* procedure info */ + const struct svc_procedure *rq_procinfo;/* procedure info */ struct auth_ops * rq_authop; /* authentication flavour */ struct svc_cred rq_cred; /* auth info */ void * rq_xprt_ctxt; /* transport specific context ptr */ @@ -397,7 +397,7 @@ struct svc_program { struct svc_version { u32 vs_vers; /* version number */ u32 vs_nproc; /* number of procedures */ - struct svc_procedure * vs_proc; /* per-procedure info */ + const struct svc_procedure *vs_proc; /* per-procedure info */ unsigned int *vs_count; /* call counts */ u32 vs_xdrsize; /* xdrsize needed for this version */ -- cgit v1.2.3 From aa8217d5dcb1db594d816794ef6ab434ebf3e127 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 May 2017 16:21:37 +0200 Subject: sunrpc: mark all struct svc_version instances as const Signed-off-by: Christoph Hellwig Acked-by: Trond 
Myklebust --- include/linux/sunrpc/svc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 992ea3419795..eec04982a7ea 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -384,7 +384,7 @@ struct svc_program { unsigned int pg_lovers; /* lowest version */ unsigned int pg_hivers; /* highest version */ unsigned int pg_nvers; /* number of versions */ - struct svc_version ** pg_vers; /* version array */ + const struct svc_version **pg_vers; /* version array */ char * pg_name; /* service name */ char * pg_class; /* class name: services sharing authentication */ struct svc_stat * pg_stats; /* rpc statistics */ -- cgit v1.2.3 From a7a3b1e971cd806b81ecea3a234d8dae9de0add0 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 20 Jun 2017 08:33:44 -0400 Subject: NFS: convert flags to bool NFS uses some int, and unsigned int :1, and bool as flags in structs and args. Assert the preference for uniformly replacing these with the bool type. 
Signed-off-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index b28c83475ee8..9b42bffbe07b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -878,7 +878,7 @@ struct nfs3_readdirargs { struct nfs_fh * fh; __u64 cookie; __be32 verf[2]; - int plus; + bool plus; unsigned int count; struct page ** pages; }; @@ -909,7 +909,7 @@ struct nfs3_linkres { struct nfs3_readdirres { struct nfs_fattr * dir_attr; __be32 * verf; - int plus; + bool plus; }; struct nfs3_getaclres { @@ -1053,7 +1053,7 @@ struct nfs4_readdir_arg { struct page ** pages; /* zero-copy data */ unsigned int pgbase; /* zero-copy data */ const u32 * bitmask; - int plus; + bool plus; }; struct nfs4_readdir_res { @@ -1585,7 +1585,7 @@ struct nfs_rpc_ops { int (*mkdir) (struct inode *, struct dentry *, struct iattr *); int (*rmdir) (struct inode *, const struct qstr *); int (*readdir) (struct dentry *, struct rpc_cred *, - u64, struct page **, unsigned int, int); + u64, struct page **, unsigned int, bool); int (*mknod) (struct inode *, struct dentry *, struct iattr *, dev_t); int (*statfs) (struct nfs_server *, struct nfs_fh *, @@ -1595,7 +1595,7 @@ struct nfs_rpc_ops { int (*pathconf) (struct nfs_server *, struct nfs_fh *, struct nfs_pathconf *); int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); - int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int); + int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, bool); int (*pgio_rpc_prepare)(struct rpc_task *, struct nfs_pgio_header *); void (*read_setup)(struct nfs_pgio_header *, struct rpc_message *); -- cgit v1.2.3 From 818a8dbe83fddff534b814a7d4e0c75b511dff2e Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Fri, 16 Jun 2017 11:13:00 -0400 Subject: NFS: nfs_rename() - revalidate directories on -ERESTARTSYS An 
interrupted rename will leave the old dentry behind if the rename succeeds. Fix this by forcing a lookup the next time through ->d_revalidate. A previous attempt at solving this problem took the approach to complete the work of the rename asynchronously, however that approach was wrong since it would allow the d_move() to occur after the directory's i_mutex had been dropped by the original process. Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9b42bffbe07b..9463eeff9e3c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1533,6 +1533,7 @@ struct nfs_renamedata { struct nfs_fattr new_fattr; void (*complete)(struct rpc_task *, struct nfs_renamedata *); long timeout; + bool cancelled; }; struct nfs_access_entry; -- cgit v1.2.3 From b5973a8c1ccf375c9ab9e2428e1185e3f799af06 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Jun 2017 19:35:36 -0400 Subject: NFS: Remove unused fields in the page I/O structures Remove the 'layout_private' fields that were only used by the pNFS OSD layout driver. 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 1 - include/linux/nfs_xdr.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 247cc3d3498f..6138cf91346b 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -94,7 +94,6 @@ struct nfs_pageio_descriptor { const struct nfs_pgio_completion_ops *pg_completion_ops; struct pnfs_layout_segment *pg_lseg; struct nfs_direct_req *pg_dreq; - void *pg_layout_private; unsigned int pg_bsize; /* default bsize for mirrors */ u32 pg_mirror_count; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9463eeff9e3c..7f1e04941763 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1436,7 +1436,6 @@ struct nfs_pgio_header { const struct nfs_pgio_completion_ops *completion_ops; const struct nfs_rw_ops *rw_ops; struct nfs_direct_req *dreq; - void *layout_private; spinlock_t lock; /* fields protected by lock */ int pnfs_error; -- cgit v1.2.3 From 919e3bd9a87593520a2c5dfda27bd3e6599852ed Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Jun 2017 19:35:37 -0400 Subject: NFS: Ensure we commit after writeback is complete If the page cache is being flushed, then we want to ensure that we do start a commit once the pages are done being flushed. If we just wait until all I/O is done to that file, we can end up livelocking until the balance_dirty_pages() mechanism puts its foot down and forces I/O to stop. 
So instead we do more or less the same thing that O_DIRECT does, and set up a counter to tell us when the flush is done. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 1 + include/linux/nfs_xdr.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 6138cf91346b..abbee2d15dce 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -93,6 +93,7 @@ struct nfs_pageio_descriptor { const struct rpc_call_ops *pg_rpc_callops; const struct nfs_pgio_completion_ops *pg_completion_ops; struct pnfs_layout_segment *pg_lseg; + struct nfs_io_completion *pg_io_completion; struct nfs_direct_req *pg_dreq; unsigned int pg_bsize; /* default bsize for mirrors */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7f1e04941763..89093341f076 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1422,6 +1422,7 @@ enum { NFS_IOHDR_STAT, }; +struct nfs_io_completion; struct nfs_pgio_header { struct inode *inode; struct rpc_cred *cred; @@ -1435,6 +1436,7 @@ struct nfs_pgio_header { void (*release) (struct nfs_pgio_header *hdr); const struct nfs_pgio_completion_ops *completion_ops; const struct nfs_rw_ops *rw_ops; + struct nfs_io_completion *io_completion; struct nfs_direct_req *dreq; spinlock_t lock; /* fields protected by lock */ -- cgit v1.2.3 From 8dcbec6d20eb881ba368d0aebc3a8a678aebb1da Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 8 Jun 2017 11:52:44 -0400 Subject: NFSv4.1: Handle EXCHGID4_FLAG_CONFIRMED_R during NFSv4.1 migration Transparent State Migration copies a client's lease state from the server where a filesystem used to reside to the server where it now resides. When an NFSv4.1 client first contacts that destination server, it uses EXCHANGE_ID to detect trunking relationships.
The lease that was copied there is returned to that client, but the destination server sets EXCHGID4_FLAG_CONFIRMED_R when replying to the client. This is because the lease was confirmed on the source server (before it was copied). Normally, when CONFIRMED_R is set, a client purges the lease and creates a new one. However, that throws away the entire benefit of Transparent State Migration. Therefore, the client must not purge that lease when it is possible that Transparent State Migration has occurred. Reported-by: Xuan Qi Signed-off-by: Chuck Lever Tested-by: Xuan Qi Signed-off-by: Anna Schumaker --- include/linux/nfs_fs_sb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index e418a1096662..74c44665e6d3 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -42,6 +42,7 @@ struct nfs_client { #define NFS_CS_MIGRATION 2 /* - transparent state migr */ #define NFS_CS_INFINITE_SLOTS 3 /* - don't limit TCP slots */ #define NFS_CS_NO_RETRANS_TIMEOUT 4 /* - Disable retransmit timeouts */ +#define NFS_CS_TSM_POSSIBLE 5 /* - Maybe state migration */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ @@ -210,6 +211,7 @@ struct nfs_server { unsigned long mig_status; #define NFS_MIG_IN_TRANSITION (1) #define NFS_MIG_FAILED (2) +#define NFS_MIG_TSM_POSSIBLE (3) void (*destroy)(struct nfs_server *); -- cgit v1.2.3 From f174ff7a0ab6a097455a94abfc99517940041c07 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 29 Jun 2017 06:34:51 -0700 Subject: nfs: add a nfs_ilookup helper This helper will allow to find an existing NFS inode by the file handle and fattr. 
Signed-off-by: Peng Tao [hch: split from a larger patch] Signed-off-by: Christoph Hellwig Signed-off-by: Anna Schumaker --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index bb0eb2c9acca..e52cc55ac300 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -332,6 +332,7 @@ extern void nfs_zap_caches(struct inode *); extern void nfs_invalidate_atime(struct inode *); extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *); +struct inode *nfs_ilookup(struct super_block *sb, struct nfs_fattr *, struct nfs_fh *); extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); -- cgit v1.2.3 From 5b5faaf6df73412af0278997db36dbcb51011d9d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 29 Jun 2017 06:34:52 -0700 Subject: nfs4: add NFSv4 LOOKUPP handlers This will be needed in order to implement the get_parent export op for nfsd. 
Signed-off-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfs4.h | 1 + include/linux/nfs_xdr.h | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 1b1ca04820a3..47239c336688 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -479,6 +479,7 @@ enum { NFSPROC4_CLNT_ACCESS, NFSPROC4_CLNT_GETATTR, NFSPROC4_CLNT_LOOKUP, + NFSPROC4_CLNT_LOOKUPP, NFSPROC4_CLNT_LOOKUP_ROOT, NFSPROC4_CLNT_REMOVE, NFSPROC4_CLNT_RENAME, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 89093341f076..ca3bcc4ed4e5 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1012,7 +1012,6 @@ struct nfs4_link_res { struct nfs_fattr * dir_attr; }; - struct nfs4_lookup_arg { struct nfs4_sequence_args seq_args; const struct nfs_fh * dir_fh; @@ -1028,6 +1027,20 @@ struct nfs4_lookup_res { struct nfs4_label *label; }; +struct nfs4_lookupp_arg { + struct nfs4_sequence_args seq_args; + const struct nfs_fh *fh; + const u32 *bitmask; +}; + +struct nfs4_lookupp_res { + struct nfs4_sequence_res seq_res; + const struct nfs_server *server; + struct nfs_fattr *fattr; + struct nfs_fh *fh; + struct nfs4_label *label; +}; + struct nfs4_lookup_root_arg { struct nfs4_sequence_args seq_args; const u32 * bitmask; @@ -1569,6 +1582,8 @@ struct nfs_rpc_ops { int (*lookup) (struct inode *, const struct qstr *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *); + int (*lookupp) (struct inode *, struct nfs_fh *, + struct nfs_fattr *, struct nfs4_label *); int (*access) (struct inode *, struct nfs_access_entry *); int (*readlink)(struct inode *, struct page *, unsigned int, unsigned int); -- cgit v1.2.3 From 301bfa483016d48b7fb9cbad87c0a04a15c25b90 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Jul 2017 17:53:48 -0400 Subject: NFS: Don't run wake_up_bit() when nobody is waiting... 
"perf lock" shows fairly heavy contention for the bit waitqueue locks when doing an I/O heavy workload. Use a bit to tell whether or not there has been contention for a lock so that we can optimise away the bit waitqueue options in those cases. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index abbee2d15dce..d67b67ae6c8b 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -33,6 +33,8 @@ enum { PG_UPTODATE, /* page group sync bit in read path */ PG_WB_END, /* page group sync bit in write path */ PG_REMOVE, /* page group sync bit in write path */ + PG_CONTENDED1, /* Is someone waiting for a lock? */ + PG_CONTENDED2, /* Is someone waiting for a lock? */ }; struct nfs_inode; -- cgit v1.2.3 From b4f937cffa66b3d56eb8f586e620d0b223a281a3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Jul 2017 17:53:48 -0400 Subject: NFS: Don't run wake_up_bit() when nobody is waiting... "perf lock" shows fairly heavy contention for the bit waitqueue locks when doing an I/O heavy workload. Use a bit to tell whether or not there has been contention for a lock so that we can optimise away the bit waitqueue options in those cases. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index abbee2d15dce..d67b67ae6c8b 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -33,6 +33,8 @@ enum { PG_UPTODATE, /* page group sync bit in read path */ PG_WB_END, /* page group sync bit in write path */ PG_REMOVE, /* page group sync bit in write path */ + PG_CONTENDED1, /* Is someone waiting for a lock? */ + PG_CONTENDED2, /* Is someone waiting for a lock? 
*/ }; struct nfs_inode; -- cgit v1.2.3 From 2b02c20ce0c28974b44e69a2e2f5ddc6a470ad6f Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Tue, 11 Jul 2017 17:21:52 +0200 Subject: cdc_ncm: Set NTB format again after altsetting switch for Huawei devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some firmwares in Huawei E3372H devices have been observed to switch back to NTB 32-bit format after altsetting switch. This patch implements a driver flag to check for the device settings and set NTB format to 16-bit again if needed. The flag has been activated for devices controlled by the huawei_cdc_ncm.c driver. V1->V2: - fixed broken error checks - some corrections to the commit message V2->V3: - variable name changes, to clarify what's happening - check (and possibly set) the NTB format later in the common bind code path Signed-off-by: Enrico Mioso Reported-and-tested-by: Christian Panton Reviewed-by: Bjørn Mork CC: Bjørn Mork CC: Christian Panton CC: linux-usb@vger.kernel.org CC: netdev@vger.kernel.org CC: Oliver Neukum Signed-off-by: David S. 
Miller --- include/linux/usb/cdc_ncm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 021f7a88f52c..1a59699cf82a 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -83,6 +83,7 @@ /* Driver flags */ #define CDC_NCM_FLAG_NDP_TO_END 0x02 /* NDP is placed at end of frame */ #define CDC_MBIM_FLAG_AVOID_ALTSETTING_TOGGLE 0x04 /* Avoid altsetting toggle during init */ +#define CDC_NCM_FLAG_RESET_NTB16 0x08 /* set NDP16 one more time after altsetting switch */ #define cdc_ncm_comm_intf_is_mbim(x) ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \ (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE) -- cgit v1.2.3 From 76250f2b743b72cb685cc51ac0cdabb32957180b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Feb 2017 12:40:01 +0000 Subject: dma-buf/fence: Avoid use of uninitialised timestamp [ 236.821534] WARNING: kmemcheck: Caught 64-bit read from uninitialized memory (ffff8802538683d0) [ 236.828642] 420000001e7f0000000000000000000000080000000000000000000000000000 [ 236.839543] i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u [ 236.850420] ^ [ 236.854123] RIP: 0010:[] [] fence_signal+0x17/0xd0 [ 236.861313] RSP: 0018:ffff88024acd7ba0 EFLAGS: 00010282 [ 236.865027] RAX: ffffffff812f6a90 RBX: ffff8802527ca800 RCX: ffff880252cb30e0 [ 236.868801] RDX: ffff88024ac5d918 RSI: ffff880252f780e0 RDI: ffff880253868380 [ 236.872579] RBP: ffff88024acd7bc0 R08: ffff88024acd7be0 R09: 0000000000000000 [ 236.876407] R10: 0000000000000000 R11: 0000000000000000 R12: ffff880253868380 [ 236.880185] R13: ffff8802538684d0 R14: ffff880253868380 R15: ffff88024cd48e00 [ 236.883983] FS: 00007f1646d1a740(0000) GS:ffff88025d000000(0000) knlGS:0000000000000000 [ 236.890959] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 236.894702] CR2: ffff880251360318 CR3: 000000024ad21000 CR4: 00000000001406f0 [ 236.898481] [] 
i915_gem_request_retire+0x1cd/0x230 [ 236.902439] [] i915_gem_request_alloc+0xa3/0x2f0 [ 236.906435] [] i915_gem_do_execbuffer.isra.41+0xb6d/0x18b0 [ 236.910434] [] i915_gem_execbuffer2+0x95/0x1e0 [ 236.914390] [] drm_ioctl+0x1e5/0x460 [ 236.918275] [] do_vfs_ioctl+0x8f/0x5c0 [ 236.922168] [] SyS_ioctl+0x3c/0x70 [ 236.926090] [] entry_SYSCALL_64_fastpath+0x17/0x93 [ 236.930045] [] 0xffffffffffffffff We only set the timestamp before we mark the fence as signaled. It is done before to avoid observers having a window in which they may see the fence as complete but no timestamp. Having it does incur a potential for the timestamp to be written twice, and even for it to be corrupted if the u64 write is not atomic. Instead use a new bit to record the presence of the timestamp, and teach the readers to wait until it is set if the fence is complete. There still remains a race where the timestamp for the signaled fence may be shown before the fence is reported as signaled, but that's a pre-existing error. Signed-off-by: Chris Wilson Cc: Sumit Semwal Cc: Gustavo Padovan Cc: Daniel Vetter Reported-by: Rafael Antognolli Signed-off-by: Gustavo Padovan Link: http://patchwork.freedesktop.org/patch/msgid/20170214124001.1930-1-chris@chris-wilson.co.uk --- include/linux/dma-fence.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index a5195a7d6f77..0a186c4f3981 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -55,6 +55,7 @@ struct dma_fence_cb; * of the time. * * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled + * DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called * DMA_FENCE_FLAG_USER_BITS - start of the unused bits, can be used by the * implementer of the fence for its own purposes. 
Can be used in different @@ -84,6 +85,7 @@ struct dma_fence { enum dma_fence_flag_bits { DMA_FENCE_FLAG_SIGNALED_BIT, + DMA_FENCE_FLAG_TIMESTAMP_BIT, DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, DMA_FENCE_FLAG_USER_BITS, /* must always be last member */ }; -- cgit v1.2.3 From 9049f2f6e7bdfb5de0c63c2635bf3cdb70c4efb5 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 14 Jul 2017 14:49:52 -0700 Subject: fault-inject: parse as natural 1-based value for fail-nth write interface The value written to fail-nth file is parsed as 0-based. Parsing as one-based is more natural to understand and it enables to cancel the previous setup by simply writing '0'. This change also converts task->fail_nth from signed to unsigned int. Link: http://lkml.kernel.org/r/1491490561-10485-3-git-send-email-akinobu.mita@gmail.com Signed-off-by: Akinobu Mita Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3822d749fc9e..2ba9ec93423f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -974,7 +974,7 @@ struct task_struct { #ifdef CONFIG_FAULT_INJECTION int make_it_fail; - int fail_nth; + unsigned int fail_nth; #endif /* * When (nr_dirtied >= nr_dirtied_pause), it's time to call -- cgit v1.2.3 From 077d2ba519b2e8bf1abd80cbade699b1de42cafe Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Fri, 14 Jul 2017 17:28:12 -0400 Subject: replace incorrect strscpy use in FORTIFY_SOURCE Using strscpy was wrong because FORTIFY_SOURCE is passing the maximum possible size of the outermost object, but strscpy defines the count parameter as the exact buffer size, so this could copy past the end of the source. 
This would still be wrong with the planned usage of __builtin_object_size(p, 1) for intra-object overflow checks since it's the maximum possible size of the specified object with no guarantee of it being that large. Reuse of the fortified functions like this currently makes the runtime error reporting less precise but that can be improved later on. Noticed by Dave Jones and KASAN. Signed-off-by: Daniel Micay Acked-by: Kees Cook Reported-by: Dave Jones Signed-off-by: Linus Torvalds --- include/linux/string.h | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index 96f5a5fd0377..049866760e8b 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -202,17 +202,6 @@ void __read_overflow2(void) __compiletime_error("detected read beyond size of ob void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter"); #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) -__FORTIFY_INLINE char *strcpy(char *p, const char *q) -{ - size_t p_size = __builtin_object_size(p, 0); - size_t q_size = __builtin_object_size(q, 0); - if (p_size == (size_t)-1 && q_size == (size_t)-1) - return __builtin_strcpy(p, q); - if (strscpy(p, q, p_size < q_size ? 
p_size : q_size) < 0) - fortify_panic(__func__); - return p; -} - __FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size) { size_t p_size = __builtin_object_size(p, 0); @@ -391,6 +380,18 @@ __FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp) fortify_panic(__func__); return __real_kmemdup(p, size, gfp); } + +/* defined after fortified strlen and memcpy to reuse them */ +__FORTIFY_INLINE char *strcpy(char *p, const char *q) +{ + size_t p_size = __builtin_object_size(p, 0); + size_t q_size = __builtin_object_size(q, 0); + if (p_size == (size_t)-1 && q_size == (size_t)-1) + return __builtin_strcpy(p, q); + memcpy(p, q, strlen(q) + 1); + return p; +} + #endif #endif /* _LINUX_STRING_H_ */ -- cgit v1.2.3 From e67ae2b7b23b283e657865b498b151e6a17b919d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 10 Jul 2017 13:17:26 +0200 Subject: libceph: fix old style declaration warnings The new macros don't follow the usual style for declarations, which we get a warning for with 'make W=1': In file included from fs/ceph/mds_client.c:16:0: include/linux/ceph/ceph_features.h:74:1: error: 'static' is not at beginning of declaration [-Werror=old-style-declaration] This moves the 'static' keyword to the front of the declaration. 
Fixes: f179d3ba8cb9 ("libceph: new features macros") Signed-off-by: Arnd Bergmann Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_features.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index f0f6c537b64c..040dd105c3e7 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -10,14 +10,14 @@ #define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL #define DEFINE_CEPH_FEATURE(bit, incarnation, name) \ - const static uint64_t CEPH_FEATURE_##name = (1ULL< Date: Fri, 14 Jul 2017 18:16:44 +0530 Subject: mmc: host: omap_hsmmc: remove unused platform callbacks Remove unused callbacks in the omap_hsmmc_platform_data structure Signed-off-by: Faiz Abbas Acked-by: Tony Lindgren Signed-off-by: Ulf Hansson --- include/linux/platform_data/hsmmc-omap.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/hsmmc-omap.h b/include/linux/platform_data/hsmmc-omap.h index 8e981be2e2c2..0ff1e0dba720 100644 --- a/include/linux/platform_data/hsmmc-omap.h +++ b/include/linux/platform_data/hsmmc-omap.h @@ -55,9 +55,6 @@ struct omap_hsmmc_platform_data { u32 caps; /* Used for the MMC driver on 2430 and later */ u32 pm_caps; /* PM capabilities of the mmc */ - /* use the internal clock */ - unsigned internal_clock:1; - /* nonremovable e.g. 
eMMC */ unsigned nonremovable:1; @@ -73,13 +70,6 @@ struct omap_hsmmc_platform_data { int gpio_cd; /* gpio (card detect) */ int gpio_cod; /* gpio (cover detect) */ int gpio_wp; /* gpio (write protect) */ - - int (*set_power)(struct device *dev, int power_on, int vdd); - void (*remux)(struct device *dev, int power_on); - /* Call back before enabling / disabling regulators */ - void (*before_set_reg)(struct device *dev, int power_on, int vdd); - /* Call back after enabling / disabling regulators */ - void (*after_set_reg)(struct device *dev, int power_on, int vdd); /* if we have special card, init it using this callback */ void (*init_card)(struct mmc_card *card); -- cgit v1.2.3 From cf56c2f892a8a1870a8358114ad896772da7543a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 6 Jul 2017 23:17:44 +0200 Subject: netfilter: remove old pre-netns era hook api no more users in the tree, remove this. The old api is racy wrt. module removal, all users have been converted to the netns-aware api. The old api pretended we still have global hooks but that has not been true for a long time. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index a4b97be30b28..22f081065d49 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -61,8 +61,6 @@ typedef unsigned int nf_hookfn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); struct nf_hook_ops { - struct list_head list; - /* User fills in from here down. 
*/ nf_hookfn *hook; struct net_device *dev; @@ -160,13 +158,6 @@ int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg, void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, unsigned int n); -int nf_register_hook(struct nf_hook_ops *reg); -void nf_unregister_hook(struct nf_hook_ops *reg); -int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n); -void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n); -int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n); -void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n); - /* Functions to register get/setsockopt ranges (non-inclusive). You need to check permissions yourself! */ int nf_register_sockopt(struct nf_sockopt_ops *reg); -- cgit v1.2.3 From 13c401f33e19c20431d9888a91d9ea82e5133bd9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 14 Jul 2017 23:03:49 -0700 Subject: jhash: fix -Wimplicit-fallthrough warnings GCC 7 added a new -Wimplicit-fallthrough warning. It's only enabled with W=1, but since linux/jhash.h is included in over hundred places (including other global headers) it seems worthwhile fixing this warning. Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- include/linux/jhash.h | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jhash.h b/include/linux/jhash.h index 348c6f47e4cc..8037850f3104 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -85,19 +85,18 @@ static inline u32 jhash(const void *key, u32 length, u32 initval) k += 12; } /* Last block: affect all 32 bits of (c) */ - /* All the case statements fall through */ switch (length) { - case 12: c += (u32)k[11]<<24; - case 11: c += (u32)k[10]<<16; - case 10: c += (u32)k[9]<<8; - case 9: c += k[8]; - case 8: b += (u32)k[7]<<24; - case 7: b += (u32)k[6]<<16; - case 6: b += (u32)k[5]<<8; - case 5: b += k[4]; - case 4: a += (u32)k[3]<<24; - case 3: a += (u32)k[2]<<16; - case 2: a += (u32)k[1]<<8; + case 12: c += (u32)k[11]<<24; /* fall through */ + case 11: c += (u32)k[10]<<16; /* fall through */ + case 10: c += (u32)k[9]<<8; /* fall through */ + case 9: c += k[8]; /* fall through */ + case 8: b += (u32)k[7]<<24; /* fall through */ + case 7: b += (u32)k[6]<<16; /* fall through */ + case 6: b += (u32)k[5]<<8; /* fall through */ + case 5: b += k[4]; /* fall through */ + case 4: a += (u32)k[3]<<24; /* fall through */ + case 3: a += (u32)k[2]<<16; /* fall through */ + case 2: a += (u32)k[1]<<8; /* fall through */ case 1: a += k[0]; __jhash_final(a, b, c); case 0: /* Nothing left to add */ @@ -131,10 +130,10 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval) k += 3; } - /* Handle the last 3 u32's: all the case statements fall through */ + /* Handle the last 3 u32's */ switch (length) { - case 3: c += k[2]; - case 2: b += k[1]; + case 3: c += k[2]; /* fall through */ + case 2: b += k[1]; /* fall through */ case 1: a += k[0]; __jhash_final(a, b, c); case 0: /* Nothing left to add */ -- cgit v1.2.3 From df39a9f106d53532443a804352894480ca6ca5fd Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 17 Jul 2017 11:42:55 -0700 Subject: bpf: check NULL 
for sk_to_full_sk() return value When req->rsk_listener is NULL, sk_to_full_sk() returns NULL too, so we have to check its return value against NULL here. Fixes: 40304b2a1567 ("bpf: BPF support for sock_ops") Reported-by: David Ahern Tested-by: David Ahern Cc: Lawrence Brakmo Cc: Daniel Borkmann Signed-off-by: Cong Wang Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf-cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 360c082e885c..d41d40ac3efd 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -85,7 +85,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, int __ret = 0; \ if (cgroup_bpf_enabled && (sock_ops)->sk) { \ typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk); \ - if (sk_fullsock(__sk)) \ + if (__sk && sk_fullsock(__sk)) \ __ret = __cgroup_bpf_run_filter_sock_ops(__sk, \ sock_ops, \ BPF_CGROUP_SOCK_OPS); \ -- cgit v1.2.3 From 8900b894e769dd88b53e519e3502e0e3c349fe95 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 May 2017 14:38:15 +0300 Subject: {net, IB}/mlx4: Remove gfp flags argument The caller to the driver marks GFP_NOIO allocations with help of memalloc_noio-* calls now. This makes redundant to pass down to the driver gfp flags, which can be GFP_KERNEL only. The patch removes the gfp flags argument and updates all driver paths. 
Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx4/device.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index d5bed0875d30..aad5d81dfb44 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1068,7 +1068,7 @@ static inline int mlx4_is_eth(struct mlx4_dev *dev, int port) } int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, - struct mlx4_buf *buf, gfp_t gfp); + struct mlx4_buf *buf); void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset) { @@ -1105,10 +1105,9 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw); int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list); int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, - struct mlx4_buf *buf, gfp_t gfp); + struct mlx4_buf *buf); -int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, - gfp_t gfp); +int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order); void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db); int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, @@ -1124,8 +1123,7 @@ int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base, u8 flags); void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); -int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, - gfp_t gfp); +int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp); void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp); int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn, -- cgit v1.2.3 From 8bd226f9a7dc18740a916dcba3112f2bfc3ad9e8 Mon Sep 17 00:00:00 2001 From: Ruslan Bilovol Date: Sun, 25 Jun 2017 16:23:45 +0300 Subject: 
include: usb: audio: specify exact endianness of descriptors USB spec says that multiple byte fields are stored in little-endian order (see chapter 8.1 of USB2.0 spec and chapter 7.1 of USB3.0 spec), thus mark such fields as LE for UAC1 and UAC2 headers Signed-off-by: Ruslan Bilovol Signed-off-by: Felipe Balbi --- include/linux/usb/audio-v2.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h index c5f2158ab00e..fd73bc0e9027 100644 --- a/include/linux/usb/audio-v2.h +++ b/include/linux/usb/audio-v2.h @@ -115,13 +115,13 @@ struct uac2_input_terminal_descriptor { __u8 bDescriptorType; __u8 bDescriptorSubtype; __u8 bTerminalID; - __u16 wTerminalType; + __le16 wTerminalType; __u8 bAssocTerminal; __u8 bCSourceID; __u8 bNrChannels; - __u32 bmChannelConfig; + __le32 bmChannelConfig; __u8 iChannelNames; - __u16 bmControls; + __le16 bmControls; __u8 iTerminal; } __attribute__((packed)); @@ -132,11 +132,11 @@ struct uac2_output_terminal_descriptor { __u8 bDescriptorType; __u8 bDescriptorSubtype; __u8 bTerminalID; - __u16 wTerminalType; + __le16 wTerminalType; __u8 bAssocTerminal; __u8 bSourceID; __u8 bCSourceID; - __u16 bmControls; + __le16 bmControls; __u8 iTerminal; } __attribute__((packed)); @@ -164,9 +164,9 @@ struct uac2_as_header_descriptor { __u8 bTerminalLink; __u8 bmControls; __u8 bFormatType; - __u32 bmFormats; + __le32 bmFormats; __u8 bNrChannels; - __u32 bmChannelConfig; + __le32 bmChannelConfig; __u8 iChannelNames; } __attribute__((packed)); -- cgit v1.2.3 From beaec533fc2701a28a4d667f67c9f59c6e4e0d13 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 19 Jul 2017 20:27:30 +0200 Subject: llist: clang: introduce member_address_is_nonnull() Currently llist_for_each_entry() and llist_for_each_entry_safe() iterate until &pos->member != NULL.
But when building the kernel with Clang, the compiler assumes &pos->member cannot be NULL if the member's offset is greater than 0 (which would be equivalent to the object being non-contiguous in memory). Therefore the loop condition is always true, and the loops become infinite. To work around this, introduce the member_address_is_nonnull() macro, which casts the object pointer to uintptr_t, thus letting the member pointer be NULL. Signed-off-by: Alexander Potapenko Tested-by: Sodagudi Prasad Signed-off-by: Linus Torvalds --- include/linux/llist.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/llist.h b/include/linux/llist.h index d11738110a7a..1957635e6d5f 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -92,6 +92,23 @@ static inline void init_llist_head(struct llist_head *list) #define llist_entry(ptr, type, member) \ container_of(ptr, type, member) +/** + * member_address_is_nonnull - check whether the member address is not NULL + * @ptr: the object pointer (struct type * that contains the llist_node) + * @member: the name of the llist_node within the struct. + * + * This macro is conceptually the same as + * &ptr->member != NULL + * but it works around the fact that compilers can decide that taking a member + * address is never a NULL pointer. + * + * Real objects that start at a high address and have a member at NULL are + * unlikely to exist, but such pointers may be returned e.g. by the + * container_of() macro.
+ */ +#define member_address_is_nonnull(ptr, member) \ + ((uintptr_t)(ptr) + offsetof(typeof(*(ptr)), member) != 0) + /** * llist_for_each - iterate over some deleted entries of a lock-less list * @pos: the &struct llist_node to use as a loop cursor @@ -145,7 +162,7 @@ static inline void init_llist_head(struct llist_head *list) */ #define llist_for_each_entry(pos, node, member) \ for ((pos) = llist_entry((node), typeof(*(pos)), member); \ - &(pos)->member != NULL; \ + member_address_is_nonnull(pos, member); \ (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) /** @@ -167,7 +184,7 @@ static inline void init_llist_head(struct llist_head *list) */ #define llist_for_each_entry_safe(pos, n, node, member) \ for (pos = llist_entry((node), typeof(*pos), member); \ - &pos->member != NULL && \ + member_address_is_nonnull(pos, member) && \ (n = llist_entry(pos->member.next, typeof(*n), member), true); \ pos = n) -- cgit v1.2.3 From f86f418059b94aa01f9342611a272ca60c583e89 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Wed, 7 Jun 2017 16:12:51 +0800 Subject: trace: fix the errors caused by incompatible type of RCU variables The variables which are processed by RCU functions should be annotated as RCU, otherwise sparse will report the errors like below: "error: incompatible types in comparison expression (different address spaces)" Link: http://lkml.kernel.org/r/1496823171-7758-1-git-send-email-zhang.chunyan@linaro.org Signed-off-by: Chunyan Zhang [ Updated to not be 100% 80 column strict ] Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 6 +++--- include/linux/trace_events.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 5857390ac35a..6383115e9d2c 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -145,8 +145,8 @@ enum { #ifdef CONFIG_DYNAMIC_FTRACE /* The hash used to know what functions callbacks trace */ struct 
ftrace_ops_hash { - struct ftrace_hash *notrace_hash; - struct ftrace_hash *filter_hash; + struct ftrace_hash __rcu *notrace_hash; + struct ftrace_hash __rcu *filter_hash; struct mutex regex_lock; }; @@ -168,7 +168,7 @@ static inline void ftrace_free_init_mem(void) { } */ struct ftrace_ops { ftrace_func_t func; - struct ftrace_ops *next; + struct ftrace_ops __rcu *next; unsigned long flags; void *private; ftrace_func_t saved_func; diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index f73cedfa2e0b..536c80ff7ad9 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -338,7 +338,7 @@ enum { struct trace_event_file { struct list_head list; struct trace_event_call *event_call; - struct event_filter *filter; + struct event_filter __rcu *filter; struct dentry *dir; struct trace_array *tr; struct trace_subsystem_dir *system; -- cgit v1.2.3 From 43fc509c3efb5c973991ee24c449ab2a0d71dd1e Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 20 Jul 2017 11:19:58 +0100 Subject: dma-coherent: introduce interface for default DMA pool Christoph noticed [1] that default DMA pool in current form overload the DMA coherent infrastructure. In reply, Robin suggested [2] to split the per-device vs. global pool interfaces, so allocation/release from default DMA pool is driven by dma ops implementation. This patch implements Robin's idea and provide interface to allocate/release/mmap the default (aka global) DMA pool. To make it clear that existing *_from_coherent routines work on per-device pool rename them to *_from_dev_coherent. 
[1] https://lkml.org/lkml/2017/7/7/370 [2] https://lkml.org/lkml/2017/7/7/431 Cc: Vineet Gupta Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Ralf Baechle Suggested-by: Robin Murphy Tested-by: Andras Szemzo Reviewed-by: Robin Murphy Signed-off-by: Vladimir Murzin Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 843ab866e0f4..03c0196a6f24 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -157,16 +157,40 @@ static inline int is_device_dma_capable(struct device *dev) * These three functions are only for dma allocator. * Don't use them in device drivers. */ -int dma_alloc_from_coherent(struct device *dev, ssize_t size, +int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size, dma_addr_t *dma_handle, void **ret); -int dma_release_from_coherent(struct device *dev, int order, void *vaddr); +int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr); -int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, +int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, size_t size, int *ret); + +void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle); +int dma_release_from_global_coherent(int order, void *vaddr); +int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr, + size_t size, int *ret); + #else -#define dma_alloc_from_coherent(dev, size, handle, ret) (0) -#define dma_release_from_coherent(dev, order, vaddr) (0) -#define dma_mmap_from_coherent(dev, vma, vaddr, order, ret) (0) +#define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0) +#define dma_release_from_dev_coherent(dev, order, vaddr) (0) +#define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0) + +static inline void 
*dma_alloc_from_global_coherent(ssize_t size, + dma_addr_t *dma_handle) +{ + return NULL; +} + +static inline int dma_release_from_global_coherent(int order, void *vaddr) +{ + return 0; +} + +static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma, + void *cpu_addr, size_t size, + int *ret) +{ + return 0; +} #endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */ #ifdef CONFIG_HAS_DMA @@ -481,7 +505,7 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size, BUG_ON(!ops); - if (dma_alloc_from_coherent(dev, size, dma_handle, &cpu_addr)) + if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) return cpu_addr; if (!arch_dma_alloc_attrs(&dev, &flag)) @@ -503,7 +527,7 @@ static inline void dma_free_attrs(struct device *dev, size_t size, BUG_ON(!ops); WARN_ON(irqs_disabled()); - if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) + if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr)) return; if (!ops->free || !cpu_addr) -- cgit v1.2.3 From dc1a0afbacaeaced8f5679a99047c0467f1099e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Jul 2017 11:12:09 +0200 Subject: nvme: fix byte swapping in the streams code Signed-off-by: Christoph Hellwig Reviewed-by: Jens Axboe Signed-off-by: Jens Axboe --- include/linux/nvme.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 6b8ee9e628e1..bc74da018bdc 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -963,14 +963,14 @@ struct nvme_dbbuf { }; struct streams_directive_params { - __u16 msl; - __u16 nssa; - __u16 nsso; + __le16 msl; + __le16 nssa; + __le16 nsso; __u8 rsvd[10]; - __u32 sws; - __u16 sgs; - __u16 nsa; - __u16 nso; + __le32 sws; + __le16 sgs; + __le16 nsa; + __le16 nso; __u8 rsvd2[6]; }; -- cgit v1.2.3 From 4cabc5b186b5427b9ee5a7495172542af105f02b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 21 Jul 2017 00:00:21 +0200 
Subject: bpf: fix mixed signed/unsigned derived min/max value bounds Edward reported that there's an issue in min/max value bounds tracking when signed and unsigned compares both provide hints on limits when having unknown variables. E.g. a program such as the following should have been rejected: 0: (7a) *(u64 *)(r10 -8) = 0 1: (bf) r2 = r10 2: (07) r2 += -8 3: (18) r1 = 0xffff8a94cda93400 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+7 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp 7: (7a) *(u64 *)(r10 -16) = -8 8: (79) r1 = *(u64 *)(r10 -16) 9: (b7) r2 = -1 10: (2d) if r1 > r2 goto pc+3 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0 R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp 11: (65) if r1 s> 0x1 goto pc+2 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0,max_value=1 R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp 12: (0f) r0 += r1 13: (72) *(u8 *)(r0 +0) = 0 R0=map_value_adj(ks=8,vs=8,id=0),min_value=0,max_value=1 R1=inv,min_value=0,max_value=1 R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp 14: (b7) r0 = 0 15: (95) exit What happens is that in the first part ... 8: (79) r1 = *(u64 *)(r10 -16) 9: (b7) r2 = -1 10: (2d) if r1 > r2 goto pc+3 ... r1 carries an unsigned value, and is compared as unsigned against a register carrying an immediate. Verifier deduces in reg_set_min_max() that since the compare is unsigned and operation is greater than (>), that in the fall-through/false case, r1's minimum bound must be 0 and maximum bound must be r2. Latter is larger than the bound and thus max value is reset back to being 'invalid' aka BPF_REGISTER_MAX_RANGE. Thus, r1 state is now 'R1=inv,min_value=0'. The subsequent test ... 11: (65) if r1 s> 0x1 goto pc+2 ... is a signed compare of r1 with immediate value 1. 
Here, verifier deduces in reg_set_min_max() that since the compare is signed this time and operation is greater than (>), that in the fall-through/false case, we can deduce that r1's maximum bound must be 1, meaning that, together with the prior test, we end up with r1 having the following state: R1=inv,min_value=0,max_value=1. Given that the actual value this holds is -8, the bounds are wrongly deduced. When this is being added to r0 which holds the map_value(_adj) type, then the subsequent store access in the above case will go through check_mem_access() which invokes check_map_access_adj(), that will then probe whether the map memory is in bounds based on the min_value and max_value as well as access size since the actual unknown value is min_value <= x <= max_value; commit fce366a9dd0d ("bpf, verifier: fix alu ops against map_value{, _adj} register types") provides some more explanation on the semantics. It's worth noting in this context that in the current code, min_value and max_value tracking are used for two things, i) dynamic map value access via check_map_access_adj() and since commit 06c1c049721a ("bpf: allow helpers access to variable memory") ii) also enforced at check_helper_mem_access() when passing a memory address (pointer to packet, map value, stack) and length pair to a helper and the length in this case is an unknown value defining an access range through min_value/max_value in that case. The min_value/max_value tracking is /not/ used in the direct packet access case to track ranges.
However, the issue also affects case ii), for example, the following crafted program based on the same principle must be rejected as well: 0: (b7) r2 = 0 1: (bf) r3 = r10 2: (07) r3 += -512 3: (7a) *(u64 *)(r10 -16) = -8 4: (79) r4 = *(u64 *)(r10 -16) 5: (b7) r6 = -1 6: (2d) if r4 > r6 goto pc+5 R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512 R4=inv,min_value=0 R6=imm-1,max_value=18446744073709551615,min_align=1 R10=fp 7: (65) if r4 s> 0x1 goto pc+4 R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512 R4=inv,min_value=0,max_value=1 R6=imm-1,max_value=18446744073709551615,min_align=1 R10=fp 8: (07) r4 += 1 9: (b7) r5 = 0 10: (6a) *(u16 *)(r10 -512) = 0 11: (85) call bpf_skb_load_bytes#26 12: (b7) r0 = 0 13: (95) exit Meaning, while we initialize the max_value stack slot that the verifier thinks we access in the [1,2] range, in reality we pass -7 as length which is interpreted as u32 in the helper. Thus, this issue is relevant also for the case of helper ranges. 
Resetting both bounds in check_reg_overflow() in case only one of them exceeds limits is also not enough as similar test can be created that uses values which are within range, thus also here learned min value in r1 is incorrect when mixed with later signed test to create a range: 0: (7a) *(u64 *)(r10 -8) = 0 1: (bf) r2 = r10 2: (07) r2 += -8 3: (18) r1 = 0xffff880ad081fa00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+7 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp 7: (7a) *(u64 *)(r10 -16) = -8 8: (79) r1 = *(u64 *)(r10 -16) 9: (b7) r2 = 2 10: (3d) if r2 >= r1 goto pc+3 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp 11: (65) if r1 s> 0x4 goto pc+2 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp 12: (0f) r0 += r1 13: (72) *(u8 *)(r0 +0) = 0 R0=map_value_adj(ks=8,vs=8,id=0),min_value=3,max_value=4 R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp 14: (b7) r0 = 0 15: (95) exit This leaves us with two options for fixing this: i) to invalidate all prior learned information once we switch signed context, ii) to track min/max signed and unsigned boundaries separately as done in [0]. (Given latter introduces major changes throughout the whole verifier, it's rather net-next material, thus this patch follows option i), meaning we can derive bounds either from only signed tests or only unsigned tests.) 
There is still the case of adjust_reg_min_max_vals(), where we adjust bounds on ALU operations, meaning programs like the following where boundaries on the reg get mixed in context later on when bounds are merged on the dst reg must get rejected, too: 0: (7a) *(u64 *)(r10 -8) = 0 1: (bf) r2 = r10 2: (07) r2 += -8 3: (18) r1 = 0xffff89b2bf87ce00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+6 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp 7: (7a) *(u64 *)(r10 -16) = -8 8: (79) r1 = *(u64 *)(r10 -16) 9: (b7) r2 = 2 10: (3d) if r2 >= r1 goto pc+2 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp 11: (b7) r7 = 1 12: (65) if r7 s> 0x0 goto pc+2 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,max_value=0 R10=fp 13: (b7) r0 = 0 14: (95) exit from 12 to 15: R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,min_value=1 R10=fp 15: (0f) r7 += r1 16: (65) if r7 s> 0x4 goto pc+2 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp 17: (0f) r0 += r7 18: (72) *(u8 *)(r0 +0) = 0 R0=map_value_adj(ks=8,vs=8,id=0),min_value=4,max_value=4 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp 19: (b7) r0 = 0 20: (95) exit Meaning, in adjust_reg_min_max_vals() we must also reset range values on the dst when src/dst registers have mixed signed/ unsigned derived min/max value bounds with one unbounded value as otherwise they can be added together deducing false boundaries. Once both boundaries are established from either ALU ops or compare operations w/o mixing signed/unsigned insns, then they can safely be added to other regs also having both boundaries established. 
Adding regs with one unbounded side to a map value where the bounded side has been learned w/o mixing ops is possible, but the resulting map value won't recover from that, meaning such an op is considered invalid at the time of actual access. Invalid bounds are set on the dst reg in case i) src reg, or ii) in case dst reg already had them. The only way to recover would be to perform i) ALU ops but only 'add' is allowed on map value types or ii) comparisons, but these are disallowed on pointers in case they span a range. This is fine as only BPF_JEQ and BPF_JNE may be performed on PTR_TO_MAP_VALUE_OR_NULL registers which potentially turn them into PTR_TO_MAP_VALUE type depending on the branch, so only here min/max value cannot be invalidated for them. In terms of state pruning, value_from_signed is considered as well in states_equal() when dealing with adjusted map values. With regard to breaking existing programs, there is a small risk, but the use-cases where this could occur are rather narrow and mixing compares is probably unlikely. Joint work with Josef and Edward. [0] https://lists.iovisor.org/pipermail/iovisor-dev/2017-June/000822.html Fixes: 484611357c19 ("bpf: allow access into map value arrays") Reported-by: Edward Cree Signed-off-by: Daniel Borkmann Signed-off-by: Edward Cree Signed-off-by: Josef Bacik Signed-off-by: David S.
Miller --- include/linux/bpf_verifier.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 621076f56251..8e5d31f6faef 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -43,6 +43,7 @@ struct bpf_reg_state { u32 min_align; u32 aux_off; u32 aux_off_align; + bool value_from_signed; }; enum bpf_stack_slot_type { -- cgit v1.2.3 From bd8b2441742b49c76bec707757bd9c028ea9838e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Jul 2017 17:54:34 -0400 Subject: NFS: Store the raw NFS access mask in the inode's access cache Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index e52cc55ac300..5cc91d6381a3 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -51,7 +51,7 @@ struct nfs_access_entry { struct list_head lru; unsigned long jiffies; struct rpc_cred * cred; - int mask; + __u32 mask; struct rcu_head rcu_head; }; -- cgit v1.2.3 From 832e4c83abc5ec25af77db6c8a0f36d78f1cf825 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 May 2017 09:16:24 +0200 Subject: uuid: remove uuid_be Everything uses uuid_t now. 
Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Reviewed-by: Andy Shevchenko --- include/linux/uuid.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 2251e1925ea4..33b0bdbb613c 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -84,26 +84,12 @@ int guid_parse(const char *uuid, guid_t *u); int uuid_parse(const char *uuid, uuid_t *u); /* backwards compatibility, don't use in new code */ -typedef uuid_t uuid_be; -#define UUID_BE(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ - UUID_INIT(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7) -#define NULL_UUID_BE \ - UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \ - 0x00, 0x00, 0x00, 0x00) - #define uuid_le_gen(u) guid_gen(u) -#define uuid_be_gen(u) uuid_gen(u) #define uuid_le_to_bin(guid, u) guid_parse(guid, u) -#define uuid_be_to_bin(uuid, u) uuid_parse(uuid, u) static inline int uuid_le_cmp(const guid_t u1, const guid_t u2) { return memcmp(&u1, &u2, sizeof(guid_t)); } -static inline int uuid_be_cmp(const uuid_t u1, const uuid_t u2) -{ - return memcmp(&u1, &u2, sizeof(uuid_t)); -} - #endif -- cgit v1.2.3 From 6c423f5751b9f68bfe7c7545519d4c7159f93e1b Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 24 Jul 2017 13:58:00 -0600 Subject: sched/wait: Clean up some documentation warnings A couple of kerneldoc comments in <linux/wait.h> had incorrect names for macro parameters, with this unsightly result: ./include/linux/wait.h:555: warning: No description found for parameter 'wq' ./include/linux/wait.h:555: warning: Excess function parameter 'wq_head' description in 'wait_event_interruptible_hrtimeout' ./include/linux/wait.h:759: warning: No description found for parameter 'wq_head' ./include/linux/wait.h:759: warning: Excess function parameter 'wq' description in 'wait_event_killable' Correct the comments and kill the warnings.
Signed-off-by: Jonathan Corbet Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/20170724135800.769c4042@lwn.net Signed-off-by: Ingo Molnar --- include/linux/wait.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index b289c96151ee..5b74e36c0ca8 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -529,13 +529,13 @@ do { \ /** * wait_event_interruptible_hrtimeout - sleep until a condition gets true or a timeout elapses - * @wq_head: the waitqueue to wait on + * @wq: the waitqueue to wait on * @condition: a C expression for the event to wait for * @timeout: timeout, as a ktime_t * * The process is put to sleep (TASK_INTERRUPTIBLE) until the * @condition evaluates to true or a signal is received. - * The @condition is checked each time the waitqueue @wq_head is woken up. + * The @condition is checked each time the waitqueue @wq is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. @@ -735,12 +735,12 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); /** * wait_event_killable - sleep until a condition gets true - * @wq: the waitqueue to wait on + * @wq_head: the waitqueue to wait on * @condition: a C expression for the event to wait for * * The process is put to sleep (TASK_KILLABLE) until the * @condition evaluates to true or a signal is received. - * The @condition is checked each time the waitqueue @wq is woken up. + * The @condition is checked each time the waitqueue @wq_head is woken up. * * wake_up() has to be called after changing any variable that could * change the result of the wait condition. 
-- cgit v1.2.3 From 2fd4167fadd1360ab015e4f0e88e51843e49556c Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Wed, 12 Jul 2017 10:58:19 -0600 Subject: nvme: fabrics commands should use the fctype field for data direction Fabrics commands with opcode 0x7F use the fctype field to indicate data direction. Signed-off-by: Jon Derrick Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Fixes: eb793e2c ("nvme.h: add NVMe over Fabrics definitions") --- include/linux/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index bc74da018bdc..25d8225dbd04 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1006,7 +1006,7 @@ static inline bool nvme_is_write(struct nvme_command *cmd) * Why can't we simply have a Fabrics In and Fabrics out command? */ if (unlikely(cmd->common.opcode == nvme_fabrics_command)) - return cmd->fabrics.opcode & 1; + return cmd->fabrics.fctype & 1; return cmd->common.opcode & 1; } -- cgit v1.2.3 From 9c5358e15ca12ed3dc3b1e51671dee5d155de8e0 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 17 Jul 2017 13:59:39 -0700 Subject: nvme-fc: revise TRADDR parsing The FC-NVME spec hasn't locked down on the format string for TRADDR. Currently the spec is lobbying for "nn-<16hexdigits>:pn-<16hexdigits>" where the wwn's are hex values but not prefixed by 0x. Most implementations so far expect a string format of "nn-0x<16hexdigits>:pn-0x<16hexdigits>" to be used. The transport uses the match_u64 parser which requires a leading 0x prefix to set the base properly. If it's not there, a match will either fail or return a base 10 value. The resolution in T11 is pushing out. Therefore, to fix things now and to cover any eventuality and any implementations already in the field, this patch adds support for both formats. 
The change consists of replacing the token matching routine with a routine that validates the fixed string format, and then builds a local copy of the hex name with a 0x prefix before calling the system parser. Note: the same parser routine exists in both the initiator and target transports. Given this is about the only "shared" item, we chose to replicate rather than create an interdependency on some shared code. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- include/linux/nvme-fc.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h index 21c37e39e41a..36cca93a5ff2 100644 --- a/include/linux/nvme-fc.h +++ b/include/linux/nvme-fc.h @@ -334,5 +334,24 @@ struct fcnvme_ls_disconnect_acc { #define NVME_FC_LS_TIMEOUT_SEC 2 /* 2 seconds */ #define NVME_FC_TGTOP_TIMEOUT_SEC 2 /* 2 seconds */ +/* + * TRADDR string must be of form "nn-<16hexdigits>:pn-<16hexdigits>" + * the string is allowed to be specified with or without a "0x" prefix + * infront of the <16hexdigits>. Without is considered the "min" string + * and with is considered the "max" string. The hexdigits may be upper + * or lower case.
+ */ +#define NVME_FC_TRADDR_NNLEN 3 /* "?n-" */ +#define NVME_FC_TRADDR_OXNNLEN 5 /* "?n-0x" */ +#define NVME_FC_TRADDR_HEXNAMELEN 16 +#define NVME_FC_TRADDR_MINLENGTH \ + (2 * (NVME_FC_TRADDR_NNLEN + NVME_FC_TRADDR_HEXNAMELEN) + 1) +#define NVME_FC_TRADDR_MAXLENGTH \ + (2 * (NVME_FC_TRADDR_OXNNLEN + NVME_FC_TRADDR_HEXNAMELEN) + 1) +#define NVME_FC_TRADDR_MIN_PN_OFFSET \ + (NVME_FC_TRADDR_NNLEN + NVME_FC_TRADDR_HEXNAMELEN + 1) +#define NVME_FC_TRADDR_MAX_PN_OFFSET \ + (NVME_FC_TRADDR_OXNNLEN + NVME_FC_TRADDR_HEXNAMELEN + 1) + #endif /* _NVME_FC_H */ -- cgit v1.2.3 From fdeaf7e3eb37c6dbc4b4ac97dbe1945d239eb788 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Mon, 24 Jul 2017 13:40:03 +0200 Subject: KVM: make pid available for uevents without debugfs Simplify and improve the code so that the PID is always available in the uevent even when debugfs is not available. This adds a userspace_pid field to struct kvm, as per Radim's suggestion, so that the PID can be retrieved on destruction too. Acked-by: Janosch Frank Fixes: 286de8f6ac9202 ("KVM: trigger uevents when creating or destroying a VM") Signed-off-by: Claudio Imbrenda Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 648b34cabb38..890b706d1943 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -445,6 +445,7 @@ struct kvm { struct kvm_stat_data **debugfs_stat_data; struct srcu_struct srcu; struct srcu_struct irq_srcu; + pid_t userspace_pid; }; #define kvm_err(fmt, ...) 
\ -- cgit v1.2.3 From 273752c9ff03eb83856601b2a3458218bb949e46 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 26 Jul 2017 09:35:09 -0400 Subject: dm, dax: Make sure dm_dax_flush() is called if device supports it Currently dm_dax_flush() is not being called, even if underlying dax device supports write cache, because DAXDEV_WRITE_CACHE is not being propagated up to the DM dax device. If the underlying dax device supports write cache, set DAXDEV_WRITE_CACHE on the DM dax device. This will cause dm_dax_flush() to be called. Fixes: abebfbe2f7 ("dm: add ->flush() dax operation support") Signed-off-by: Vivek Goyal Acked-by: Dan Williams Signed-off-by: Mike Snitzer --- include/linux/dax.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 794811875732..df97b7af7e2c 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -87,6 +87,7 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t size); void dax_write_cache(struct dax_device *dax_dev, bool wc); +bool dax_write_cache_enabled(struct dax_device *dax_dev); /* * We use lowest available bit in exceptional entry for locking, one bit for -- cgit v1.2.3 From a3287c41ff405025bc57b165a0f6cd698bbbc1be Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 25 Jul 2017 16:30:34 +0100 Subject: drivers/perf: arm_pmu: Request PMU SPIs with IRQF_PER_CPU Since the PMU register interface is banked per CPU, CPU PMU interrupts cannot be handled by a CPU other than the one with the PMU asserting the interrupt. This means that migrating PMU SPIs, as we do during a CPU hotplug operation doesn't make any sense and can lead to the IRQ being disabled entirely if we route a spurious IRQ to the new affinity target.
This has been observed in practice on AMD Seattle, where CPUs on the non-boot cluster appear to take a spurious PMU IRQ when coming online, which is routed to CPU0 where it cannot be handled. This patch passes IRQF_PERCPU for PMU SPIs and forcefully sets their affinity prior to requesting them, ensuring that they cannot be migrated during hotplug events. This interacts badly with the DB8500 erratum workaround that ping-pongs the interrupt affinity from the handler, so we avoid passing IRQF_PERCPU in that case by allowing the IRQ flags to be overridden in the platdata. Fixes: 3cf7ee98b848 ("drivers/perf: arm_pmu: move irq request/free into probe") Cc: Mark Rutland Cc: Linus Walleij Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 1360dd6d5e61..af0f44effd44 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -24,10 +24,14 @@ * interrupt and passed the address of the low level handler, * and can be used to implement any platform specific handling * before or after calling it. + * + * @irq_flags: if non-zero, these flags will be passed to request_irq + * when requesting interrupts for this PMU device. */ struct arm_pmu_platdata { irqreturn_t (*handle_irq)(int irq, void *dev, irq_handler_t pmu_handler); + unsigned long irq_flags; }; #ifdef CONFIG_ARM_PMU -- cgit v1.2.3 From 8397913303abc9333f376a518a8368fa22ca5e6e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 27 Jul 2017 12:21:11 +0200 Subject: genirq/cpuhotplug: Revert "Set force affinity flag on hotplug migration" That commit was part of the changes moving x86 to the generic CPU hotplug interrupt migration code. The force flag was required on x86 before the hierarchical irqdomain rework, but invoking set_affinity() with force=true stayed and had no side effects. 
At some point in the past, the force flag got repurposed to support the exynos timer interrupt affinity setting to a not yet online CPU, so the interrupt controller callback does not verify the supplied affinity mask against cpu_online_mask. Setting the flag in the CPU hotplug code causes the cpu online masking to be blocked on these irq controllers and results in potentially affining an interrupt to the CPU which is unplugged, i.e. instead of moving it away, it's just reassigned to it. As the force flag is no longer needed on x86, it's safe to revert that patch so the ARM irqchips which use the force flag work again. Add comments to that effect, so this won't happen again. Note: The online mask handling should be done in the generic code and the force flag and the masking in the irq chips removed altogether, but that's not a change possible for 4.13. Fixes: 77f85e66aa8b ("genirq/cpuhotplug: Set force affinity flag on hotplug migration") Reported-by: Will Deacon Signed-off-by: Thomas Gleixner Acked-by: Will Deacon Cc: Marc Zyngier Cc: Russell King Cc: LAK Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1707271217590.3109@nanos Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 00db35b61e9e..d2d543794093 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -388,7 +388,12 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @irq_mask_ack: ack and mask an interrupt source * @irq_unmask: unmask an interrupt source * @irq_eoi: end of interrupt - * @irq_set_affinity: set the CPU affinity on SMP machines + * @irq_set_affinity: Set the CPU affinity on SMP machines. If the force + * argument is true, it tells the driver to + * unconditionally apply the affinity setting. Sanity + * checks against the supplied affinity mask are not + * required.
This is used for CPU hotplug where the + * target CPU is not yet set in the cpu_online_mask. * @irq_retrigger: resend an IRQ to the CPU * @irq_set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ * @irq_set_wake: enable/disable power-management wake-on of an IRQ -- cgit v1.2.3