From a7da72eeec78b8ce08a99d132b3e269942b977eb Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 18 Jul 2016 16:16:29 +0900 Subject: extcon: adc-jack: Remove the usage of extcon_set_state() This patch removes the usage of extcon_set_state() because it uses the bit masking to change the state of external connectors. The extcon framework should handle the state by extcon_set/get_cable_state_() with extcon id. Signed-off-by: Chanwoo Choi --- include/linux/extcon/extcon-adc-jack.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/extcon/extcon-adc-jack.h b/include/linux/extcon/extcon-adc-jack.h index ac85f2061351..a0e03b13b449 100644 --- a/include/linux/extcon/extcon-adc-jack.h +++ b/include/linux/extcon/extcon-adc-jack.h @@ -20,8 +20,8 @@ /** * struct adc_jack_cond - condition to use an extcon state - * @state: the corresponding extcon state (if 0, this struct * denotes the last adc_jack_cond element among the array) + * @id: the unique id of each external connector * @min_adc: min adc value for this condition * @max_adc: max adc value for this condition * @@ -33,7 +33,7 @@ * because when no adc_jack_cond is met, state = 0 is automatically chosen. */ struct adc_jack_cond { - u32 state; /* extcon state value. 0 if invalid */ + unsigned int id; u32 min_adc; u32 max_adc; }; -- cgit v1.2.3 From 912465bcf869660900cf77c4761869048f3ff063 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Fri, 1 Jul 2016 02:41:18 +0900 Subject: extcon: Block the bit masking operation for cable state except for extcon core This patch restricts the usage of extcon_update_state() to the extcon core because extcon_update_state() uses bit masking to change the state of an external connector. When this function is used in device drivers, mistakes in handling the bit masks may cause problems. Also, this patch removes the extcon_get/set_state() functions because these functions use bit masking, which is an undesirable approach. 
Instead, extcon provides the extcon_set/get_cable_state_() functions. Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 61004413dc64..667b1d35af12 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -149,20 +149,6 @@ extern struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, const unsigned int *cable); extern void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev); -/* - * get/set/update_state access the 32b encoded state value, which represents - * states of all possible cables of the multistate port. For example, if one - * calls extcon_set_state(edev, 0x7), it may mean that all the three cables - * are attached to the port. - */ -static inline u32 extcon_get_state(struct extcon_dev *edev) -{ - return edev->state; -} - -extern int extcon_set_state(struct extcon_dev *edev, u32 state); -extern int extcon_update_state(struct extcon_dev *edev, u32 mask, u32 state); - /* * get/set_cable_state access each bit of the 32b encoded state value. * They are used to access the status of each cable based on the cable id. 
@@ -232,22 +218,6 @@ static inline struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, static inline void devm_extcon_dev_free(struct extcon_dev *edev) { } -static inline u32 extcon_get_state(struct extcon_dev *edev) -{ - return 0; -} - -static inline int extcon_set_state(struct extcon_dev *edev, u32 state) -{ - return 0; -} - -static inline int extcon_update_state(struct extcon_dev *edev, u32 mask, - u32 state) -{ - return 0; -} - static inline int extcon_get_cable_state_(struct extcon_dev *edev, unsigned int id) { -- cgit v1.2.3 From 55e4e2f129c6664c14166a30f4e0e933ebb61d9b Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 11 Jul 2016 16:34:52 +0900 Subject: extcon: Add the extcon_type to gather each connector into five category This patch adds the new extcon type to group each connector into the following five categories. This type would be used to handle the connectors as a group unit instead of a connector unit. - EXTCON_TYPE_USB : USB connector - EXTCON_TYPE_CHG : Charger connector - EXTCON_TYPE_JACK : Jack connector - EXTCON_TYPE_DISP : Display connector - EXTCON_TYPE_MISC : Miscellaneous connector Also, each external connector may belong to more than one extcon type. In the case of EXTCON_CHG_USB_SDP, it has both EXTCON_TYPE_CHG and EXTCON_TYPE_USB. 
Signed-off-by: Chanwoo Choi Tested-by: Chris Zhong Tested-by: Guenter Roeck Signed-off-by: MyungJoo Ham Reviewed-by: Guenter Roeck --- include/linux/extcon.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 667b1d35af12..46d802892c82 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -28,6 +28,15 @@ #include +/* + * Define the type of supported external connectors + */ +#define EXTCON_TYPE_USB BIT(0) /* USB connector */ +#define EXTCON_TYPE_CHG BIT(1) /* Charger connector */ +#define EXTCON_TYPE_JACK BIT(2) /* Jack connector */ +#define EXTCON_TYPE_DISP BIT(3) /* Display connector */ +#define EXTCON_TYPE_MISC BIT(4) /* Miscellaneous connector */ + /* * Define the unique id of supported external connectors */ -- cgit v1.2.3 From 792e7e9e5d4358bc6157152b2c07b94eb9e261b0 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 11 Jul 2016 19:30:43 +0900 Subject: extcon: Add the support for extcon property according to extcon type This patch support the extcon property for the external connector because each external connector might have the property according to the H/W design and the specific characteristics. 
- EXTCON_PROP_USB_[property name] - EXTCON_PROP_CHG_[property name] - EXTCON_PROP_JACK_[property name] - EXTCON_PROP_DISP_[property name] Add the new extcon APIs to get/set the property value as following: - int extcon_get_property(struct extcon_dev *edev, unsigned int id, unsigned int prop, union extcon_property_value *prop_val) - int extcon_set_property(struct extcon_dev *edev, unsigned int id, unsigned int prop, union extcon_property_value prop_val) Signed-off-by: Chanwoo Choi Tested-by: Chris Zhong Tested-by: Guenter Roeck Reviewed-by: Guenter Roeck --- include/linux/extcon.h | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 46d802892c82..f9d4a44e86d3 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -77,6 +77,63 @@ #define EXTCON_NUM 63 +/* + * Define the property of supported external connectors. + * + * When adding the new extcon property, they *must* have + * the type/value/default information. Also, you *have to* + * modify the EXTCON_PROP_[type]_START/END definitions + * which mean the range of the supported properties + * for each extcon type. + * + * The naming style of property + * : EXTCON_PROP_[type]_[property name] + * + * EXTCON_PROP_USB_[property name] : USB property + * EXTCON_PROP_CHG_[property name] : Charger property + * EXTCON_PROP_JACK_[property name] : Jack property + * EXTCON_PROP_DISP_[property name] : Display property + */ + +/* + * Properties of EXTCON_TYPE_USB. + * + * - EXTCON_PROP_USB_VBUS + * @type: integer (intval) + * @value: 0 (low) or 1 (high) + * @default: 0 (low) + */ +#define EXTCON_PROP_USB_VBUS 0 + +#define EXTCON_PROP_USB_MIN 0 +#define EXTCON_PROP_USB_MAX 0 +#define EXTCON_PROP_USB_CNT (EXTCON_PROP_USB_MAX - EXTCON_PROP_USB_MIN + 1) + +/* Properties of EXTCON_TYPE_CHG. 
*/ +#define EXTCON_PROP_CHG_MIN 50 +#define EXTCON_PROP_CHG_MAX 50 +#define EXTCON_PROP_CHG_CNT (EXTCON_PROP_CHG_MAX - EXTCON_PROP_CHG_MIN + 1) + +/* Properties of EXTCON_TYPE_JACK. */ +#define EXTCON_PROP_JACK_MIN 100 +#define EXTCON_PROP_JACK_MAX 100 +#define EXTCON_PROP_JACK_CNT (EXTCON_PROP_JACK_MAX - EXTCON_PROP_JACK_MIN + 1) + +/* Properties of EXTCON_TYPE_DISP. */ +#define EXTCON_PROP_DISP_MIN 150 +#define EXTCON_PROP_DISP_MAX 150 +#define EXTCON_PROP_DISP_CNT (EXTCON_PROP_DISP_MAX - EXTCON_PROP_DISP_MIN + 1) + +/* + * Define the type of property's value. + * + * Define the property's value as union type. Because each property + * would need the different data type to store it. + */ +union extcon_property_value { + int intval; /* type : integer (intval) */ +}; + struct extcon_cable; /** @@ -166,6 +223,17 @@ extern int extcon_get_cable_state_(struct extcon_dev *edev, unsigned int id); extern int extcon_set_cable_state_(struct extcon_dev *edev, unsigned int id, bool cable_state); +/* + * get/set_property access the property value of each external connector. + * They are used to access the property of each cable based on the property id. + */ +extern int extcon_get_property(struct extcon_dev *edev, unsigned int id, + unsigned int prop, + union extcon_property_value *prop_val); +extern int extcon_set_property(struct extcon_dev *edev, unsigned int id, + unsigned int prop, + union extcon_property_value prop_val); + /* * Following APIs are to monitor every action of a notifier. * Registrar gets notified for every external port of a connection device. 
@@ -239,6 +307,19 @@ static inline int extcon_set_cable_state_(struct extcon_dev *edev, return 0; } +static inline int extcon_get_property(struct extcon_dev *edev, unsigned int id, + unsigned int prop, + union extcon_property_value *prop_val) +{ + return 0; +} +static inline int extcon_set_property(struct extcon_dev *edev, unsigned int id, + unsigned int prop, + union extcon_property_value prop_val) +{ + return 0; +} + static inline struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name) { return NULL; -- cgit v1.2.3 From 7f2a0a1699b51bfd738f1e0b15e057996fe1f259 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 25 Jul 2016 21:15:19 +0900 Subject: extcon: Add the support for the capability of each property This patch adds the support of the property capability setting. This function decides the supported properties of each external connector on extcon provider driver. The list of new extcon APIs to get/set the capability of a property is as follows: - int extcon_get_property_capability(struct extcon_dev *edev, unsigned int id, unsigned int prop); - int extcon_set_property_capability(struct extcon_dev *edev, unsigned int id, unsigned int prop); Signed-off-by: Chanwoo Choi Tested-by: Chris Zhong Tested-by: Guenter Roeck Reviewed-by: Guenter Roeck --- include/linux/extcon.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index f9d4a44e86d3..f08469089f74 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -234,6 +234,16 @@ extern int extcon_set_property(struct extcon_dev *edev, unsigned int id, unsigned int prop, union extcon_property_value prop_val); +/* + * get/set_property_capability set the capability of the property for each + * external connector. They are used to set the capability of the property + * of each external connector based on the id and property. 
+ */ +extern int extcon_get_property_capability(struct extcon_dev *edev, + unsigned int id, unsigned int prop); +extern int extcon_set_property_capability(struct extcon_dev *edev, + unsigned int id, unsigned int prop); + /* * Following APIs are to monitor every action of a notifier. * Registrar gets notified for every external port of a connection device. @@ -320,6 +330,18 @@ static inline int extcon_set_property(struct extcon_dev *edev, unsigned int id, return 0; } +static inline int extcon_get_property_capability(struct extcon_dev *edev, + unsigned int id, unsigned int prop) +{ + return 0; +} + +static inline int extcon_set_property_capability(struct extcon_dev *edev, + unsigned int id, unsigned int prop) +{ + return 0; +} + static inline struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name) { return NULL; -- cgit v1.2.3 From 575c2b867ee0c2affdd309f375c032c0c7dc219c Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Fri, 22 Jul 2016 13:03:17 +0900 Subject: extcon: Rename the extcon_set/get_state() to maintain the function naming pattern This patch just renames the existing extcon_get/set_cable_state_() as following because of maintaining the function naming pattern like as extcon APIs for property. - extcon_set_cable_state_() -> extcon_set_state() - extcon_get_cable_state_() -> extcon_get_state() But, this patch remains the old extcon_set/get_cable_state_() functions to prevent the build break. After altering new APIs, remove the old APIs. 
Signed-off-by: Chanwoo Choi Tested-by: Chris Zhong Tested-by: Guenter Roeck Reviewed-by: Guenter Roeck --- include/linux/extcon.h | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index f08469089f74..4fa37385c97a 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -216,11 +216,11 @@ extern struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, extern void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev); /* - * get/set_cable_state access each bit of the 32b encoded state value. + * get/set_state access each bit of the 32b encoded state value. * They are used to access the status of each cable based on the cable id. */ -extern int extcon_get_cable_state_(struct extcon_dev *edev, unsigned int id); -extern int extcon_set_cable_state_(struct extcon_dev *edev, unsigned int id, +extern int extcon_get_state(struct extcon_dev *edev, unsigned int id); +extern int extcon_set_state(struct extcon_dev *edev, unsigned int id, bool cable_state); /* @@ -305,14 +305,14 @@ static inline struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, static inline void devm_extcon_dev_free(struct extcon_dev *edev) { } -static inline int extcon_get_cable_state_(struct extcon_dev *edev, - unsigned int id) + +static inline int extcon_get_state(struct extcon_dev *edev, unsigned int id) { return 0; } -static inline int extcon_set_cable_state_(struct extcon_dev *edev, - unsigned int id, bool cable_state) +static inline int extcon_set_state(struct extcon_dev *edev, unsigned int id, + bool cable_state) { return 0; } @@ -402,4 +402,15 @@ static inline int extcon_unregister_interest(struct extcon_specific_cable_nb { return -EINVAL; } + +static inline int extcon_get_cable_state_(struct extcon_dev *edev, unsigned int id) +{ + return extcon_get_state(edev, id); +} + +static inline int extcon_set_cable_state_(struct extcon_dev *edev, 
unsigned int id, + bool cable_state) +{ + return extcon_set_state(edev, id, cable_state); +} #endif /* __LINUX_EXTCON_H__ */ -- cgit v1.2.3 From ab11af049f88f059a73f679fb050bd7abb98d24b Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Fri, 22 Jul 2016 13:16:34 +0900 Subject: extcon: Add the synchronization extcon APIs to support the notification This patch adds the synchronization extcon APIs to support the notifications for both state and property. When the extcon_*_sync() functions are called, the extcon informs the information from extcon provider to extcon client. The extcon driver may need to change both the state and multiple properties at the same time. After setting the data of an external connector, the extcon sends the notification to client driver with the extcon_*_sync(). The list of new extcon APIs is as follows: - extcon_sync() : Send the notification for each external connector to synchronize the information between extcon provider driver and extcon client driver. - extcon_set_state_sync() : Set the state of external connector with noti. - extcon_set_property_sync() : Set the property of external connector with noti. For example, case 1, change the state of external connector and synchronize the data. extcon_set_state_sync(edev, EXTCON_USB, 1); case 2, change both the state and property of external connector and synchronize the data. extcon_set_state(edev, EXTCON_USB, 1); extcon_set_property(edev, EXTCON_USB, EXTCON_PROP_USB_VBUS, 1); extcon_sync(edev, EXTCON_USB); case 3, change the property of external connector and synchronize the data. extcon_set_property(edev, EXTCON_USB, EXTCON_PROP_USB_VBUS, 0); extcon_sync(edev, EXTCON_USB); case 4, change the property of external connector and synchronize the data. 
extcon_set_property_sync(edev, EXTCON_USB, EXTCON_PROP_USB_VBUS, 0); Signed-off-by: Chanwoo Choi Tested-by: Chris Zhong Tested-by: Guenter Roeck Reviewed-by: Guenter Roeck --- include/linux/extcon.h | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 4fa37385c97a..162c46a42bac 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -222,6 +222,13 @@ extern void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev); extern int extcon_get_state(struct extcon_dev *edev, unsigned int id); extern int extcon_set_state(struct extcon_dev *edev, unsigned int id, bool cable_state); +extern int extcon_set_state_sync(struct extcon_dev *edev, unsigned int id, + bool cable_state); + +/* + * Synchronize the state and property data for a specific external connector. + */ +extern int extcon_sync(struct extcon_dev *edev, unsigned int id); /* * get/set_property access the property value of each external connector. 
@@ -233,6 +240,9 @@ extern int extcon_get_property(struct extcon_dev *edev, unsigned int id, extern int extcon_set_property(struct extcon_dev *edev, unsigned int id, unsigned int prop, union extcon_property_value prop_val); +extern int extcon_set_property_sync(struct extcon_dev *edev, unsigned int id, + unsigned int prop, + union extcon_property_value prop_val); /* * get/set_property_capability set the capability of the property for each @@ -317,6 +327,17 @@ static inline int extcon_set_state(struct extcon_dev *edev, unsigned int id, return 0; } +static inline int extcon_set_state_sync(struct extcon_dev *edev, unsigned int id, + bool cable_state) +{ + return 0; +} + +static inline int extcon_sync(struct extcon_dev *edev, unsigned int id) +{ + return 0; +} + static inline int extcon_get_property(struct extcon_dev *edev, unsigned int id, unsigned int prop, union extcon_property_value *prop_val) @@ -330,6 +351,13 @@ static inline int extcon_set_property(struct extcon_dev *edev, unsigned int id, return 0; } +static inline int extcon_set_property_sync(struct extcon_dev *edev, + unsigned int id, unsigned int prop, + union extcon_property_value prop_val) +{ + return 0; +} + static inline int extcon_get_property_capability(struct extcon_dev *edev, unsigned int id, unsigned int prop) { @@ -411,6 +439,6 @@ static inline int extcon_get_cable_state_(struct extcon_dev *edev, unsigned int static inline int extcon_set_cable_state_(struct extcon_dev *edev, unsigned int id, bool cable_state) { - return extcon_set_state(edev, id, cable_state); + return extcon_set_state_sync(edev, id, cable_state); } #endif /* __LINUX_EXTCON_H__ */ -- cgit v1.2.3 From 2164188d57f8862f1b69e2c5b7c0585f01077bee Mon Sep 17 00:00:00 2001 From: Chris Zhong Date: Fri, 22 Jul 2016 01:13:02 +0900 Subject: extcon: Add EXTCON_DISP_DP and the property for USB Type-C Add EXTCON_DISP_DP for the Display external connector. 
For Type-C connector the DisplayPort can work as an Alternate Mode(VESA DisplayPort Alt Mode on USB Type-C Standard). The Type-C support both normal and flipped orientation, so add a property to extcon. Signed-off-by: Chris Zhong Signed-off-by: Chanwoo Choi Tested-by: Chris Zhong Tested-by: Guenter Roeck Reviewed-by: Guenter Roeck --- include/linux/extcon.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 162c46a42bac..ad7a1606a7f3 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -69,6 +69,7 @@ #define EXTCON_DISP_MHL 41 /* Mobile High-Definition Link */ #define EXTCON_DISP_DVI 42 /* Digital Visual Interface */ #define EXTCON_DISP_VGA 43 /* Video Graphics Array */ +#define EXTCON_DISP_DP 44 /* Display Port */ /* Miscellaneous external connector */ #define EXTCON_DOCK 60 @@ -102,11 +103,16 @@ * @type: integer (intval) * @value: 0 (low) or 1 (high) * @default: 0 (low) + * - EXTCON_PROP_USB_TYPEC_POLARITY + * @type: integer (intval) + * @value: 0 (normal) or 1 (flip) + * @default: 0 (normal) */ #define EXTCON_PROP_USB_VBUS 0 +#define EXTCON_PROP_USB_TYPEC_POLARITY 1 #define EXTCON_PROP_USB_MIN 0 -#define EXTCON_PROP_USB_MAX 0 +#define EXTCON_PROP_USB_MAX 1 #define EXTCON_PROP_USB_CNT (EXTCON_PROP_USB_MAX - EXTCON_PROP_USB_MIN + 1) /* Properties of EXTCON_TYPE_CHG. */ -- cgit v1.2.3 From 9c0595d688e9deb337ff8901bb25281cdb19050b Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Fri, 5 Aug 2016 17:49:23 +0900 Subject: extcon: Add new EXTCON_DISP_HMD for Head-mounted Display device This patch adds the new EXTCON_DISP_HMD id for Head-mounted Display[1] device. The HMD device is usually for USB connector type So, the HMD connector has the two extcon types of both EXTCON_TYPE_DISP and EXTCON_TYPE_USB. 
[1] https://en.wikipedia.org/wiki/Head-mounted_display Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index ad7a1606a7f3..e79b644f41a7 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -70,6 +70,7 @@ #define EXTCON_DISP_DVI 42 /* Digital Visual Interface */ #define EXTCON_DISP_VGA 43 /* Video Graphics Array */ #define EXTCON_DISP_DP 44 /* Display Port */ +#define EXTCON_DISP_HMD 45 /* Head-Mounted Display */ /* Miscellaneous external connector */ #define EXTCON_DOCK 60 -- cgit v1.2.3 From 7fe95fb889faf07ee438384858d38b820973397e Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Fri, 5 Aug 2016 18:15:46 +0900 Subject: extcon: Add new EXTCON_CHG_WPT for Wireless Power Transfer device This patch adds the new EXTCON_CHG_WPT for Wireless Power Transfer[1]. The Wireless Power Transfer is the transmission of electrical energy from a power source. The EXTCON_CHG_WPT has the EXTCON_TYPE_CHG. [1] https://en.wikipedia.org/wiki/Wireless_power_transfer Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index e79b644f41a7..461abee969b7 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -53,6 +53,7 @@ #define EXTCON_CHG_USB_ACA 8 /* Accessory Charger Adapter */ #define EXTCON_CHG_USB_FAST 9 #define EXTCON_CHG_USB_SLOW 10 +#define EXTCON_CHG_WPT 11 /* Wireless Power Transfer */ /* Jack external connector */ #define EXTCON_JACK_MICROPHONE 20 -- cgit v1.2.3 From f067025bc676ba8d18fba5f959598339e39b86db Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 20 Jul 2016 13:13:50 -0700 Subject: dmaengine: add support to provide error result from a DMA transation Adding a new callback that will provide the error result for a transaction. 
The result is allocated on the stack and the callback should create a copy if it wishes to retain the information after exiting. The result parameter is now defined and takes over the dummy void pointer we placed in the helper functions previously. dmaengine drivers should start converting to the new "callback_result" callback in order to receive transaction results. Signed-off-by: Dave Jiang Reviewed-by: Lars-Peter Clausen Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 30de0197263a..cc535a478bae 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -441,6 +441,21 @@ typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); typedef void (*dma_async_tx_callback)(void *dma_async_param); +enum dmaengine_tx_result { + DMA_TRANS_NOERROR = 0, /* SUCCESS */ + DMA_TRANS_READ_FAILED, /* Source DMA read failed */ + DMA_TRANS_WRITE_FAILED, /* Destination DMA write failed */ + DMA_TRANS_ABORTED, /* Op never submitted / aborted */ +}; + +struct dmaengine_result { + enum dmaengine_tx_result result; + u32 residue; +}; + +typedef void (*dma_async_tx_callback_result)(void *dma_async_param, + const struct dmaengine_result *result); + struct dmaengine_unmap_data { u8 map_cnt; u8 to_cnt; @@ -478,6 +493,7 @@ struct dma_async_tx_descriptor { dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx); int (*desc_free)(struct dma_async_tx_descriptor *tx); dma_async_tx_callback callback; + dma_async_tx_callback_result callback_result; void *callback_param; struct dmaengine_unmap_data *unmap; #ifdef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH -- cgit v1.2.3 From 585083c539ca3f5fb3d00057b25f9be3304d54c6 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 8 Jul 2016 22:33:37 +0800 Subject: mfd: ac100: Add driver for X-Powers AC100 audio codec / RTC combo IC The AC100 is a multifunction device 
with an audio codec subsystem and an RTC subsystem. These two subsystems share a common register space and host interface. Signed-off-by: Chen-Yu Tsai Signed-off-by: Lee Jones --- include/linux/mfd/ac100.h | 178 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 include/linux/mfd/ac100.h (limited to 'include/linux') diff --git a/include/linux/mfd/ac100.h b/include/linux/mfd/ac100.h new file mode 100644 index 000000000000..3c148f196b9f --- /dev/null +++ b/include/linux/mfd/ac100.h @@ -0,0 +1,178 @@ +/* + * Functions and registers to access AC100 codec / RTC combo IC. + * + * Copyright (C) 2016 Chen-Yu Tsai + * + * Chen-Yu Tsai + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_MFD_AC100_H +#define __LINUX_MFD_AC100_H + +#include + +struct ac100_dev { + struct device *dev; + struct regmap *regmap; +}; + +/* Audio codec related registers */ +#define AC100_CHIP_AUDIO_RST 0x00 +#define AC100_PLL_CTRL1 0x01 +#define AC100_PLL_CTRL2 0x02 +#define AC100_SYSCLK_CTRL 0x03 +#define AC100_MOD_CLK_ENA 0x04 +#define AC100_MOD_RST_CTRL 0x05 +#define AC100_I2S_SR_CTRL 0x06 + +/* I2S1 interface */ +#define AC100_I2S1_CLK_CTRL 0x10 +#define AC100_I2S1_SND_OUT_CTRL 0x11 +#define AC100_I2S1_SND_IN_CTRL 0x12 +#define AC100_I2S1_MXR_SRC 0x13 +#define AC100_I2S1_VOL_CTRL1 0x14 +#define AC100_I2S1_VOL_CTRL2 0x15 +#define AC100_I2S1_VOL_CTRL3 0x16 +#define AC100_I2S1_VOL_CTRL4 0x17 +#define AC100_I2S1_MXR_GAIN 0x18 + +/* I2S2 interface */ +#define AC100_I2S2_CLK_CTRL 0x20 +#define AC100_I2S2_SND_OUT_CTRL 0x21 +#define AC100_I2S2_SND_IN_CTRL 0x22 +#define AC100_I2S2_MXR_SRC 0x23 +#define AC100_I2S2_VOL_CTRL1 0x24 +#define AC100_I2S2_VOL_CTRL2 0x25 +#define AC100_I2S2_VOL_CTRL3 0x26 +#define AC100_I2S2_VOL_CTRL4 0x27 +#define AC100_I2S2_MXR_GAIN 0x28 + +/* I2S3 interface */ 
+#define AC100_I2S3_CLK_CTRL 0x30 +#define AC100_I2S3_SND_OUT_CTRL 0x31 +#define AC100_I2S3_SND_IN_CTRL 0x32 +#define AC100_I2S3_SIG_PATH_CTRL 0x33 + +/* ADC digital controls */ +#define AC100_ADC_DIG_CTRL 0x40 +#define AC100_ADC_VOL_CTRL 0x41 + +/* HMIC plug sensing / key detection */ +#define AC100_HMIC_CTRL1 0x44 +#define AC100_HMIC_CTRL2 0x45 +#define AC100_HMIC_STATUS 0x46 + +/* DAC digital controls */ +#define AC100_DAC_DIG_CTRL 0x48 +#define AC100_DAC_VOL_CTRL 0x49 +#define AC100_DAC_MXR_SRC 0x4c +#define AC100_DAC_MXR_GAIN 0x4d + +/* Analog controls */ +#define AC100_ADC_APC_CTRL 0x50 +#define AC100_ADC_SRC 0x51 +#define AC100_ADC_SRC_BST_CTRL 0x52 +#define AC100_OUT_MXR_DAC_A_CTRL 0x53 +#define AC100_OUT_MXR_SRC 0x54 +#define AC100_OUT_MXR_SRC_BST 0x55 +#define AC100_HPOUT_CTRL 0x56 +#define AC100_ERPOUT_CTRL 0x57 +#define AC100_SPKOUT_CTRL 0x58 +#define AC100_LINEOUT_CTRL 0x59 + +/* ADC digital audio processing (high pass filter & auto gain control */ +#define AC100_ADC_DAP_L_STA 0x80 +#define AC100_ADC_DAP_R_STA 0x81 +#define AC100_ADC_DAP_L_CTRL 0x82 +#define AC100_ADC_DAP_R_CTRL 0x83 +#define AC100_ADC_DAP_L_T_L 0x84 /* Left Target Level */ +#define AC100_ADC_DAP_R_T_L 0x85 /* Right Target Level */ +#define AC100_ADC_DAP_L_H_A_C 0x86 /* Left High Avg. Coef */ +#define AC100_ADC_DAP_L_L_A_C 0x87 /* Left Low Avg. Coef */ +#define AC100_ADC_DAP_R_H_A_C 0x88 /* Right High Avg. Coef */ +#define AC100_ADC_DAP_R_L_A_C 0x89 /* Right Low Avg. Coef */ +#define AC100_ADC_DAP_L_D_T 0x8a /* Left Decay Time */ +#define AC100_ADC_DAP_L_A_T 0x8b /* Left Attack Time */ +#define AC100_ADC_DAP_R_D_T 0x8c /* Right Decay Time */ +#define AC100_ADC_DAP_R_A_T 0x8d /* Right Attack Time */ +#define AC100_ADC_DAP_N_TH 0x8e /* Noise Threshold */ +#define AC100_ADC_DAP_L_H_N_A_C 0x8f /* Left High Noise Avg. Coef */ +#define AC100_ADC_DAP_L_L_N_A_C 0x90 /* Left Low Noise Avg. Coef */ +#define AC100_ADC_DAP_R_H_N_A_C 0x91 /* Right High Noise Avg. 
Coef */ +#define AC100_ADC_DAP_R_L_N_A_C 0x92 /* Right Low Noise Avg. Coef */ +#define AC100_ADC_DAP_H_HPF_C 0x93 /* High High-Pass-Filter Coef */ +#define AC100_ADC_DAP_L_HPF_C 0x94 /* Low High-Pass-Filter Coef */ +#define AC100_ADC_DAP_OPT 0x95 /* AGC Optimum */ + +/* DAC digital audio processing (high pass filter & dynamic range control) */ +#define AC100_DAC_DAP_CTRL 0xa0 +#define AC100_DAC_DAP_H_HPF_C 0xa1 /* High High-Pass-Filter Coef */ +#define AC100_DAC_DAP_L_HPF_C 0xa2 /* Low High-Pass-Filter Coef */ +#define AC100_DAC_DAP_L_H_E_A_C 0xa3 /* Left High Energy Avg Coef */ +#define AC100_DAC_DAP_L_L_E_A_C 0xa4 /* Left Low Energy Avg Coef */ +#define AC100_DAC_DAP_R_H_E_A_C 0xa5 /* Right High Energy Avg Coef */ +#define AC100_DAC_DAP_R_L_E_A_C 0xa6 /* Right Low Energy Avg Coef */ +#define AC100_DAC_DAP_H_G_D_T_C 0xa7 /* High Gain Delay Time Coef */ +#define AC100_DAC_DAP_L_G_D_T_C 0xa8 /* Low Gain Delay Time Coef */ +#define AC100_DAC_DAP_H_G_A_T_C 0xa9 /* High Gain Attack Time Coef */ +#define AC100_DAC_DAP_L_G_A_T_C 0xaa /* Low Gain Attack Time Coef */ +#define AC100_DAC_DAP_H_E_TH 0xab /* High Energy Threshold */ +#define AC100_DAC_DAP_L_E_TH 0xac /* Low Energy Threshold */ +#define AC100_DAC_DAP_H_G_K 0xad /* High Gain K parameter */ +#define AC100_DAC_DAP_L_G_K 0xae /* Low Gain K parameter */ +#define AC100_DAC_DAP_H_G_OFF 0xaf /* High Gain offset */ +#define AC100_DAC_DAP_L_G_OFF 0xb0 /* Low Gain offset */ +#define AC100_DAC_DAP_OPT 0xb1 /* DRC optimum */ + +/* Digital audio processing enable */ +#define AC100_ADC_DAP_ENA 0xb4 +#define AC100_DAC_DAP_ENA 0xb5 + +/* SRC control */ +#define AC100_SRC1_CTRL1 0xb8 +#define AC100_SRC1_CTRL2 0xb9 +#define AC100_SRC1_CTRL3 0xba +#define AC100_SRC1_CTRL4 0xbb +#define AC100_SRC2_CTRL1 0xbc +#define AC100_SRC2_CTRL2 0xbd +#define AC100_SRC2_CTRL3 0xbe +#define AC100_SRC2_CTRL4 0xbf + +/* RTC clk control */ +#define AC100_CLK32K_ANALOG_CTRL 0xc0 +#define AC100_CLKOUT_CTRL1 0xc1 +#define AC100_CLKOUT_CTRL2 0xc2 
+#define AC100_CLKOUT_CTRL3 0xc3 + +/* RTC module */ +#define AC100_RTC_RST 0xc6 +#define AC100_RTC_CTRL 0xc7 +#define AC100_RTC_SEC 0xc8 /* second */ +#define AC100_RTC_MIN 0xc9 /* minute */ +#define AC100_RTC_HOU 0xca /* hour */ +#define AC100_RTC_WEE 0xcb /* weekday */ +#define AC100_RTC_DAY 0xcc /* day */ +#define AC100_RTC_MON 0xcd /* month */ +#define AC100_RTC_YEA 0xce /* year */ +#define AC100_RTC_UPD 0xcf /* update trigger */ + +/* RTC alarm */ +#define AC100_ALM_INT_ENA 0xd0 +#define AC100_ALM_INT_STA 0xd1 +#define AC100_ALM_SEC 0xd8 +#define AC100_ALM_MIN 0xd9 +#define AC100_ALM_HOU 0xda +#define AC100_ALM_WEE 0xdb +#define AC100_ALM_DAY 0xdc +#define AC100_ALM_MON 0xdd +#define AC100_ALM_YEA 0xde +#define AC100_ALM_UPD 0xdf + +/* RTC general purpose register 0 ~ 15 */ +#define AC100_RTC_GP(x) (0xe0 + (x)) + +#endif /* __LINUX_MFD_AC100_H */ -- cgit v1.2.3 From 13bcc6a2853435bb5dad368bcbaa9d2a5b9c0ac4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 16 Jul 2016 15:22:55 -0500 Subject: sysctl: Stop implicitly passing current into sysctl_table_root.lookup Passing nsproxy into sysctl_table_root.lookup was a premature optimization in attempt to avoid depending on current. The directory /proc/self/sys has not appeared and if and when it does this code will need to be reviewed closely and reworked anyway. So remove the premature optimization. Acked-by: Kees Cook Acked-by: Serge Hallyn Signed-off-by: "Eric W. 
Biederman" --- include/linux/sysctl.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 697e160c78d0..f166ca0203e2 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -155,8 +155,7 @@ struct ctl_table_set { struct ctl_table_root { struct ctl_table_set default_set; - struct ctl_table_set *(*lookup)(struct ctl_table_root *root, - struct nsproxy *namespaces); + struct ctl_table_set *(*lookup)(struct ctl_table_root *root); int (*permissions)(struct ctl_table_header *head, struct ctl_table *table); }; -- cgit v1.2.3 From b032132c3c218f4a09e9499b3674299a752581c6 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 30 Jul 2016 13:53:37 -0500 Subject: userns: Free user namespaces in process context Add the necessary boiler plate to move freeing of user namespaces into work queue and thus into process context where things can sleep. This is a necessary precursor to per user namespace sysctls. Signed-off-by: "Eric W. 
Biederman" --- include/linux/user_namespace.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 9217169c64cb..4e79b3c64dee 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -39,6 +39,7 @@ struct user_namespace { struct key *persistent_keyring_register; struct rw_semaphore persistent_keyring_register_sem; #endif + struct work_struct work; }; extern struct user_namespace init_user_ns; @@ -54,12 +55,12 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns) extern int create_user_ns(struct cred *new); extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred); -extern void free_user_ns(struct user_namespace *ns); +extern void __put_user_ns(struct user_namespace *ns); static inline void put_user_ns(struct user_namespace *ns) { if (ns && atomic_dec_and_test(&ns->count)) - free_user_ns(ns); + __put_user_ns(ns); } struct seq_operations; -- cgit v1.2.3 From dbec28460a89aa7c02c3301e9e108d98272549d2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 30 Jul 2016 13:58:49 -0500 Subject: userns: Add per user namespace sysctls. Limit per userns sysctls to only be opened for write by a holder of CAP_SYS_RESOURCE. Add all of the necessary boilerplate for having per user namespace sysctls. Acked-by: Kees Cook Signed-off-by: "Eric W. 
Biederman" --- include/linux/user_namespace.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 4e79b3c64dee..e5697eaf6bf9 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -40,6 +40,10 @@ struct user_namespace { struct rw_semaphore persistent_keyring_register_sem; #endif struct work_struct work; +#ifdef CONFIG_SYSCTL + struct ctl_table_set set; + struct ctl_table_header *sysctls; +#endif }; extern struct user_namespace init_user_ns; -- cgit v1.2.3 From b376c3e1b6770ddcb4f0782be16358095fcea0b6 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 8 Aug 2016 13:41:24 -0500 Subject: userns: Add a limit on the number of user namespaces Export the maximum number of user namespaces as /proc/sys/userns/max_user_namespaces. Acked-by: Kees Cook Signed-off-by: "Eric W. Biederman" --- include/linux/user_namespace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index e5697eaf6bf9..6421cca2daa9 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -44,9 +44,15 @@ struct user_namespace { struct ctl_table_set set; struct ctl_table_header *sysctls; #endif + int max_user_namespaces; + atomic_t user_namespaces; }; extern struct user_namespace init_user_ns; +extern bool setup_userns_sysctls(struct user_namespace *ns); +extern void retire_userns_sysctls(struct user_namespace *ns); +extern bool inc_user_namespaces(struct user_namespace *ns); +extern void dec_user_namespaces(struct user_namespace *ns); #ifdef CONFIG_USER_NS -- cgit v1.2.3 From f6b2db1a3e8d141dd144df58900fb0444d5d7c53 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Mon, 8 Aug 2016 13:54:50 -0500 Subject: userns: Make the count of user namespaces per user Add a structure that is per user and per user ns and use it to hold the count of user namespaces. This prevents one user from denying service to another user by creating the maximum number of user namespaces. Rename the sysctl export of the maximum count from /proc/sys/userns/max_user_namespaces to /proc/sys/user/max_user_namespaces to reflect that the count is now per user. Signed-off-by: "Eric W. Biederman" --- include/linux/user_namespace.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 6421cca2daa9..826de7a12a20 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -22,6 +22,7 @@ struct uid_gid_map { /* 64 bytes -- 1 cache line */ #define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED +struct ucounts; struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; @@ -44,15 +45,24 @@ struct user_namespace { struct ctl_table_set set; struct ctl_table_header *sysctls; #endif + struct ucounts *ucounts; int max_user_namespaces; +}; + +struct ucounts { + struct hlist_node node; + struct user_namespace *ns; + kuid_t uid; + atomic_t count; atomic_t user_namespaces; }; extern struct user_namespace init_user_ns; -extern bool setup_userns_sysctls(struct user_namespace *ns); -extern void retire_userns_sysctls(struct user_namespace *ns); -extern bool inc_user_namespaces(struct user_namespace *ns); -extern void dec_user_namespaces(struct user_namespace *ns); + +bool setup_userns_sysctls(struct user_namespace *ns); +void retire_userns_sysctls(struct user_namespace *ns); +struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid); +void dec_user_namespaces(struct ucounts *ucounts); #ifdef CONFIG_USER_NS -- cgit v1.2.3 From 25f9c0817c535a728c1088542230fa327c577c9e 
Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 8 Aug 2016 14:41:52 -0500 Subject: userns: Generalize the user namespace count into ucount The same kind of recursive sane default limit and policy control that has been implemented for the user namespace is desirable for the other namespaces, so generalize the user namespace reference count into a ucount. Acked-by: Kees Cook Signed-off-by: "Eric W. Biederman" --- include/linux/user_namespace.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 826de7a12a20..9b676ead35c3 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -23,6 +23,12 @@ struct uid_gid_map { /* 64 bytes -- 1 cache line */ #define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED struct ucounts; + +enum ucount_type { + UCOUNT_USER_NAMESPACES, + UCOUNT_COUNTS, +}; + struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; @@ -46,7 +52,7 @@ struct user_namespace { struct ctl_table_header *sysctls; #endif struct ucounts *ucounts; - int max_user_namespaces; + int ucount_max[UCOUNT_COUNTS]; }; struct ucounts { @@ -54,15 +60,15 @@ struct ucounts { struct user_namespace *ns; kuid_t uid; atomic_t count; - atomic_t user_namespaces; + atomic_t ucount[UCOUNT_COUNTS]; }; extern struct user_namespace init_user_ns; bool setup_userns_sysctls(struct user_namespace *ns); void retire_userns_sysctls(struct user_namespace *ns); -struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid); -void dec_user_namespaces(struct ucounts *ucounts); +struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type); +void dec_ucount(struct ucounts *ucounts, enum ucount_type type); #ifdef CONFIG_USER_NS -- cgit v1.2.3 From f333c700c6100b53050980986be922bb21466e29 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Mon, 8 Aug 2016 14:08:36 -0500 Subject: pidns: Add a limit on the number of pid namespaces Acked-by: Kees Cook Signed-off-by: "Eric W. Biederman" --- include/linux/pid_namespace.h | 1 + include/linux/user_namespace.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 918b117a7cd3..34cce96741bc 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -40,6 +40,7 @@ struct pid_namespace { struct fs_pin *bacct; #endif struct user_namespace *user_ns; + struct ucounts *ucounts; struct work_struct proc_work; kgid_t pid_gid; int hide_pid; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 9b676ead35c3..9ee94827728d 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -26,6 +26,7 @@ struct ucounts; enum ucount_type { UCOUNT_USER_NAMESPACES, + UCOUNT_PID_NAMESPACES, UCOUNT_COUNTS, }; -- cgit v1.2.3 From f7af3d1c03136275b876f58644599b120cf4ffdd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 8 Aug 2016 14:11:25 -0500 Subject: utsns: Add a limit on the number of uts namespaces Acked-by: Kees Cook Signed-off-by: "Eric W. 
Biederman" --- include/linux/user_namespace.h | 1 + include/linux/utsname.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 9ee94827728d..f9df7dd2609a 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -27,6 +27,7 @@ struct ucounts; enum ucount_type { UCOUNT_USER_NAMESPACES, UCOUNT_PID_NAMESPACES, + UCOUNT_UTS_NAMESPACES, UCOUNT_COUNTS, }; diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 5093f58ae192..60f0bb83b313 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -24,6 +24,7 @@ struct uts_namespace { struct kref kref; struct new_utsname name; struct user_namespace *user_ns; + struct ucounts *ucounts; struct ns_common ns; }; extern struct uts_namespace init_uts_ns; -- cgit v1.2.3 From aba356616386e6e573a34c6d64ed12443686e5c8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 8 Aug 2016 14:20:23 -0500 Subject: ipcns: Add a limit on the number of ipc namespaces Acked-by: Kees Cook Signed-off-by: "Eric W. 
Biederman" --- include/linux/ipc_namespace.h | 1 + include/linux/user_namespace.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index d10e54f03c09..848e5796400e 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -58,6 +58,7 @@ struct ipc_namespace { /* user_ns which owns the ipc ns */ struct user_namespace *user_ns; + struct ucounts *ucounts; struct ns_common ns; }; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index f9df7dd2609a..e1d672186f00 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -28,6 +28,7 @@ enum ucount_type { UCOUNT_USER_NAMESPACES, UCOUNT_PID_NAMESPACES, UCOUNT_UTS_NAMESPACES, + UCOUNT_IPC_NAMESPACES, UCOUNT_COUNTS, }; -- cgit v1.2.3 From d08311dd6fd8444e39710dd2fb97562895aed8fa Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 8 Aug 2016 14:25:30 -0500 Subject: cgroupns: Add a limit on the number of cgroup namespaces Acked-by: Kees Cook Signed-off-by: "Eric W. 
Biederman" --- include/linux/cgroup.h | 1 + include/linux/user_namespace.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 984f73b719a9..1ed92812785a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -621,6 +621,7 @@ struct cgroup_namespace { atomic_t count; struct ns_common ns; struct user_namespace *user_ns; + struct ucounts *ucounts; struct css_set *root_cset; }; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index e1d672186f00..d067f0d3038e 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -29,6 +29,7 @@ enum ucount_type { UCOUNT_PID_NAMESPACES, UCOUNT_UTS_NAMESPACES, UCOUNT_IPC_NAMESPACES, + UCOUNT_CGROUP_NAMESPACES, UCOUNT_COUNTS, }; -- cgit v1.2.3 From 703286608a220d53584cca5986aad5305eec75ed Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 8 Aug 2016 14:33:23 -0500 Subject: netns: Add a limit on the number of net namespaces Acked-by: Kees Cook Signed-off-by: "Eric W. Biederman" --- include/linux/user_namespace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index d067f0d3038e..c6bc980b06a9 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -29,6 +29,7 @@ enum ucount_type { UCOUNT_PID_NAMESPACES, UCOUNT_UTS_NAMESPACES, UCOUNT_IPC_NAMESPACES, + UCOUNT_NET_NAMESPACES, UCOUNT_CGROUP_NAMESPACES, UCOUNT_COUNTS, }; -- cgit v1.2.3 From d8ad8b49618410ddeafd78465b63a6cedd6c9484 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 13 Jul 2016 11:13:56 -0400 Subject: security, overlayfs: provide copy up security hook for unioned files Provide a security hook to label new file correctly when a file is copied up from lower layer to upper layer of a overlay/union mount. 
This hook can prepare a new set of creds which are suitable for new file creation during copy up. Caller will use new creds to create file and then revert back to old creds and release new creds. Signed-off-by: Vivek Goyal Acked-by: Stephen Smalley [PM: whitespace cleanup to appease checkpatch.pl] Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 11 +++++++++++ include/linux/security.h | 6 ++++++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 101bf19c0f41..cb69fc829053 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -401,6 +401,15 @@ * @inode contains a pointer to the inode. * @secid contains a pointer to the location where result will be saved. * In case of failure, @secid will be set to zero. + * @inode_copy_up: + * A file is about to be copied up from lower layer to upper layer of + * overlay filesystem. Security module can prepare a set of new creds + * and modify as need be and return new creds. Caller will switch to + * new creds temporarily to create new file and release newly allocated + * creds. + * @src indicates the union dentry of file that is being copied up. + * @new pointer to pointer to return newly allocated creds. + * Returns 0 on success or a negative error code on error. 
* * Security hooks for file operations * @@ -1425,6 +1434,7 @@ union security_list_options { int (*inode_listsecurity)(struct inode *inode, char *buffer, size_t buffer_size); void (*inode_getsecid)(struct inode *inode, u32 *secid); + int (*inode_copy_up)(struct dentry *src, struct cred **new); int (*file_permission)(struct file *file, int mask); int (*file_alloc_security)(struct file *file); @@ -1696,6 +1706,7 @@ struct security_hook_heads { struct list_head inode_setsecurity; struct list_head inode_listsecurity; struct list_head inode_getsecid; + struct list_head inode_copy_up; struct list_head file_permission; struct list_head file_alloc_security; struct list_head file_free_security; diff --git a/include/linux/security.h b/include/linux/security.h index 7831cd57bcf7..c5b0ccd6c8b6 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -282,6 +282,7 @@ int security_inode_getsecurity(struct inode *inode, const char *name, void **buf int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags); int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size); void security_inode_getsecid(struct inode *inode, u32 *secid); +int security_inode_copy_up(struct dentry *src, struct cred **new); int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); void security_file_free(struct file *file); @@ -758,6 +759,11 @@ static inline void security_inode_getsecid(struct inode *inode, u32 *secid) *secid = 0; } +static inline int security_inode_copy_up(struct dentry *src, struct cred **new) +{ + return 0; +} + static inline int security_file_permission(struct file *file, int mask) { return 0; -- cgit v1.2.3 From 121ab822ef21914adac2fa3730efeeb8fd762473 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 13 Jul 2016 10:44:49 -0400 Subject: security,overlayfs: Provide security hook for copy up of xattrs for overlay file Provide a security hook 
which is called when xattrs of a file are being copied up. This hook is called once for each xattr and LSM can return 0 if the security module wants the xattr to be copied up, 1 if the security module wants the xattr to be discarded on the copy, -EOPNOTSUPP if the security module does not handle/manage the xattr, or a -errno upon an error. Signed-off-by: David Howells Signed-off-by: Vivek Goyal Acked-by: Stephen Smalley [PM: whitespace cleanup for checkpatch.pl] Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 10 ++++++++++ include/linux/security.h | 6 ++++++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index cb69fc829053..57971229551b 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -410,6 +410,14 @@ * @src indicates the union dentry of file that is being copied up. * @new pointer to pointer to return newly allocated creds. * Returns 0 on success or a negative error code on error. + * @inode_copy_up_xattr: + * Filter the xattrs being copied up when a unioned file is copied + * up from a lower layer to the union/overlay layer. + * @name indicates the name of the xattr. + * Returns 0 to accept the xattr, 1 to discard the xattr, -EOPNOTSUPP if + * security module does not know about attribute or a negative error code + * to abort the copy up. Note that the caller is responsible for reading + * and writing the xattrs as this hook is merely a filter. 
* * Security hooks for file operations * @@ -1435,6 +1443,7 @@ union security_list_options { size_t buffer_size); void (*inode_getsecid)(struct inode *inode, u32 *secid); int (*inode_copy_up)(struct dentry *src, struct cred **new); + int (*inode_copy_up_xattr)(const char *name); int (*file_permission)(struct file *file, int mask); int (*file_alloc_security)(struct file *file); @@ -1707,6 +1716,7 @@ struct security_hook_heads { struct list_head inode_listsecurity; struct list_head inode_getsecid; struct list_head inode_copy_up; + struct list_head inode_copy_up_xattr; struct list_head file_permission; struct list_head file_alloc_security; struct list_head file_free_security; diff --git a/include/linux/security.h b/include/linux/security.h index c5b0ccd6c8b6..536fafdfa10a 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -283,6 +283,7 @@ int security_inode_setsecurity(struct inode *inode, const char *name, const void int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size); void security_inode_getsecid(struct inode *inode, u32 *secid); int security_inode_copy_up(struct dentry *src, struct cred **new); +int security_inode_copy_up_xattr(const char *name); int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); void security_file_free(struct file *file); @@ -764,6 +765,11 @@ static inline int security_inode_copy_up(struct dentry *src, struct cred **new) return 0; } +static inline int security_inode_copy_up_xattr(const char *name) +{ + return -EOPNOTSUPP; +} + static inline int security_file_permission(struct file *file, int mask) { return 0; -- cgit v1.2.3 From 2602625b7e46576b00db619ac788c508ba3bcb2c Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 13 Jul 2016 10:44:52 -0400 Subject: security, overlayfs: Provide hook to correctly label newly created files During a new file creation we need to make sure new file is created with the right label. 
New file is created in upper/ so effectively file should get label as if task had created file in upper/. We switched to mounter's creds for actual file creation. Also if there is a whiteout present, then file will be created in work/ dir first and then renamed in upper. In none of the cases file will be labeled as we want it to be. This patch introduces a new hook dentry_create_files_as(), which determines the label/context dentry will get if it had been created by task in upper and modify passed set of creds appropriately. Caller makes use of these new creds for file creation. Signed-off-by: Vivek Goyal Acked-by: Stephen Smalley [PM: fix whitespace issues found with checkpatch.pl] [PM: changes to use stat->mode in ovl_create_or_link()] Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 15 +++++++++++++++ include/linux/security.h | 12 ++++++++++++ 2 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 57971229551b..f2af2af131ac 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -151,6 +151,16 @@ * @name name of the last path component used to create file * @ctx pointer to place the pointer to the resulting context in. * @ctxlen point to place the length of the resulting context. + * @dentry_create_files_as: + * Compute a context for a dentry as the inode is not yet available + * and set that context in passed in creds so that new files are + * created using that context. Context is calculated using the + * passed in creds and not the creds of the caller. + * @dentry dentry to use in calculating the context. + * @mode mode used to determine resource type. + * @name name of the last path component used to create file + * @old creds which should be used for context calculation + * @new creds to modify * * * Security hooks for inode operations. 
@@ -1375,6 +1385,10 @@ union security_list_options { int (*dentry_init_security)(struct dentry *dentry, int mode, const struct qstr *name, void **ctx, u32 *ctxlen); + int (*dentry_create_files_as)(struct dentry *dentry, int mode, + struct qstr *name, + const struct cred *old, + struct cred *new); #ifdef CONFIG_SECURITY_PATH @@ -1675,6 +1689,7 @@ struct security_hook_heads { struct list_head sb_clone_mnt_opts; struct list_head sb_parse_opts_str; struct list_head dentry_init_security; + struct list_head dentry_create_files_as; #ifdef CONFIG_SECURITY_PATH struct list_head path_unlink; struct list_head path_mkdir; diff --git a/include/linux/security.h b/include/linux/security.h index 536fafdfa10a..a6c6d5d0fa5d 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -242,6 +242,10 @@ int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts); int security_dentry_init_security(struct dentry *dentry, int mode, const struct qstr *name, void **ctx, u32 *ctxlen); +int security_dentry_create_files_as(struct dentry *dentry, int mode, + struct qstr *name, + const struct cred *old, + struct cred *new); int security_inode_alloc(struct inode *inode); void security_inode_free(struct inode *inode); @@ -600,6 +604,14 @@ static inline int security_dentry_init_security(struct dentry *dentry, return -EOPNOTSUPP; } +static inline int security_dentry_create_files_as(struct dentry *dentry, + int mode, struct qstr *name, + const struct cred *old, + struct cred *new) +{ + return 0; +} + static inline int security_inode_init_security(struct inode *inode, struct inode *dir, -- cgit v1.2.3 From a4f4528a3174646e654989262afdc8303835fcd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Sat, 9 Jul 2016 20:19:15 +0200 Subject: module: Fully remove the kernel_module_from_file hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove remaining kernel_module_from_file hook left by commit 
a1db74209483 ("module: replace copy_module_from_fd with kernel version") Signed-off-by: Mickaël Salaün Cc: Rusty Russell Acked-by: Kees Cook Signed-off-by: Mimi Zohar Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 1 - include/linux/security.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 101bf19c0f41..dc56ae3a68cd 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1455,7 +1455,6 @@ union security_list_options { int (*kernel_act_as)(struct cred *new, u32 secid); int (*kernel_create_files_as)(struct cred *new, struct inode *inode); int (*kernel_module_request)(char *kmod_name); - int (*kernel_module_from_file)(struct file *file); int (*kernel_read_file)(struct file *file, enum kernel_read_file_id id); int (*kernel_post_read_file)(struct file *file, char *buf, loff_t size, enum kernel_read_file_id id); diff --git a/include/linux/security.h b/include/linux/security.h index 7831cd57bcf7..4c7412943b81 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -307,7 +307,6 @@ void security_transfer_creds(struct cred *new, const struct cred *old); int security_kernel_act_as(struct cred *new, u32 secid); int security_kernel_create_files_as(struct cred *new, struct inode *inode); int security_kernel_module_request(char *kmod_name); -int security_kernel_module_from_file(struct file *file); int security_kernel_read_file(struct file *file, enum kernel_read_file_id id); int security_kernel_post_read_file(struct file *file, char *buf, loff_t size, enum kernel_read_file_id id); -- cgit v1.2.3 From 4b394a232df78414442778b02ca4a388d947d059 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Tue, 26 Jul 2016 19:10:21 -0500 Subject: crypto: ccp - Let a v5 CCP provide the same function as v3 Enable equivalent function on a v5 CCP. Add support for a version 5 CCP which enables AES/XTS/SHA services. 
Also, more work on the data structures to virtualize functionality. Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- include/linux/ccp.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ccp.h b/include/linux/ccp.h index 7c2bb27c067c..a7653339fedb 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -238,9 +238,6 @@ struct ccp_xts_aes_engine { }; /***** SHA engine *****/ -#define CCP_SHA_BLOCKSIZE SHA256_BLOCK_SIZE -#define CCP_SHA_CTXSIZE SHA256_DIGEST_SIZE - /** * ccp_sha_type - type of SHA operation * -- cgit v1.2.3 From 1ebe88d38ddec6b05f7b5e64acef30a98a4ad17e Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sat, 25 Jun 2016 22:38:21 -0700 Subject: usb: ulpi: Automatically set driver::owner with ulpi_driver_register() Let's follow other driver registration functions and automatically set the driver's owner member to THIS_MODULE when ulpi_driver_register() is called. This allows ulpi driver writers to forget about this boiler plate detail and avoids common bugs in the process. 
Signed-off-by: Stephen Boyd Acked-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- include/linux/ulpi/driver.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ulpi/driver.h b/include/linux/ulpi/driver.h index 388f6e08b9d4..80b36ca12e80 100644 --- a/include/linux/ulpi/driver.h +++ b/include/linux/ulpi/driver.h @@ -47,7 +47,11 @@ struct ulpi_driver { #define to_ulpi_driver(d) container_of(d, struct ulpi_driver, driver) -int ulpi_register_driver(struct ulpi_driver *drv); +/* + * use a macro to avoid include chaining to get THIS_MODULE + */ +#define ulpi_register_driver(drv) __ulpi_register_driver(drv, THIS_MODULE) +int __ulpi_register_driver(struct ulpi_driver *drv, struct module *module); void ulpi_unregister_driver(struct ulpi_driver *drv); #define module_ulpi_driver(__ulpi_driver) \ -- cgit v1.2.3 From 0f4be8cf637ea4637faba8a0e4bf2270287c6ba0 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Wed, 10 Aug 2016 09:39:06 +0200 Subject: mfd: stmpe: Add STMPE_IDX_SYS_CTRL/2 enum As STMPE1801/1601/24xx has a SYS_CTRL register and STMPE1601/2403 has even a SYS_CTRL2 register, add STMPE_IDX_SYS_CTRL/2 and update driver code accordingly This update prepares the ground for not yet supported STMPE1600 which share similar REG_SYS_CTRL register. 
Signed-off-by: Patrice Chotard Acked-by: Linus Walleij Signed-off-by: Lee Jones --- include/linux/mfd/stmpe.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h index de748bc7525e..eb8b73bd139f 100644 --- a/include/linux/mfd/stmpe.h +++ b/include/linux/mfd/stmpe.h @@ -39,6 +39,8 @@ enum stmpe_partnum { */ enum { STMPE_IDX_CHIP_ID, + STMPE_IDX_SYS_CTRL, + STMPE_IDX_SYS_CTRL2, STMPE_IDX_ICR_LSB, STMPE_IDX_IER_LSB, STMPE_IDX_ISR_LSB, -- cgit v1.2.3 From 897ac6674c64ca94df5b70ea5c6815a296e1d32a Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Wed, 10 Aug 2016 09:39:11 +0200 Subject: mfd: stmpe: Rework registers access this update allows to use registers map as following : regs[reg_index + offset] instead of regs[reg_index] + offset This makes code clearer and will facilitate the addition of STMPE1600 on which LSB and MSB registers are respectively located at addr and addr + 1. Despite for all others STMPE variant, LSB and MSB registers are respectively located in reverse order at addr + 1 and addr. For variant which have 3 registers's bank, we use LSB,CSB and MSB indexes which contains respectively LSB (or LOW), CSB (or MID) and MSB (or HIGH) register addresses (STMPE1801/STMPE24xx). For variant which have 2 registers's bank, we use LSB and CSB indexes only. In this case the CSB index contains the MSB regs address (STMPE 1601). 
Signed-off-by: Patrice Chotard Reviewed-by: Linus Walleij Signed-off-by: Lee Jones --- include/linux/mfd/stmpe.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h index eb8b73bd139f..6b26661a640e 100644 --- a/include/linux/mfd/stmpe.h +++ b/include/linux/mfd/stmpe.h @@ -43,20 +43,38 @@ enum { STMPE_IDX_SYS_CTRL2, STMPE_IDX_ICR_LSB, STMPE_IDX_IER_LSB, + STMPE_IDX_IER_MSB, STMPE_IDX_ISR_LSB, STMPE_IDX_ISR_MSB, STMPE_IDX_GPMR_LSB, + STMPE_IDX_GPMR_CSB, + STMPE_IDX_GPMR_MSB, STMPE_IDX_GPSR_LSB, + STMPE_IDX_GPSR_CSB, + STMPE_IDX_GPSR_MSB, STMPE_IDX_GPCR_LSB, + STMPE_IDX_GPCR_CSB, + STMPE_IDX_GPCR_MSB, STMPE_IDX_GPDR_LSB, + STMPE_IDX_GPDR_CSB, + STMPE_IDX_GPDR_MSB, + STMPE_IDX_GPEDR_LSB, + STMPE_IDX_GPEDR_CSB, STMPE_IDX_GPEDR_MSB, STMPE_IDX_GPRER_LSB, + STMPE_IDX_GPRER_CSB, + STMPE_IDX_GPRER_MSB, STMPE_IDX_GPFER_LSB, + STMPE_IDX_GPFER_CSB, + STMPE_IDX_GPFER_MSB, STMPE_IDX_GPPUR_LSB, STMPE_IDX_GPPDR_LSB, STMPE_IDX_GPAFR_U_MSB, STMPE_IDX_IEGPIOR_LSB, + STMPE_IDX_IEGPIOR_CSB, + STMPE_IDX_IEGPIOR_MSB, STMPE_IDX_ISGPIOR_LSB, + STMPE_IDX_ISGPIOR_CSB, STMPE_IDX_ISGPIOR_MSB, STMPE_IDX_MAX, }; -- cgit v1.2.3 From 6bb9f0d93399cbde14fc6a1532341a14a85d2df4 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Wed, 10 Aug 2016 09:39:14 +0200 Subject: mfd: Add STMPE1600 support STMPE1600 is a 16-bit port expander. 
Datasheet is available here : http://www2.st.com/content/st_com/en/products/interfaces-and-transceivers/ i-o-expanders-and-level-translators/i-o-expanders/stmpe1600.html Signed-off-by: Amelie DELAUNAY Signed-off-by: Patrice Chotard Acked-by: Linus Walleij Signed-off-by: Lee Jones --- include/linux/mfd/stmpe.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h index 6b26661a640e..4a827af17e59 100644 --- a/include/linux/mfd/stmpe.h +++ b/include/linux/mfd/stmpe.h @@ -26,6 +26,7 @@ enum stmpe_partnum { STMPE610, STMPE801, STMPE811, + STMPE1600, STMPE1601, STMPE1801, STMPE2401, -- cgit v1.2.3 From e48c178814b4a33f84f62d01f5a601ebd57fbba8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Jul 2016 09:18:30 +0200 Subject: perf/core: Optimize perf_pmu_sched_task() For perf record -b, which requires the pmu::sched_task callback the current code is rather expensive: 7.68% sched-pipe [kernel.vmlinux] [k] perf_pmu_sched_task 5.95% sched-pipe [kernel.vmlinux] [k] __switch_to 5.20% sched-pipe [kernel.vmlinux] [k] __intel_pmu_disable_all 3.95% sched-pipe perf [.] worker_thread The problem is that it will iterate all registered PMUs, most of which will not have anything to do. Avoid this by keeping an explicit list of PMUs that have requested the callback. The perf_sched_cb_{inc,dec}() functions already takes the required pmu argument, and now that these functions are no longer called from NMI context we can use them to manage a list. With this patch applied the function doesn't show up in the top 4 anymore (it dropped to 18th place). 6.67% sched-pipe [kernel.vmlinux] [k] __switch_to 6.18% sched-pipe [kernel.vmlinux] [k] __intel_pmu_disable_all 3.92% sched-pipe [kernel.vmlinux] [k] switch_mm_irqs_off 3.71% sched-pipe perf [.] 
worker_thread Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2b6b43cc0dd5..529c41fa73c8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -774,6 +774,9 @@ struct perf_cpu_context { #ifdef CONFIG_CGROUP_PERF struct perf_cgroup *cgrp; #endif + + struct list_head sched_cb_entry; + int sched_cb_usage; }; struct perf_output_handle { -- cgit v1.2.3 From bd425d4bfc7a1a6064dbbadfbac9c7eec0e426ec Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Wed, 22 Jun 2016 18:03:12 +0100 Subject: sched/core: Fix power to capacity renaming in comment It seems that this one escaped Nico's renaming of cpu_power to cpu_capacity a while back.
Signed-off-by: Morten Rasmussen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dietmar.eggemann@arm.com Cc: linux-kernel@vger.kernel.org Cc: mgalbraith@suse.de Cc: vincent.guittot@linaro.org Cc: yuyang.du@intel.com Link: http://lkml.kernel.org/r/1466615004-3503-2-git-send-email-morten.rasmussen@arm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 62c68e513e39..f3db596efd2c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1022,7 +1022,7 @@ extern void wake_up_q(struct wake_q_head *head); #define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ #define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ #define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ -#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu power */ +#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */ #define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */ #define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ #define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ -- cgit v1.2.3 From 80127a39681bd68c959f0953f84a830cbd7c3b1c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Jul 2016 20:08:46 +0200 Subject: locking/percpu-rwsem: Optimize readers and reduce global impact Currently the percpu-rwsem switches to (global) atomic ops while a writer is waiting; which could be quite a while and slows down releasing the readers. This patch cures this problem by ordering the reader-state vs reader-count (see the comments in __percpu_down_read() and percpu_down_write()). This changes a global atomic op into a full memory barrier, which doesn't have the global cacheline contention. 
This also enables using the percpu-rwsem with rcu_sync disabled in order to bias the implementation differently, reducing the writer latency by adding some cost to readers. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Paul McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org [ Fixed modular build. ] Signed-off-by: Ingo Molnar --- include/linux/percpu-rwsem.h | 84 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 75 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index c2fa3ecb0dce..146efefde2a1 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -10,30 +10,96 @@ struct percpu_rw_semaphore { struct rcu_sync rss; - unsigned int __percpu *fast_read_ctr; + unsigned int __percpu *read_count; struct rw_semaphore rw_sem; - atomic_t slow_read_ctr; - wait_queue_head_t write_waitq; + wait_queue_head_t writer; + int readers_block; }; -extern void percpu_down_read(struct percpu_rw_semaphore *); -extern int percpu_down_read_trylock(struct percpu_rw_semaphore *); -extern void percpu_up_read(struct percpu_rw_semaphore *); +extern int __percpu_down_read(struct percpu_rw_semaphore *, int); +extern void __percpu_up_read(struct percpu_rw_semaphore *); + +static inline void percpu_down_read(struct percpu_rw_semaphore *sem) +{ + might_sleep(); + + rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_); + + preempt_disable(); + /* + * We are in an RCU-sched read-side critical section, so the writer + * cannot both change sem->state from readers_fast and start checking + * counters while we are here. So if we see !sem->state, we know that + * the writer won't be checking until we're past the preempt_enable() + * and that one the synchronize_sched() is done, the writer will see + * anything we did within this RCU-sched read-size critical section. 
+ */ + __this_cpu_inc(*sem->read_count); + if (unlikely(!rcu_sync_is_idle(&sem->rss))) + __percpu_down_read(sem, false); /* Unconditional memory barrier */ + preempt_enable(); + /* + * The barrier() from preempt_enable() prevents the compiler from + * bleeding the critical section out. + */ +} + +static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem) +{ + int ret = 1; + + preempt_disable(); + /* + * Same as in percpu_down_read(). + */ + __this_cpu_inc(*sem->read_count); + if (unlikely(!rcu_sync_is_idle(&sem->rss))) + ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */ + preempt_enable(); + /* + * The barrier() from preempt_enable() prevents the compiler from + * bleeding the critical section out. + */ + + if (ret) + rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 1, _RET_IP_); + + return ret; +} + +static inline void percpu_up_read(struct percpu_rw_semaphore *sem) +{ + /* + * The barrier() in preempt_disable() prevents the compiler from + * bleeding the critical section out. + */ + preempt_disable(); + /* + * Same as in percpu_down_read(). 
+ */ + if (likely(rcu_sync_is_idle(&sem->rss))) + __this_cpu_dec(*sem->read_count); + else + __percpu_up_read(sem); /* Unconditional memory barrier */ + preempt_enable(); + + rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_); +} extern void percpu_down_write(struct percpu_rw_semaphore *); extern void percpu_up_write(struct percpu_rw_semaphore *); extern int __percpu_init_rwsem(struct percpu_rw_semaphore *, const char *, struct lock_class_key *); + extern void percpu_free_rwsem(struct percpu_rw_semaphore *); -#define percpu_init_rwsem(brw) \ +#define percpu_init_rwsem(sem) \ ({ \ static struct lock_class_key rwsem_key; \ - __percpu_init_rwsem(brw, #brw, &rwsem_key); \ + __percpu_init_rwsem(sem, #sem, &rwsem_key); \ }) - #define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem) static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem, -- cgit v1.2.3 From d1c6d149cf04d6c7c3c3ebf4b66c82500cbcf6e1 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sat, 23 Jul 2016 09:46:39 +0200 Subject: sched/debug: Make the "Preemption disabled at ..." message more useful This message is currently really useless since it always prints a value that comes from the printk() we just did, e.g.: BUG: sleeping function called from invalid context at mm/slab.h:388 in_atomic(): 0, irqs_disabled(): 0, pid: 31996, name: trinity-c1 Preemption disabled at:[] down_trylock+0x13/0x80 BUG: sleeping function called from invalid context at include/linux/freezer.h:56 in_atomic(): 0, irqs_disabled(): 0, pid: 31996, name: trinity-c1 Preemption disabled at:[] console_unlock+0x2f7/0x930 Here, both down_trylock() and console_unlock() is somewhere in the printk() path. We should save the value before calling printk() and use the saved value instead. 
That immediately reveals the offending callsite: BUG: sleeping function called from invalid context at mm/slab.h:388 in_atomic(): 0, irqs_disabled(): 0, pid: 14971, name: trinity-c2 Preemption disabled at:[] rhashtable_walk_start+0x46/0x150 Bug report: http://marc.info/?l=linux-netdev&m=146925979821849&w=2 Signed-off-by: Vegard Nossum Cc: Andrew Morton Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rusty Russel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f3db596efd2c..7f64e89a5873 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3236,6 +3236,15 @@ static inline void cond_resched_rcu(void) #endif } +static inline unsigned long get_preempt_disable_ip(struct task_struct *p) +{ +#ifdef CONFIG_DEBUG_PREEMPT + return p->preempt_disable_ip; +#else + return 0; +#endif +} + /* * Does a critical section need to be broken due to another * task waiting?: (technically does not depend on CONFIG_PREEMPT, -- cgit v1.2.3 From 0e0b2afdf644aa523f5eb10ce1f9e3c6cd8362ec Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 10 Aug 2016 11:23:43 -0400 Subject: kernfs: add dummy implementation of kernfs_path_from_node() The dummy version of kernfs_path_from_node() was missing. This currently doesn't break anything. Let's add it for consistency and to ease adding wrappers around it. v2: Removed stray ';' which was causing build failures. 
Signed-off-by: Tejun Heo Acked-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 96356ef012de..7d2efd2128bb 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -344,6 +344,11 @@ static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) static inline size_t kernfs_path_len(struct kernfs_node *kn) { return 0; } +static inline int kernfs_path_from_node(struct kernfs_node *root_kn, + struct kernfs_node *kn, + char *buf, size_t buflen) +{ return -ENOSYS; } + static inline char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) { return NULL; } -- cgit v1.2.3 From 3abb1d90f5d930c6183534a624aa0158a71bc5eb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 10 Aug 2016 11:23:44 -0400 Subject: kernfs: make kernfs_path*() behave in the style of strlcpy() kernfs_path*() functions always return the length of the full path but the path content is undefined if the length is larger than the provided buffer. This makes its behavior different from strlcpy() and requires error handling in all its users even when they don't care about truncation. In addition, the implementation can actually be simplified by making it behave properly in strlcpy() style. * Update kernfs_path_from_node_locked() to always fill up the buffer with path. If the buffer is not large enough, the output is truncated and terminated. * kernfs_path() no longer needs error handling. Make it a simple inline wrapper around kernfs_path_from_node(). * sysfs_warn_dup()'s use of kernfs_path() doesn't need error handling. Updated accordingly. * cgroup_path()'s use of kernfs_path() updated to retain the old behavior.
Signed-off-by: Tejun Heo Acked-by: Greg Kroah-Hartman Acked-by: Serge Hallyn --- include/linux/cgroup.h | 7 ++++++- include/linux/kernfs.h | 21 ++++++++++++++++----- 2 files changed, 22 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 984f73b719a9..5a9abdee43fe 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -541,7 +541,12 @@ static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen) static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf, size_t buflen) { - return kernfs_path(cgrp->kn, buf, buflen); + int ret; + + ret = kernfs_path(cgrp->kn, buf, buflen); + if (ret < 0 || ret >= buflen) + return NULL; + return buf; } static inline void pr_cont_cgroup_name(struct cgroup *cgrp) diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 7d2efd2128bb..4a02b1b49821 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -272,7 +272,6 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); size_t kernfs_path_len(struct kernfs_node *kn); int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn, char *buf, size_t buflen); -char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen); void pr_cont_kernfs_name(struct kernfs_node *kn); void pr_cont_kernfs_path(struct kernfs_node *kn); struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn); @@ -349,10 +348,6 @@ static inline int kernfs_path_from_node(struct kernfs_node *root_kn, char *buf, size_t buflen) { return -ENOSYS; } -static inline char *kernfs_path(struct kernfs_node *kn, char *buf, - size_t buflen) -{ return NULL; } - static inline void pr_cont_kernfs_name(struct kernfs_node *kn) { } static inline void pr_cont_kernfs_path(struct kernfs_node *kn) { } @@ -441,6 +436,22 @@ static inline void kernfs_init(void) { } #endif /* CONFIG_KERNFS */ +/** + * kernfs_path - build full path of a given node + * @kn: kernfs_node of 
interest + * @buf: buffer to copy @kn's name into + * @buflen: size of @buf + * + * Builds and returns the full path of @kn in @buf of @buflen bytes. The + * path is built from the end of @buf so the returned pointer usually + * doesn't match @buf. If @buf isn't long enough, @buf is nul terminated + * and %NULL is returned. + */ +static inline int kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) +{ + return kernfs_path_from_node(kn, NULL, buf, buflen); +} + static inline struct kernfs_node * kernfs_find_and_get(struct kernfs_node *kn, const char *name) { -- cgit v1.2.3 From bb09c8634b1e484b8840fb2384d55739bfcb68bd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 10 Aug 2016 11:23:44 -0400 Subject: kernfs: remove kernfs_path_len() It doesn't have any in-kernel user and the same result can be obtained from kernfs_path(@kn, NULL, 0). Remove it. Signed-off-by: Tejun Heo Acked-by: Greg Kroah-Hartman Cc: Serge Hallyn --- include/linux/kernfs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 4a02b1b49821..7056238fd9f5 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -269,7 +269,6 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn) } int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); -size_t kernfs_path_len(struct kernfs_node *kn); int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn, char *buf, size_t buflen); void pr_cont_kernfs_name(struct kernfs_node *kn); @@ -340,9 +339,6 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn) static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) { return -ENOSYS; } -static inline size_t kernfs_path_len(struct kernfs_node *kn) -{ return 0; } - static inline int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn, char *buf, size_t buflen) -- cgit v1.2.3 From 4c737b41de7f4eef2a593803bad1b918dd718b10 Mon Sep 
17 00:00:00 2001 From: Tejun Heo Date: Wed, 10 Aug 2016 11:23:44 -0400 Subject: cgroup: make cgroup_path() and friends behave in the style of strlcpy() cgroup_path() and friends used to format the path from the end and thus the resulting path usually didn't start at the start of the passed in buffer. Also, when the buffer was too small, the partial result was truncated from the head rather than tail and there was no way to tell how long the full path would be. These make the functions less robust and more awkward to use. With recent updates to kernfs_path(), cgroup_path() and friends can be made to behave in strlcpy() style. * cgroup_path(), cgroup_path_ns[_locked]() and task_cgroup_path() now always return the length of the full path. If buffer is too small, it contains nul terminated truncated output. * All users updated accordingly. v2: cgroup_path() usage in kernel/sched/debug.c converted. Signed-off-by: Tejun Heo Acked-by: Greg Kroah-Hartman Cc: Serge Hallyn Cc: Peter Zijlstra --- include/linux/blk-cgroup.h | 11 +---------- include/linux/cgroup.h | 16 +++++----------- 2 files changed, 6 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 10648e300c93..4e8c215e185c 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -343,16 +343,7 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd) */ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) { - char *p; - - p = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); - if (!p) { - strncpy(buf, "", buflen); - return -ENAMETOOLONG; - } - - memmove(buf, p, buf + buflen - p); - return 0; + return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); } /** diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 5a9abdee43fe..6df36361a492 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -97,7 +97,7 @@ int cgroup_add_legacy_cftypes(struct cgroup_subsys 
*ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cftype *cfts); void cgroup_file_notify(struct cgroup_file *cfile); -char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); +int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk); @@ -538,15 +538,9 @@ static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen) return kernfs_name(cgrp->kn, buf, buflen); } -static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf, - size_t buflen) +static inline int cgroup_path(struct cgroup *cgrp, char *buf, size_t buflen) { - int ret; - - ret = kernfs_path(cgrp->kn, buf, buflen); - if (ret < 0 || ret >= buflen) - return NULL; - return buf; + return kernfs_path(cgrp->kn, buf, buflen); } static inline void pr_cont_cgroup_name(struct cgroup *cgrp) @@ -639,8 +633,8 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, struct cgroup_namespace *old_ns); -char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, - struct cgroup_namespace *ns); +int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, + struct cgroup_namespace *ns); #else /* !CONFIG_CGROUPS */ -- cgit v1.2.3 From f11fa1796a4b4f8c6d4ced37e8824276ec57057d Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Wed, 10 Aug 2016 17:53:54 +0530 Subject: mfd: tps65218: add version check to the PMIC probe Version information will be needed to handle some error cases under the regulator driver, so store the information once during MFD probe. 
Signed-off-by: Tero Kristo Signed-off-by: Dave Gerlach Signed-off-by: Keerthy Acked-by: Lee Jones Signed-off-by: Mark Brown --- include/linux/mfd/tps65218.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/tps65218.h b/include/linux/mfd/tps65218.h index 7fdf5326f34e..85e464e32c43 100644 --- a/include/linux/mfd/tps65218.h +++ b/include/linux/mfd/tps65218.h @@ -267,6 +267,7 @@ struct tps_info { struct tps65218 { struct device *dev; unsigned int id; + u8 rev; struct mutex tps_lock; /* lock guarding the data structure */ /* IRQ Data */ -- cgit v1.2.3 From 23a34f9d03a5d40a6234855bc069da370708cc9e Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Wed, 10 Aug 2016 17:53:55 +0530 Subject: regulator: tps65218: do not disable DCDC3 during poweroff on broken PMICs Some versions of tps65218 do not seem to support poweroff modes properly if DCDC3 regulator is shut-down. Thus, keep it enabled even during poweroff if the version info matches the broken silicon revision. Signed-off-by: Tero Kristo Signed-off-by: Dave Gerlach Signed-off-by: Keerthy Acked-by: Lee Jones Signed-off-by: Mark Brown --- include/linux/mfd/tps65218.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/tps65218.h b/include/linux/mfd/tps65218.h index 85e464e32c43..d1db9527fab5 100644 --- a/include/linux/mfd/tps65218.h +++ b/include/linux/mfd/tps65218.h @@ -63,6 +63,11 @@ #define TPS65218_CHIPID_CHIP_MASK 0xF8 #define TPS65218_CHIPID_REV_MASK 0x07 +#define TPS65218_REV_1_0 0x0 +#define TPS65218_REV_1_1 0x1 +#define TPS65218_REV_2_0 0x2 +#define TPS65218_REV_2_1 0x3 + #define TPS65218_INT1_VPRG BIT(5) #define TPS65218_INT1_AC BIT(4) #define TPS65218_INT1_PB BIT(3) -- cgit v1.2.3 From 59cc1f61f09c26ce82c308e24b76141e1efe99f8 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Wed, 10 Aug 2016 11:05:15 +0200 Subject: net: sched: convert qdisc linked list to hashtable Convert the per-device linked list into a hashtable. 
The primary motivation for this change is that currently, we're not tracking all the qdiscs in hierarchy (e.g. excluding default qdiscs), as the lookup performed over the linked list by qdisc_match_from_root() is rather expensive. The ultimate goal is to get rid of hidden qdiscs completely, which will bring much more determinism in user experience. Reviewed-by: Cong Wang Signed-off-by: Jiri Kosina Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 076df5360ba5..96e0b6cd964e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -52,6 +52,7 @@ #include #include #include +#include struct netpoll_info; struct device; @@ -1800,6 +1801,9 @@ struct net_device { unsigned int num_tx_queues; unsigned int real_num_tx_queues; struct Qdisc *qdisc; +#ifdef CONFIG_NET_SCHED + DECLARE_HASHTABLE (qdisc_hash, 4); +#endif unsigned long tx_queue_len; spinlock_t tx_global_lock; int watchdog_timeo; -- cgit v1.2.3 From 054c67d1c82afde13e475cdd8b7117a5e40bebb1 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Tue, 9 Aug 2016 03:51:23 -0400 Subject: qed*: Add support for ethtool link_ksettings callbacks. This patch adds the driver implementation for ethtool link_ksettings callbacks. qed driver now defines/uses the qed specific masks for representing link capability values. qede driver maps these values to new link modes defined by the kernel implementation of link_ksettings. Please consider applying this to 'net-next' branch. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S.
Miller --- include/linux/qed/qed_if.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index b1e3c57c7117..737fc4c8db49 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -268,6 +268,21 @@ enum qed_protocol { QED_PROTOCOL_ISCSI, }; +enum qed_link_mode_bits { + QED_LM_FIBRE_BIT = BIT(0), + QED_LM_Autoneg_BIT = BIT(1), + QED_LM_Asym_Pause_BIT = BIT(2), + QED_LM_Pause_BIT = BIT(3), + QED_LM_1000baseT_Half_BIT = BIT(4), + QED_LM_1000baseT_Full_BIT = BIT(5), + QED_LM_10000baseKR_Full_BIT = BIT(6), + QED_LM_25000baseKR_Full_BIT = BIT(7), + QED_LM_40000baseLR4_Full_BIT = BIT(8), + QED_LM_50000baseKR2_Full_BIT = BIT(9), + QED_LM_100000baseKR4_Full_BIT = BIT(10), + QED_LM_COUNT = 11 +}; + struct qed_link_params { bool link_up; -- cgit v1.2.3 From c38162be301d59278f568e0b34be31915b6fe3bb Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 16 Jun 2016 11:36:13 +0200 Subject: video: ARM CLCD: backlight support for OF If the device is probed from device tree, we can support backlight. This is used with some systems such as the ST Microelectronics Nomadik. We have to add HAS_IOMEM to the dependencies of CLCD since the backlight class device will now be selected, and if it gets selected on an arch that does not have IOMEM, compilation will fail. 
Cc: Pawel Moll Cc: Rob Herring Cc: Russell King Signed-off-by: Linus Walleij Signed-off-by: Tomi Valkeinen --- include/linux/amba/clcd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/amba/clcd.h b/include/linux/amba/clcd.h index e82e3ee2c54a..e64c1ccebb76 100644 --- a/include/linux/amba/clcd.h +++ b/include/linux/amba/clcd.h @@ -93,6 +93,8 @@ enum { CLCD_CAP_ALL = CLCD_CAP_BGR | CLCD_CAP_RGB, }; +struct backlight_device; + struct clcd_panel { struct fb_videomode mode; signed short width; /* width in mm */ @@ -105,6 +107,7 @@ struct clcd_panel { fixedtimings:1, grayscale:1; unsigned int connector; + struct backlight_device *backlight; }; struct clcd_regs { -- cgit v1.2.3 From 03d14c36af98dd2191c2e35b5ed55ff93b59d345 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 16 Jun 2016 11:36:15 +0200 Subject: video: ARM CLCD: support pads connected in reverse order There are CLCDs connected with the pads in BGR rather than RGB order. It really doesn't matter since the CLCD has a flag and a bit to switch the position of the RGB and BGR components. This is needed to put something logical into the arm,pl11x,tft-r0g0b0-pads property of the device tree on the Nomadik which will then be <16 8 0>. Cc: Pawel Moll Cc: Rob Herring Cc: Russell King Signed-off-by: Linus Walleij Signed-off-by: Tomi Valkeinen --- include/linux/amba/clcd.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/amba/clcd.h b/include/linux/amba/clcd.h index e64c1ccebb76..8b64ec0d574b 100644 --- a/include/linux/amba/clcd.h +++ b/include/linux/amba/clcd.h @@ -108,6 +108,12 @@ struct clcd_panel { grayscale:1; unsigned int connector; struct backlight_device *backlight; + /* + * If the B/R lines are switched between the CLCD + * and the panel we need to know this and not try to + * compensate with the BGR bit in the control register. 
+ */ + bool bgr_connection; }; struct clcd_regs { @@ -234,16 +240,22 @@ static inline void clcdfb_decode(struct clcd_fb *fb, struct clcd_regs *regs) if (var->grayscale) val |= CNTL_LCDBW; - if (fb->panel->caps && fb->board->caps && - var->bits_per_pixel >= 16) { + if (fb->panel->caps && fb->board->caps && var->bits_per_pixel >= 16) { /* * if board and panel supply capabilities, we can support - * changing BGR/RGB depending on supplied parameters + * changing BGR/RGB depending on supplied parameters. Here + * we switch to what the framebuffer is providing if need + * be, so if the framebuffer is BGR but the display connection + * is RGB (first case) we switch it around. Vice versa mutatis + * mutandis if the framebuffer is RGB but the display connection + * is BGR, we flip it around. */ if (var->red.offset == 0) val &= ~CNTL_BGR; else val |= CNTL_BGR; + if (fb->panel->bgr_connection) + val ^= CNTL_BGR; } switch (var->bits_per_pixel) { -- cgit v1.2.3 From 046ad6cdeb3f83abcbfa2af88ce471afb2e7fc30 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 16 Jun 2016 11:36:16 +0200 Subject: video: ARM CLCD: support Nomadik variant The Nomadik variant has a few special quirks that need to be respected to make the driver work: - The block need to be clocked during writing of the TIMn registers or the bus will stall. - Special bits in the control register select how many of the output display lines get activated. - Special bits in the control register select how to manage the different 565 and 5551 modes. - There is a packed 24bit graphics mode, i.e 888 pixels can be stored in memory is three consecutive bytes, not evenly aligned to a 32bit word. This patch uses the vendor data pointer from the AMBA matching mechanism to track the quirks for this variant, and adds two hooks that variants can use to initialize boards and panels during start-up. These will later be used to adopt a Nomadik board profile. 
Cc: Pawel Moll Cc: Rob Herring Cc: Russell King Signed-off-by: Linus Walleij Signed-off-by: Tomi Valkeinen --- include/linux/amba/clcd.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/amba/clcd.h b/include/linux/amba/clcd.h index 8b64ec0d574b..1035879b322c 100644 --- a/include/linux/amba/clcd.h +++ b/include/linux/amba/clcd.h @@ -67,6 +67,17 @@ #define CNTL_LDMAFIFOTIME (1 << 15) #define CNTL_WATERMARK (1 << 16) +/* ST Microelectronics variant bits */ +#define CNTL_ST_1XBPP_444 0x0 +#define CNTL_ST_1XBPP_5551 (1 << 17) +#define CNTL_ST_1XBPP_565 (1 << 18) +#define CNTL_ST_CDWID_12 0x0 +#define CNTL_ST_CDWID_16 (1 << 19) +#define CNTL_ST_CDWID_18 (1 << 20) +#define CNTL_ST_CDWID_24 ((1 << 19)|(1 << 20)) +#define CNTL_ST_CEAEN (1 << 21) +#define CNTL_ST_LCDBPP24_PACKED (6 << 1) + enum { /* individual formats */ CLCD_CAP_RGB444 = (1 << 0), @@ -179,11 +190,38 @@ struct clcd_board { struct amba_device; struct clk; +/** + * struct clcd_vendor_data - holds hardware (IP-block) vendor-specific + * variant information + * + * @clock_timregs: the CLCD needs to be clocked when accessing the + * timer registers, or the hardware will hang. + * @packed_24_bit_pixels: this variant supports 24bit packed pixel data, + * so that RGB accesses 3 bytes at a time, not just on even 32bit + * boundaries, packing the pixel data in memory. ST Microelectronics + * have this. + * @st_bitmux_control: ST Microelectronics have implemented output + * bit line multiplexing into the CLCD control register. This indicates + * that we need to use this. 
+ * @init_board: custom board init function for this variant + * @init_panel: custom panel init function for this variant + */ +struct clcd_vendor_data { + bool clock_timregs; + bool packed_24_bit_pixels; + bool st_bitmux_control; + int (*init_board)(struct amba_device *adev, + struct clcd_board *board); + int (*init_panel)(struct clcd_fb *fb, + struct device_node *panel); +}; + /* this data structure describes each frame buffer device we find */ struct clcd_fb { struct fb_info fb; struct amba_device *dev; struct clk *clk; + struct clcd_vendor_data *vendor; struct clcd_panel *panel; struct clcd_board *board; void *board_data; @@ -285,6 +323,10 @@ static inline void clcdfb_decode(struct clcd_fb *fb, struct clcd_regs *regs) else val |= CNTL_LCDBPP16_444; break; + case 24: + /* Modified variant supporting 24 bit packed pixels */ + val |= CNTL_ST_LCDBPP24_PACKED; + break; case 32: val |= CNTL_LCDBPP24; break; -- cgit v1.2.3 From c7296c51ce5d9aec3ae9fa36b3096983c35bd279 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Tue, 5 Jul 2016 18:23:25 +0200 Subject: clk: core: New macro CLK_OF_DECLARE_DRIVER This will be used by drivers that requires initialization at of_clk_init() time and also during platform device probing. 
Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index a39c0c530778..f403b8a5f8ca 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -780,6 +780,18 @@ extern struct of_device_id __clk_of_table; #define CLK_OF_DECLARE(name, compat, fn) OF_DECLARE_1(clk, name, compat, fn) +/* + * Use this macro when you have a driver that requires two initialization + * routines, one at of_clk_init(), and one at platform device probe + */ +#define CLK_OF_DECLARE_DRIVER(name, compat, fn) \ + static void name##_of_clk_init_driver(struct device_node *np) \ + { \ + of_node_clear_flag(np, OF_POPULATED); \ + fn(np); \ + } \ + OF_DECLARE_1(clk, name, compat, name##_of_clk_init_driver) + #ifdef CONFIG_OF int of_clk_add_provider(struct device_node *np, struct clk *(*clk_src_get)(struct of_phandle_args *args, -- cgit v1.2.3 From 0963679c0c30269c17d5891081cf0896f7d92c4b Mon Sep 17 00:00:00 2001 From: "Anna, Suman" Date: Fri, 12 Aug 2016 18:42:26 -0500 Subject: rpmsg: align code with open parenthesis This patch fixes most of the existing alignment checkpatch check warnings of the type "Alignment should match open parenthesis" in the virtio rpmsg bus code. A couple of them have been left as is to not exceed the 80-char limit. 
Signed-off-by: Suman Anna Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index ada50ff36da0..565917cdb4d2 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -173,7 +173,7 @@ int __register_rpmsg_driver(struct rpmsg_driver *drv, struct module *owner); void unregister_rpmsg_driver(struct rpmsg_driver *drv); void rpmsg_destroy_ept(struct rpmsg_endpoint *); struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_channel *, - rpmsg_rx_cb_t cb, void *priv, u32 addr); + rpmsg_rx_cb_t cb, void *priv, u32 addr); int rpmsg_send_offchannel_raw(struct rpmsg_channel *, u32, u32, void *, int, bool); @@ -265,7 +265,7 @@ int rpmsg_sendto(struct rpmsg_channel *rpdev, void *data, int len, u32 dst) */ static inline int rpmsg_send_offchannel(struct rpmsg_channel *rpdev, u32 src, u32 dst, - void *data, int len) + void *data, int len) { return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, true); } @@ -340,7 +340,7 @@ int rpmsg_trysendto(struct rpmsg_channel *rpdev, void *data, int len, u32 dst) */ static inline int rpmsg_trysend_offchannel(struct rpmsg_channel *rpdev, u32 src, u32 dst, - void *data, int len) + void *data, int len) { return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, false); } -- cgit v1.2.3 From f145928d496df35fdca86d8af77fe9610fd0055f Mon Sep 17 00:00:00 2001 From: "Anna, Suman" Date: Fri, 12 Aug 2016 18:42:19 -0500 Subject: remoteproc: fix bare unsigned type usage While there is nothing wrong with defining an unsigned integer variable or argument using the bare unsigned type, it is better to use the checkpatch preferred 'unsigned int' type. 
Signed-off-by: Suman Anna Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 1c457a8dd5a6..f575f1ba42a1 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -435,7 +435,7 @@ struct rproc { struct idr notifyids; int index; struct work_struct crash_handler; - unsigned crash_cnt; + unsigned int crash_cnt; struct completion crash_comp; bool recovery_disabled; int max_notifyid; -- cgit v1.2.3 From 730f84ce6d59732d070a1dfb0d2591ff110e3e5d Mon Sep 17 00:00:00 2001 From: "Anna, Suman" Date: Fri, 12 Aug 2016 18:42:20 -0500 Subject: remoteproc: align code with open parenthesis This patch fixes the existing alignment checkpatch check warnings of the type "Alignment should match open parenthesis" in the remoteproc core source files. Signed-off-by: Suman Anna Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index f575f1ba42a1..8229523f70a5 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -489,8 +489,8 @@ struct rproc_vdev { struct rproc *rproc_get_by_phandle(phandle phandle); struct rproc *rproc_alloc(struct device *dev, const char *name, - const struct rproc_ops *ops, - const char *firmware, int len); + const struct rproc_ops *ops, + const char *firmware, int len); void rproc_put(struct rproc *rproc); int rproc_add(struct rproc *rproc); int rproc_del(struct rproc *rproc); -- cgit v1.2.3 From 334765f45b4db607768b64f4afe9fccf85bd6c0a Mon Sep 17 00:00:00 2001 From: "Anna, Suman" Date: Fri, 12 Aug 2016 18:42:22 -0500 Subject: remoteproc/omap: fix various code formatting issues This patch fixes some of the existing checkpatch warnings in OMAP remoteproc code. The fixes are to the following warnings: 1. 
WARNING: missing space after return type 2. WARNING: Unnecessary space after function pointer name 3. CHECK: Alignment should match open parenthesis Signed-off-by: Suman Anna Signed-off-by: Bjorn Andersson --- include/linux/platform_data/remoteproc-omap.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/remoteproc-omap.h b/include/linux/platform_data/remoteproc-omap.h index bfbd12b41162..71a1b2399c48 100644 --- a/include/linux/platform_data/remoteproc-omap.h +++ b/include/linux/platform_data/remoteproc-omap.h @@ -39,9 +39,9 @@ struct omap_rproc_pdata { const char *firmware; const char *mbox_name; const struct rproc_ops *ops; - int (*device_enable) (struct platform_device *pdev); - int (*device_shutdown) (struct platform_device *pdev); - void(*set_bootaddr)(u32); + int (*device_enable)(struct platform_device *pdev); + int (*device_shutdown)(struct platform_device *pdev); + void (*set_bootaddr)(u32); }; #if defined(CONFIG_OMAP_REMOTEPROC) || defined(CONFIG_OMAP_REMOTEPROC_MODULE) -- cgit v1.2.3 From 4851b1b207d63599d14bd1d316423cd054d90860 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 12 Aug 2016 21:38:44 -0700 Subject: rpmsg: Drop prototypes for non-existing functions The (un)register_rpmsg_device() functions never made it to mainline, so drop them for now. 
Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 565917cdb4d2..2b97c711a5e3 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -167,8 +167,6 @@ struct rpmsg_driver { void (*callback)(struct rpmsg_channel *, void *, int, void *, u32); }; -int register_rpmsg_device(struct rpmsg_channel *dev); -void unregister_rpmsg_device(struct rpmsg_channel *dev); int __register_rpmsg_driver(struct rpmsg_driver *drv, struct module *owner); void unregister_rpmsg_driver(struct rpmsg_driver *drv); void rpmsg_destroy_ept(struct rpmsg_endpoint *); -- cgit v1.2.3 From aed704b7a634954dc28fe5c4b49db478cf2d96b7 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Fri, 12 Aug 2016 08:56:40 -0700 Subject: cgroup: Add task_under_cgroup_hierarchy cgroup inline function to headers This commit adds an inline function to cgroup.h to check whether a given task is under a given cgroup hierarchy. This is to avoid having to put ifdefs in .c files to gate access to cgroups. When cgroups are disabled this always returns true. Signed-off-by: Sargun Dhillon Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Tejun Heo Acked-by: Tejun Heo Signed-off-by: David S. Miller --- include/linux/cgroup.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 984f73b719a9..a4414a11eea7 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -497,6 +497,23 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp, return cgrp->ancestor_ids[ancestor->level] == ancestor->id; } +/** + * task_under_cgroup_hierarchy - test task's membership of cgroup ancestry + * @task: the task to be tested + * @ancestor: possible ancestor of @task's cgroup + * + * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor. 
+ * It follows all the same rules as cgroup_is_descendant, and only applies + * to the default hierarchy. + */ +static inline bool task_under_cgroup_hierarchy(struct task_struct *task, + struct cgroup *ancestor) +{ + struct css_set *cset = task_css_set(task); + + return cgroup_is_descendant(cset->dfl_cgrp, ancestor); +} + /* no synchronization, the result can only be used as a hint */ static inline bool cgroup_is_populated(struct cgroup *cgrp) { @@ -557,6 +574,7 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; +struct cgroup; static inline void css_put(struct cgroup_subsys_state *css) {} static inline int cgroup_attach_task_all(struct task_struct *from, @@ -574,6 +592,11 @@ static inline void cgroup_free(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } +static inline bool task_under_cgroup_hierarchy(struct task_struct *task, + struct cgroup *ancestor) +{ + return true; +} #endif /* !CONFIG_CGROUPS */ /* -- cgit v1.2.3 From 04ed5ad5db6880d53dd1bb8c93e82228a462a4dd Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sun, 17 Jul 2016 01:28:47 +0300 Subject: net/mlx5: Init/Teardown hca commands via mlx5 ifc Remove old representation of manually created Init/Teardown hca commands layout and use mlx5_ifc canonical structures and defines. 
Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 0b6d15cddb2f..6c343c0b77d2 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -455,30 +455,6 @@ struct mlx5_odp_caps { char reserved2[0xe4]; }; -struct mlx5_cmd_init_hca_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd0[2]; - __be16 profile; - u8 rsvd1[4]; -}; - -struct mlx5_cmd_init_hca_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_cmd_teardown_hca_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd0[2]; - __be16 profile; - u8 rsvd1[4]; -}; - -struct mlx5_cmd_teardown_hca_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - struct mlx5_cmd_layout { u8 type; u8 rsvd0[3]; -- cgit v1.2.3 From 20ed51c643b6296789a48adc3bc2cc875a1612cf Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sun, 17 Jul 2016 00:46:41 +0300 Subject: net/mlx5: Access register and MAD IFC commands via mlx5 ifc Remove old representation of manually created ACCESS_REG/MAD_IFC commands layout and use mlx5_ifc canonical structures and defines. 
Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 6c343c0b77d2..9570c493b50f 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1165,35 +1165,6 @@ struct mlx5_dump_mkey_mbox_out { __be32 mkey; }; -struct mlx5_mad_ifc_mbox_in { - struct mlx5_inbox_hdr hdr; - __be16 remote_lid; - u8 rsvd0; - u8 port; - u8 rsvd1[4]; - u8 data[256]; -}; - -struct mlx5_mad_ifc_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; - u8 data[256]; -}; - -struct mlx5_access_reg_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd0[2]; - __be16 register_id; - __be32 arg; - __be32 data[0]; -}; - -struct mlx5_access_reg_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; - __be32 data[0]; -}; - #define MLX5_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) enum { -- cgit v1.2.3 From 20bb566bda7b3e62b67dbb1bd363be40b5ae81c3 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sun, 17 Jul 2016 02:01:45 +0300 Subject: net/mlx5: MCG commands via mlx5 ifc Remove old representation of manually created MCG commands layout and use mlx5_ifc canonical structures and defines. 
Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 21bc4557b67a..3f70fc9c2fc9 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -152,7 +152,7 @@ enum { MLX5_CMD_OP_CONFIG_INT_MODERATION = 0x804, MLX5_CMD_OP_ACCESS_REG = 0x805, MLX5_CMD_OP_ATTACH_TO_MCG = 0x806, - MLX5_CMD_OP_DETTACH_FROM_MCG = 0x807, + MLX5_CMD_OP_DETACH_FROM_MCG = 0x807, MLX5_CMD_OP_GET_DROPPED_PACKET_LOG = 0x80a, MLX5_CMD_OP_MAD_IFC = 0x50d, MLX5_CMD_OP_QUERY_MAD_DEMUX = 0x80b, -- cgit v1.2.3 From 73b626c182dff06867ceba996a819e8372c9b2ce Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sat, 16 Jul 2016 03:26:15 +0300 Subject: net/mlx5: EQ commands via mlx5 ifc Remove old representation of manually created EQ commands layout, and use mlx5_ifc canonical structures and defines. Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 74 --------------------------------------------- include/linux/mlx5/driver.h | 2 +- 2 files changed, 1 insertion(+), 75 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 9570c493b50f..c84e0ba5b261 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -995,80 +995,6 @@ struct mlx5_disable_hca_mbox_out { u8 rsvd[8]; }; -struct mlx5_eq_context { - u8 status; - u8 ec_oi; - u8 st; - u8 rsvd2[7]; - __be16 page_pffset; - __be32 log_sz_usr_page; - u8 rsvd3[7]; - u8 intr; - u8 log_page_size; - u8 rsvd4[15]; - __be32 consumer_counter; - __be32 produser_counter; - u8 rsvd5[16]; -}; - -struct mlx5_create_eq_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd0[3]; - u8 input_eqn; - u8 rsvd1[4]; - struct mlx5_eq_context ctx; - u8 rsvd2[8]; - __be64 events_mask; - u8 rsvd3[176]; - __be64 pas[0]; -}; - -struct 
mlx5_create_eq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[3]; - u8 eq_number; - u8 rsvd1[4]; -}; - -struct mlx5_destroy_eq_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd0[3]; - u8 eqn; - u8 rsvd1[4]; -}; - -struct mlx5_destroy_eq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_map_eq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be64 mask; - u8 mu; - u8 rsvd0[2]; - u8 eqn; - u8 rsvd1[24]; -}; - -struct mlx5_map_eq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_query_eq_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd0[3]; - u8 eqn; - u8 rsvd1[4]; -}; - -struct mlx5_query_eq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; - struct mlx5_eq_context ctx; -}; - enum { MLX5_MKEY_STATUS_FREE = 1 << 6, }; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ccea6fb16482..eed4b612572d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -865,7 +865,7 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, - struct mlx5_query_eq_mbox_out *out, int outlen); + u32 *out, int outlen); int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); -- cgit v1.2.3 From 278277866334e515141dde7c8ac143e15c0a767f Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sat, 16 Jul 2016 02:33:22 +0300 Subject: {net,IB}/mlx5: CQ commands via mlx5 ifc Remove old representation of manually created CQ commands layout, and use mlx5_ifc canonical structures and defines. 
Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/cq.h | 6 ++-- include/linux/mlx5/device.h | 76 --------------------------------------------- 2 files changed, 3 insertions(+), 79 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 2566f6d6444f..7c3c0d3aca37 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -170,12 +170,12 @@ static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd, int mlx5_init_cq_table(struct mlx5_core_dev *dev); void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev); int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - struct mlx5_create_cq_mbox_in *in, int inlen); + u32 *in, int inlen); int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - struct mlx5_query_cq_mbox_out *out); + u32 *out, int outlen); int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - struct mlx5_modify_cq_mbox_in *in, int in_sz); + u32 *in, int inlen); int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u16 cq_period, u16 cq_max_count); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index c84e0ba5b261..5a1c1606bdbd 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -899,82 +899,6 @@ struct mlx5_arm_srq_mbox_out { u8 rsvd[8]; }; -struct mlx5_cq_context { - u8 status; - u8 cqe_sz_flags; - u8 st; - u8 rsvd3; - u8 rsvd4[6]; - __be16 page_offset; - __be32 log_sz_usr_page; - __be16 cq_period; - __be16 cq_max_count; - __be16 rsvd20; - __be16 c_eqn; - u8 log_pg_sz; - u8 rsvd25[7]; - __be32 last_notified_index; - __be32 solicit_producer_index; - __be32 consumer_counter; - __be32 producer_counter; - u8 rsvd48[8]; - __be64 db_record_addr; -}; - -struct mlx5_create_cq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 input_cqn; - u8 
rsvdx[4]; - struct mlx5_cq_context ctx; - u8 rsvd6[192]; - __be64 pas[0]; -}; - -struct mlx5_create_cq_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 cqn; - u8 rsvd0[4]; -}; - -struct mlx5_destroy_cq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 cqn; - u8 rsvd0[4]; -}; - -struct mlx5_destroy_cq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[8]; -}; - -struct mlx5_query_cq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 cqn; - u8 rsvd0[4]; -}; - -struct mlx5_query_cq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[8]; - struct mlx5_cq_context ctx; - u8 rsvd6[16]; - __be64 pas[0]; -}; - -struct mlx5_modify_cq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 cqn; - __be32 field_select; - struct mlx5_cq_context ctx; - u8 rsvd[192]; - __be64 pas[0]; -}; - -struct mlx5_modify_cq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - struct mlx5_enable_hca_mbox_in { struct mlx5_inbox_hdr hdr; u8 rsvd[8]; -- cgit v1.2.3 From ec22eb53106be1472ba6573dc900943f52f8fd1e Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sat, 16 Jul 2016 06:28:36 +0300 Subject: {net,IB}/mlx5: MKey/PSV commands via mlx5 ifc Remove old representation of manually created MKey/PSV commands layout, and use mlx5_ifc canonical structures and defines. 
Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 113 +++--------------------------------------- include/linux/mlx5/driver.h | 11 ++-- include/linux/mlx5/mlx5_ifc.h | 2 +- 3 files changed, 15 insertions(+), 111 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 5a1c1606bdbd..fb002db1e2f0 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -197,19 +197,6 @@ enum { MLX5_PCIE_CTRL_TPH_MASK = 3 << 4, }; -enum { - MLX5_ACCESS_MODE_PA = 0, - MLX5_ACCESS_MODE_MTT = 1, - MLX5_ACCESS_MODE_KLM = 2 -}; - -enum { - MLX5_MKEY_REMOTE_INVAL = 1 << 24, - MLX5_MKEY_FLAG_SYNC_UMR = 1 << 29, - MLX5_MKEY_BSF_EN = 1 << 30, - MLX5_MKEY_LEN64 = 1 << 31, -}; - enum { MLX5_EN_RD = (u64)1, MLX5_EN_WR = (u64)2 @@ -923,6 +910,13 @@ enum { MLX5_MKEY_STATUS_FREE = 1 << 6, }; +enum { + MLX5_MKEY_REMOTE_INVAL = 1 << 24, + MLX5_MKEY_FLAG_SYNC_UMR = 1 << 29, + MLX5_MKEY_BSF_EN = 1 << 30, + MLX5_MKEY_LEN64 = 1 << 31, +}; + struct mlx5_mkey_seg { /* This is a two bit field occupying bits 31-30. 
* bit 31 is always 0, @@ -945,105 +939,12 @@ struct mlx5_mkey_seg { u8 rsvd4[4]; }; -struct mlx5_query_special_ctxs_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_query_special_ctxs_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 dump_fill_mkey; - __be32 reserved_lkey; -}; - -struct mlx5_create_mkey_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 input_mkey_index; - __be32 flags; - struct mlx5_mkey_seg seg; - u8 rsvd1[16]; - __be32 xlat_oct_act_size; - __be32 rsvd2; - u8 rsvd3[168]; - __be64 pas[0]; -}; - -struct mlx5_create_mkey_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 mkey; - u8 rsvd[4]; -}; - -struct mlx5_destroy_mkey_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 mkey; - u8 rsvd[4]; -}; - -struct mlx5_destroy_mkey_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_query_mkey_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 mkey; -}; - -struct mlx5_query_mkey_mbox_out { - struct mlx5_outbox_hdr hdr; - __be64 pas[0]; -}; - -struct mlx5_modify_mkey_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 mkey; - __be64 pas[0]; -}; - -struct mlx5_modify_mkey_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_dump_mkey_mbox_in { - struct mlx5_inbox_hdr hdr; -}; - -struct mlx5_dump_mkey_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 mkey; -}; - #define MLX5_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) enum { MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO = 1 << 0 }; -struct mlx5_allocate_psv_in { - struct mlx5_inbox_hdr hdr; - __be32 npsv_pd; - __be32 rsvd_psv0; -}; - -struct mlx5_allocate_psv_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; - __be32 psv_idx[4]; -}; - -struct mlx5_destroy_psv_in { - struct mlx5_inbox_hdr hdr; - __be32 psv_number; - u8 rsvd[4]; -}; - -struct mlx5_destroy_psv_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - enum { VPORT_STATE_DOWN = 0x0, VPORT_STATE_UP = 0x1, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 
eed4b612572d..173817187abb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -807,15 +807,18 @@ int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq); void mlx5_init_mkey_table(struct mlx5_core_dev *dev); void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev); +int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, + u32 *in, int inlen, + u32 *out, int outlen, + mlx5_cmd_cbk_t callback, void *context); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - struct mlx5_create_mkey_mbox_in *in, int inlen, - mlx5_cmd_cbk_t callback, void *context, - struct mlx5_create_mkey_mbox_out *out); + u32 *in, int inlen); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey); int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - struct mlx5_query_mkey_mbox_out *out, int outlen); + u32 *out, int outlen); int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey, u32 *mkey); int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3f70fc9c2fc9..2a39a06dbad4 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -3489,7 +3489,7 @@ struct mlx5_ifc_query_special_contexts_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x20]; + u8 dump_fill_mkey[0x20]; u8 resd_lkey[0x20]; }; -- cgit v1.2.3 From e79c6a4fc923eed2bdd3b716e0f01414847db90a Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 10 Aug 2016 14:36:02 -0700 Subject: net: make net namespace sysctls belong to container's owner If net namespace is attached to a user namespace let's make container's root owner of sysctls affecting said network namespace instead of global root. 
This also allows us to clean up net_ctl_permissions() because we do not need to fudge permissions anymore for the container's owner since it now owns the objects in question. Acked-by: "Eric W. Biederman" Signed-off-by: Dmitry Torokhov Signed-off-by: David S. Miller --- include/linux/sysctl.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 697e160c78d0..d82cb6011e77 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -25,6 +25,7 @@ #include #include #include +#include #include /* For the /proc/sys support */ @@ -157,6 +158,9 @@ struct ctl_table_root { struct ctl_table_set default_set; struct ctl_table_set *(*lookup)(struct ctl_table_root *root, struct nsproxy *namespaces); + void (*set_ownership)(struct ctl_table_header *head, + struct ctl_table *table, + kuid_t *uid, kgid_t *gid); int (*permissions)(struct ctl_table_header *head, struct ctl_table *table); }; -- cgit v1.2.3 From d6b76c4ddb124dd22c6e910ca9332e472e7b3273 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 10 Aug 2016 11:56:46 +0200 Subject: USB: bcma: support old USB 2.0 controller on Northstar devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently bcma-hcd driver handles 3 different bcma cores: 1) BCMA_CORE_USB20_HOST (0x819) 2) BCMA_CORE_NS_USB20 (0x504) 3) BCMA_CORE_NS_USB30 (0x505) The first one was introduced years ago and so far was used on MIPS devices only. All Northstar (ARM) devices were using other two cores which allowed easy implementation of separated initialization paths. It seems however Broadcom decided to reuse this old USB 2.0 controller on some recently introduced cheaper Northstar BCM53573 SoCs. I noticed this on Tenda AC9 (based on BCM47189B0 belonging to BCM53573 family). 
There is no difference in this old controller core identification between MIPS and ARM devices: they share the same id and revision. We need different controller initialization procedure however. To handle this add a check for architecture and implement required initialization for ARM case. Signed-off-by: Rafał Miłecki Signed-off-by: Greg Kroah-Hartman --- include/linux/bcma/bcma_regs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_regs.h b/include/linux/bcma/bcma_regs.h index ebd5c1fcdea4..4901fb358b07 100644 --- a/include/linux/bcma/bcma_regs.h +++ b/include/linux/bcma/bcma_regs.h @@ -10,6 +10,7 @@ #define BCMA_CLKCTLST_HAVEALPREQ 0x00000008 /* ALP available request */ #define BCMA_CLKCTLST_HAVEHTREQ 0x00000010 /* HT available request */ #define BCMA_CLKCTLST_HWCROFF 0x00000020 /* Force HW clock request off */ +#define BCMA_CLKCTLST_HQCLKREQ 0x00000040 /* HQ Clock */ #define BCMA_CLKCTLST_EXTRESREQ 0x00000700 /* Mask of external resource requests */ #define BCMA_CLKCTLST_EXTRESREQ_SHIFT 8 #define BCMA_CLKCTLST_HAVEALP 0x00010000 /* ALP available */ -- cgit v1.2.3 From 9bb04a0c4e261187be904d05c2bcd1da0eebc20c Mon Sep 17 00:00:00 2001 From: Jonathan Yong Date: Sat, 11 Jun 2016 14:13:38 -0500 Subject: PCI: Add Precision Time Measurement (PTM) support Add Precision Time Measurement (PTM) support (see PCIe r3.1, sec 6.22). Enable PTM on PTM Root devices and switch ports. This does not enable PTM on endpoints. There currently are no PTM-capable devices on the market, but it is expected to be supported by the Intel Apollo Lake platform. 
[bhelgaas: complete rework] Signed-off-by: Jonathan Yong Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 2599a980340f..96c509fa9d46 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -367,6 +367,11 @@ struct pci_dev { int rom_attr_enabled; /* has display of the rom attribute been enabled? */ struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */ struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */ + +#ifdef CONFIG_PCIE_PTM + unsigned int ptm_root:1; + unsigned int ptm_enabled:1; +#endif #ifdef CONFIG_PCI_MSI const struct attribute_group **msi_irq_groups; #endif -- cgit v1.2.3 From 565f9b073f37e5cb6f80ad0ad71ec1144f87fd9f Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 16 Aug 2016 11:50:32 -0700 Subject: regulator: Remove support for optional supplies in the bulk API The patch was based on my misinterpretation of the API and only accidentally worked for me. Let's clean it out to not confuse others. This reverts commit 3ff3f518a135fa4592fe2817e9ac2cce1fa23dc2. Signed-off-by: Bjorn Andersson Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index cae500b2c1d7..692108222271 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -140,8 +140,6 @@ struct regulator; * * @supply: The name of the supply. Initialised by the user before * using the bulk regulator APIs. - * @optional: The supply should be considered optional. Initialised by the user - * before using the bulk regulator APIs. * @consumer: The regulator consumer for the supply. This will be managed * by the bulk API. 
* @@ -151,7 +149,6 @@ struct regulator; */ struct regulator_bulk_data { const char *supply; - bool optional; struct regulator *consumer; /* private: Internal use */ -- cgit v1.2.3 From 58919e83c85c3a3c5fb34025dc0e95ddd998c478 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 16 Aug 2016 22:14:55 +0200 Subject: cpufreq / sched: Pass flags to cpufreq_update_util() It is useful to know the reason why cpufreq_update_util() has just been called and that can be passed as flags to cpufreq_update_util() and to the ->func() callback in struct update_util_data. However, doing that in addition to passing the util and max arguments they already take would be clumsy, so avoid it. Instead, use the observation that the schedutil governor is part of the scheduler proper, so it can access scheduler data directly. This allows the util and max arguments of cpufreq_update_util() and the ->func() callback in struct update_util_data to be replaced with a flags one, but schedutil has to be modified to follow. Thus make the schedutil governor obtain the CFS utilization information from the scheduler and use the "RT" and "DL" flags instead of the special utilization value of ULONG_MAX to track updates from the RT and DL sched classes. Make it non-modular too to avoid having to export scheduler variables to modules at large. Next, update all of the other users of cpufreq_update_util() and the ->func() callback in struct update_util_data accordingly. Suggested-by: Peter Zijlstra Signed-off-by: Rafael J. 
Wysocki Acked-by: Peter Zijlstra (Intel) Acked-by: Viresh Kumar --- include/linux/sched.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 62c68e513e39..b0fa726b7f31 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3469,15 +3469,19 @@ static inline unsigned long rlimit_max(unsigned int limit) return task_rlimit_max(current, limit); } +#define SCHED_CPUFREQ_RT (1U << 0) +#define SCHED_CPUFREQ_DL (1U << 1) + +#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL) + #ifdef CONFIG_CPU_FREQ struct update_util_data { - void (*func)(struct update_util_data *data, - u64 time, unsigned long util, unsigned long max); + void (*func)(struct update_util_data *data, u64 time, unsigned int flags); }; void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, - void (*func)(struct update_util_data *data, u64 time, - unsigned long util, unsigned long max)); + void (*func)(struct update_util_data *data, u64 time, + unsigned int flags)); void cpufreq_remove_update_util_hook(int cpu); #endif /* CONFIG_CPU_FREQ */ -- cgit v1.2.3 From 09a7d9eca1a6cf5eb4f9abfdf8914db9dbd96f08 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 19 Jul 2016 01:17:59 +0300 Subject: {net,IB}/mlx5: QP/XRCD commands via mlx5 ifc Remove old representation of manually created QP/XRCD commands layout and use mlx5_ifc canonical structures and defines. 
Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 5 +- include/linux/mlx5/qp.h | 108 +++--------------------------------------- 2 files changed, 11 insertions(+), 102 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2a39a06dbad4..cb94ac5b8420 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1966,7 +1966,10 @@ struct mlx5_ifc_qpc_bits { u8 reserved_at_3e0[0x8]; u8 cqn_snd[0x18]; - u8 reserved_at_400[0x40]; + u8 reserved_at_400[0x8]; + u8 deth_sqpn[0x18]; + + u8 reserved_at_420[0x20]; u8 reserved_at_440[0x8]; u8 last_acked_psn[0x18]; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 7879bf411891..16e1efecaf66 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -123,12 +123,13 @@ enum { }; enum { - MLX5_NON_ZERO_RQ = 0 << 24, - MLX5_SRQ_RQ = 1 << 24, - MLX5_CRQ_RQ = 2 << 24, - MLX5_ZERO_LEN_RQ = 3 << 24 + MLX5_NON_ZERO_RQ = 0x0, + MLX5_SRQ_RQ = 0x1, + MLX5_CRQ_RQ = 0x2, + MLX5_ZERO_LEN_RQ = 0x3 }; +/* TODO REM */ enum { /* params1 */ MLX5_QP_BIT_SRE = 1 << 15, @@ -177,12 +178,6 @@ enum { MLX5_FENCE_MODE_SMALL_AND_FENCE = 4 << 5, }; -enum { - MLX5_QP_LAT_SENSITIVE = 1 << 28, - MLX5_QP_BLOCK_MCAST = 1 << 30, - MLX5_QP_ENABLE_SIG = 1 << 31, -}; - enum { MLX5_RCV_DBR = 0, MLX5_SND_DBR = 1, @@ -525,34 +520,6 @@ struct mlx5_qp_context { u8 rsvd1[24]; }; -struct mlx5_create_qp_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 input_qpn; - u8 rsvd0[4]; - __be32 opt_param_mask; - u8 rsvd1[4]; - struct mlx5_qp_context ctx; - u8 rsvd3[16]; - __be64 pas[0]; -}; - -struct mlx5_create_qp_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 qpn; - u8 rsvd0[4]; -}; - -struct mlx5_destroy_qp_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 qpn; - u8 rsvd0[4]; -}; - -struct mlx5_destroy_qp_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[8]; -}; - struct mlx5_modify_qp_mbox_in { struct mlx5_inbox_hdr 
hdr; __be32 qpn; @@ -568,56 +535,6 @@ struct mlx5_modify_qp_mbox_out { u8 rsvd0[8]; }; -struct mlx5_query_qp_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 qpn; - u8 rsvd[4]; -}; - -struct mlx5_query_qp_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd1[8]; - __be32 optparam; - u8 rsvd0[4]; - struct mlx5_qp_context ctx; - u8 rsvd2[16]; - __be64 pas[0]; -}; - -struct mlx5_conf_sqp_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 qpn; - u8 rsvd[3]; - u8 type; -}; - -struct mlx5_conf_sqp_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_alloc_xrcd_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_alloc_xrcd_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 xrcdn; - u8 rsvd[4]; -}; - -struct mlx5_dealloc_xrcd_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 xrcdn; - u8 rsvd[4]; -}; - -struct mlx5_dealloc_xrcd_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u32 qpn) { return radix_tree_lookup(&dev->priv.qp_table.tree, qpn); @@ -628,20 +545,9 @@ static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, return radix_tree_lookup(&dev->priv.mkey_table.tree, key); } -struct mlx5_page_fault_resume_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 flags_qpn; - u8 reserved[4]; -}; - -struct mlx5_page_fault_resume_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - int mlx5_core_create_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, - struct mlx5_create_qp_mbox_in *in, + u32 *in, int inlen); int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation, struct mlx5_modify_qp_mbox_in *in, int sqd_event, @@ -649,7 +555,7 @@ int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation, int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, - struct mlx5_query_qp_mbox_out *out, int outlen); + u32 *out, int 
outlen); int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn); int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn); -- cgit v1.2.3 From 1a412fb1caa2c1b77719ccb5ed8b0c3c2bc65da7 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 19 Jul 2016 18:03:21 +0300 Subject: {net,IB}/mlx5: Modify QP commands via mlx5 ifc Prior to this patch we assumed that modify QP commands have the same layout. In ConnectX-4 for each QP transition there is a specific command and their layout can vary. e.g: 2err/2rst commands don't have QP context in their layout and before this patch we posted the QP context in those commands. Fortunately the FW only checks the suffix of the commands and executes them, while ignoring all invalid data sent after the valid command layout. This patch removes mlx5_modify_qp_mbox_in and changes mlx5_core_qp_modify to receive the required transition and QP context with opt_param_mask if needed. This way the caller is not required to provide the command inbox layout and it will be generated automatically. mlx5_core_qp_modify will generate the command inbox/outbox layouts according to the requested transition and will fill the requested parameters. 
Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/qp.h | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 16e1efecaf66..0aacb2a7480d 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -479,6 +479,7 @@ struct mlx5_qp_path { u8 rmac[6]; }; +/* FIXME: use mlx5_ifc.h qpc */ struct mlx5_qp_context { __be32 flags; __be32 flags_pd; @@ -520,21 +521,6 @@ struct mlx5_qp_context { u8 rsvd1[24]; }; -struct mlx5_modify_qp_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 qpn; - u8 rsvd0[4]; - __be32 optparam; - u8 rsvd1[4]; - struct mlx5_qp_context ctx; - u8 rsvd2[16]; -}; - -struct mlx5_modify_qp_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[8]; -}; - static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u32 qpn) { return radix_tree_lookup(&dev->priv.qp_table.tree, qpn); @@ -549,8 +535,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, u32 *in, int inlen); -int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation, - struct mlx5_modify_qp_mbox_in *in, int sqd_event, +int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode, + u32 opt_param_mask, void *qpc, struct mlx5_core_qp *qp); int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); -- cgit v1.2.3 From c4f287c4a6ac489c18afc4acc4353141a8c53070 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 19 Jul 2016 20:17:12 +0300 Subject: net/mlx5: Unify and improve command interface Now as all commands use mlx5 ifc interface, instead of doing two calls for executing a command we embed command status checking into mlx5_cmd_exec to simplify the interface. Also we do here some cleanup for redundant software structures (inbox/outbox) and functions and improved command failure output. 
Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 115 -------------------------------------------- include/linux/mlx5/driver.h | 7 +-- 2 files changed, 4 insertions(+), 118 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index fb002db1e2f0..2575070c836e 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -398,33 +398,6 @@ enum { MLX5_MAX_SGE_RD = (512 - 16 - 16) / 16 }; -struct mlx5_inbox_hdr { - __be16 opcode; - u8 rsvd[4]; - __be16 opmod; -}; - -struct mlx5_outbox_hdr { - u8 status; - u8 rsvd[3]; - __be32 syndrome; -}; - -struct mlx5_cmd_query_adapter_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_cmd_query_adapter_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[24]; - u8 intapin; - u8 rsvd1[13]; - __be16 vsd_vendor_id; - u8 vsd[208]; - u8 vsd_psid[16]; -}; - enum mlx5_odp_transport_cap_bits { MLX5_ODP_SUPPORT_SEND = 1 << 31, MLX5_ODP_SUPPORT_RECV = 1 << 30, @@ -457,7 +430,6 @@ struct mlx5_cmd_layout { u8 status_own; }; - struct health_buffer { __be32 assert_var[5]; __be32 rsvd0[3]; @@ -819,93 +791,6 @@ struct mlx5_cqe128 { struct mlx5_cqe64 cqe64; }; -struct mlx5_srq_ctx { - u8 state_log_sz; - u8 rsvd0[3]; - __be32 flags_xrcd; - __be32 pgoff_cqn; - u8 rsvd1[4]; - u8 log_pg_sz; - u8 rsvd2[7]; - __be32 pd; - __be16 lwm; - __be16 wqe_cnt; - u8 rsvd3[8]; - __be64 db_record; -}; - -struct mlx5_create_srq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 input_srqn; - u8 rsvd0[4]; - struct mlx5_srq_ctx ctx; - u8 rsvd1[208]; - __be64 pas[0]; -}; - -struct mlx5_create_srq_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 srqn; - u8 rsvd[4]; -}; - -struct mlx5_destroy_srq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 srqn; - u8 rsvd[4]; -}; - -struct mlx5_destroy_srq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_query_srq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 srqn; - u8 
rsvd0[4]; -}; - -struct mlx5_query_srq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[8]; - struct mlx5_srq_ctx ctx; - u8 rsvd1[32]; - __be64 pas[0]; -}; - -struct mlx5_arm_srq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 srqn; - __be16 rsvd; - __be16 lwm; -}; - -struct mlx5_arm_srq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_enable_hca_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_enable_hca_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_disable_hca_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_disable_hca_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - enum { MLX5_MKEY_STATUS_FREE = 1 << 6, }; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 173817187abb..ebe57abf3324 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -771,14 +771,15 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev); void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); -int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr); -int mlx5_cmd_status_to_err_v2(void *ptr); -int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); + int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size, mlx5_cmd_cbk_t callback, void *context); +void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); + +int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn); int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn); int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); -- cgit v1.2.3 From 9def7121bed3be8a9d126c900ca7067647bc4789 Mon Sep 17 00:00:00 2001 From: Hadar 
Hen Zion Date: Wed, 3 Aug 2016 17:27:30 +0300 Subject: net/mlx5: Enable setting minimum inline header mode for VFs Implement the low-level part of the PF side in setting minimum inline header mode for VFs. Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 2 +- include/linux/mlx5/vport.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cb94ac5b8420..7a8ef0af94e7 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4724,7 +4724,7 @@ struct mlx5_ifc_modify_nic_vport_field_select_bits { u8 reserved_at_0[0x16]; u8 node_guid[0x1]; u8 port_guid[0x1]; - u8 reserved_at_18[0x1]; + u8 min_inline[0x1]; u8 mtu[0x1]; u8 change_event[0x1]; u8 promisc[0x1]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index e087b7d047ac..451b0bde9083 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -45,6 +45,8 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr); void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, u8 *min_inline); +int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev, + u16 vport, u8 min_inline); int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev, u16 vport, u8 *addr); int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu); -- cgit v1.2.3 From 7adbde2035c2e5baf2f6a90eba11813db4813a67 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 3 Aug 2016 15:08:33 +0300 Subject: net/mlx5: Update mlx5_ifc.h for vxlan encap/decap Add the required definitions related to vxlan encap/decap. 
Signed-off-by: Hadar Hen Zion Signed-off-by: Ilya Lesokhin Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 105 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 101 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 7a8ef0af94e7..3766110e13ea 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -212,6 +212,8 @@ enum { MLX5_CMD_OP_DEALLOC_FLOW_COUNTER = 0x93a, MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b, MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c, + MLX5_CMD_OP_ALLOC_ENCAP_HEADER = 0x93d, + MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e, MLX5_CMD_OP_MAX }; @@ -281,7 +283,9 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 modify_root[0x1]; u8 identified_miss_table_mode[0x1]; u8 flow_table_modify[0x1]; - u8 reserved_at_7[0x19]; + u8 encap[0x1]; + u8 decap[0x1]; + u8 reserved_at_9[0x17]; u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; @@ -512,7 +516,15 @@ struct mlx5_ifc_e_switch_cap_bits { u8 nic_vport_node_guid_modify[0x1]; u8 nic_vport_port_guid_modify[0x1]; - u8 reserved_at_20[0x7e0]; + u8 vxlan_encap_decap[0x1]; + u8 nvgre_encap_decap[0x1]; + u8 reserved_at_22[0x9]; + u8 log_max_encap_headers[0x5]; + u8 reserved_2b[0x6]; + u8 max_encap_header_size[0xa]; + + u8 reserved_40[0x7c0]; + }; struct mlx5_ifc_qos_cap_bits { @@ -2067,6 +2079,8 @@ enum { MLX5_FLOW_CONTEXT_ACTION_DROP = 0x2, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST = 0x4, MLX5_FLOW_CONTEXT_ACTION_COUNT = 0x8, + MLX5_FLOW_CONTEXT_ACTION_ENCAP = 0x10, + MLX5_FLOW_CONTEXT_ACTION_DECAP = 0x20, }; struct mlx5_ifc_flow_context_bits { @@ -2086,7 +2100,9 @@ struct mlx5_ifc_flow_context_bits { u8 reserved_at_a0[0x8]; u8 flow_counter_list_size[0x18]; - u8 reserved_at_c0[0x140]; + u8 encap_id[0x20]; + + u8 reserved_at_e0[0x120]; struct mlx5_ifc_fte_match_param_bits match_value; @@ -4216,6 +4232,85 @@ struct mlx5_ifc_query_eq_in_bits { u8 reserved_at_60[0x20]; }; 
+struct mlx5_ifc_encap_header_in_bits { + u8 reserved_at_0[0x5]; + u8 header_type[0x3]; + u8 reserved_at_8[0xe]; + u8 encap_header_size[0xa]; + + u8 reserved_at_20[0x10]; + u8 encap_header[2][0x8]; + + u8 more_encap_header[0][0x8]; +}; + +struct mlx5_ifc_query_encap_header_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0xa0]; + + struct mlx5_ifc_encap_header_in_bits encap_header[0]; +}; + +struct mlx5_ifc_query_encap_header_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 encap_id[0x20]; + + u8 reserved_at_60[0xa0]; +}; + +struct mlx5_ifc_alloc_encap_header_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 encap_id[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_alloc_encap_header_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0xa0]; + + struct mlx5_ifc_encap_header_in_bits encap_header; +}; + +struct mlx5_ifc_dealloc_encap_header_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_dealloc_encap_header_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_20[0x10]; + u8 op_mod[0x10]; + + u8 encap_id[0x20]; + + u8 reserved_60[0x20]; +}; + struct mlx5_ifc_query_dct_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6102,7 +6197,9 @@ struct mlx5_ifc_create_flow_table_in_bits { u8 reserved_at_a0[0x20]; - u8 reserved_at_c0[0x4]; + u8 encap_en[0x1]; + u8 decap_en[0x1]; + u8 reserved_at_c2[0x2]; u8 table_miss_mode[0x4]; u8 level[0x8]; u8 reserved_at_d0[0x8]; -- cgit v1.2.3 From 8457a1b49a2af0a0e71f80afed9f7c80de361610 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 15 Aug 2016 06:15:35 -0700 Subject: extcon: Introduce EXTCON_PROP_USB_SS property for SuperSpeed mode EXTCON_PROP_USB_SS (SuperSpeed)[1] is necessary to distinguish 
between USB/USB2 and USB3 connections on USB Type-C cables. [1] https://en.wikipedia.org/wiki/USB#Overview Cc: Chris Zhong Signed-off-by: Guenter Roeck Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 461abee969b7..b34d1ae9011f 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -109,12 +109,18 @@ * @type: integer (intval) * @value: 0 (normal) or 1 (flip) * @default: 0 (normal) + * - EXTCON_PROP_USB_SS (SuperSpeed) + * @type: integer (intval) + * @value: 0 (USB/USB2) or 1 (USB3) + * @default: 0 (USB/USB2) + * */ #define EXTCON_PROP_USB_VBUS 0 #define EXTCON_PROP_USB_TYPEC_POLARITY 1 +#define EXTCON_PROP_USB_SS 2 #define EXTCON_PROP_USB_MIN 0 -#define EXTCON_PROP_USB_MAX 1 +#define EXTCON_PROP_USB_MAX 2 #define EXTCON_PROP_USB_CNT (EXTCON_PROP_USB_MAX - EXTCON_PROP_USB_MIN + 1) /* Properties of EXTCON_TYPE_CHG. */ -- cgit v1.2.3 From ddf711872c9d2b05b0fb25db3e6e0c2a50be39e3 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 11 Aug 2016 14:52:50 -0700 Subject: remoteproc: Introduce auto-boot flag Introduce an "auto-boot" flag on rprocs to make it possible to flag remote processors without vdevs to automatically boot once the firmware is found. Preserve previous behavior of the wkup_m3 processor being explicitly booted by a consumer. 
Cc: Lee Jones Cc: Loic Pallardy Cc: Suman Anna Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 8229523f70a5..4783c8c4645a 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -443,6 +443,7 @@ struct rproc { struct resource_table *cached_table; u32 table_csum; bool has_iommu; + bool auto_boot; }; /* we currently support only two vrings per rvdev */ -- cgit v1.2.3 From 988d204cdaf604c59316dadb98eba2da2188b762 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 11 Aug 2016 14:52:53 -0700 Subject: remoteproc: Move handling of cached table to boot/shutdown As we moved the vdev handling to the main boot/shutdown code path we can further simplify the resource table handling by moving the parsing step to boot as well. The lifespan of the resource table is changed to live from rproc_boot() to rproc_shutdown(). Cc: Lee Jones Cc: Loic Pallardy Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 4783c8c4645a..d488f9e1e08c 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -409,7 +409,6 @@ enum rproc_crash_type { * @max_notifyid: largest allocated notify id. 
* @table_ptr: pointer to the resource table in effect * @cached_table: copy of the resource table - * @table_csum: checksum of the resource table * @has_iommu: flag to indicate if remote processor is behind an MMU */ struct rproc { @@ -441,7 +440,6 @@ struct rproc { int max_notifyid; struct resource_table *table_ptr; struct resource_table *cached_table; - u32 table_csum; bool has_iommu; bool auto_boot; }; -- cgit v1.2.3 From 29dd3288705f26cc27663e79061209dabce2d5b9 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 17 Aug 2016 15:06:08 +0530 Subject: bitmap.h, perf/core: Fix the mask in perf_output_sample_regs() When decoding the perf_regs mask in perf_output_sample_regs(), we loop through the mask using find_first_bit and find_next_bit functions. While the existing code works fine in most cases, the logic is broken for big-endian 32-bit kernels. When reading a u64 mask using (u32 *)(&val)[0], find_*_bit() assumes that it gets the lower 32 bits of u64, but instead it gets the upper 32 bits - which is wrong. The fix is to swap the words of the u64 to handle this case. This is _not_ a regular endianness swap. 
Suggested-by: Yury Norov Signed-off-by: Madhavan Srinivasan Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Yury Norov Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Jiri Olsa Cc: Linus Torvalds Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1471426568-31051-2-git-send-email-maddy@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/bitmap.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 598bc999f4c2..3b77588a9360 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -339,6 +339,24 @@ static inline int bitmap_parse(const char *buf, unsigned int buflen, return __bitmap_parse(buf, buflen, 0, maskp, nmaskbits); } +/* + * bitmap_from_u64 - Check and swap words within u64. + * @mask: source bitmap + * @dst: destination bitmap + * + * In 32-bit Big Endian kernel, when using (u32 *)(&val)[*] + * to read u64 mask, we will get the wrong word. + * That is "(u32 *)(&val)[0]" gets the upper 32 bits, + * but we expect the lower 32-bits of u64. + */ +static inline void bitmap_from_u64(unsigned long *dst, u64 mask) +{ + dst[0] = mask & ULONG_MAX; + + if (sizeof(mask) > sizeof(unsigned long)) + dst[1] = mask >> 32; +} + #endif /* __ASSEMBLY__ */ #endif /* __LINUX_BITMAP_H */ -- cgit v1.2.3 From 4ff6a8debf48a7bf48e93c01da720785070d3a25 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Wed, 17 Aug 2016 13:55:05 -0700 Subject: perf/core: Generalize event->group_flags Currently, PERF_GROUP_SOFTWARE is used in the group_flags field of a group's leader to indicate that is_software_event(event) is true for all events in a group. This is the only usage of event->group_flags. 
This pattern of setting a group level flag when all events in the group share a property is useful for the flag introduced in the next patch and for future CQM/CMT flags. So this patch generalizes group_flags to work as an aggregate of event level flags. PERF_GROUP_SOFTWARE denotes an immutable event's property. All other flags that I intend to add are also determinable at event initialization. To better convey the above, this patch renames event's group_flags to group_caps and PERF_GROUP_SOFTWARE to PERF_EV_CAP_SOFTWARE. Individual event flags are stored in the new event->event_caps. Since the cap flags do not change after event initialization, there is no need to serialize event_caps. This new field is used when events are added to a context, similarly to how PERF_GROUP_SOFTWARE and is_software_event() worked. Lastly, for consistency, update is_software_event() to rely on event_caps instead of the context index. Signed-off-by: David Carrillo-Cisneros Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Paul Turner Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vegard Nossum Cc: Vince Weaver Link: http://lkml.kernel.org/r/1471467307-61171-3-git-send-email-davidcc@google.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 529c41fa73c8..6f7459f72dfd 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -510,9 +510,12 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, struct perf_sample_data *, struct pt_regs *regs); -enum perf_group_flag { - PERF_GROUP_SOFTWARE = 0x1, -}; +/* + * Event capabilities. For event_caps and groups caps. + * + * PERF_EV_CAP_SOFTWARE: Is a software event. 
+ */ +#define PERF_EV_CAP_SOFTWARE BIT(0) #define SWEVENT_HLIST_BITS 8 #define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) @@ -568,7 +571,12 @@ struct perf_event { struct hlist_node hlist_entry; struct list_head active_entry; int nr_siblings; - int group_flags; + + /* Not serialized. Only written during event initialization. */ + int event_caps; + /* The cumulative AND of all event_caps for events in this group. */ + int group_caps; + struct perf_event *group_leader; struct pmu *pmu; void *pmu_private; @@ -988,7 +996,7 @@ static inline bool is_sampling_event(struct perf_event *event) */ static inline int is_software_event(struct perf_event *event) { - return event->pmu->task_ctx_nr == perf_sw_context; + return event->event_caps & PERF_EV_CAP_SOFTWARE; } extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; -- cgit v1.2.3 From d6a2f9035bfc27d0e9d78b13635dda9fb017ac01 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Wed, 17 Aug 2016 13:55:06 -0700 Subject: perf/core: Introduce PMU_EV_CAP_READ_ACTIVE_PKG Introduce the flag PMU_EV_CAP_READ_ACTIVE_PKG, useful for uncore events, that allows a PMU to signal the generic perf code that an event is readable in the current CPU if the event is active in a CPU in the same package as the current CPU. This is an optimization that avoids an unnecessary IPI for the common case where uncore events are run and read in the same package but in different CPUs. As an example, the IPI removal speeds up perf_read() in my Haswell system as follows: - For event UNC_C_LLC_LOOKUP: From 260 us to 31 us. - For event RAPL's power/energy-cores/: From 255 us to 27 us. For the optimization to work, all events in the group must have it (similarly to PERF_EV_CAP_SOFTWARE). 
Signed-off-by: David Carrillo-Cisneros Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: David Carrillo-Cisneros Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Paul Turner Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vegard Nossum Cc: Vince Weaver Link: http://lkml.kernel.org/r/1471467307-61171-4-git-send-email-davidcc@google.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6f7459f72dfd..5c5362584aba 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -514,8 +514,11 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, * Event capabilities. For event_caps and groups caps. * * PERF_EV_CAP_SOFTWARE: Is a software event. + * PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read + * from any CPU in the package where it is active. */ #define PERF_EV_CAP_SOFTWARE BIT(0) +#define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1) #define SWEVENT_HLIST_BITS 8 #define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) -- cgit v1.2.3 From 1f6e6c7cb9bcd58abb5ee11243e0eefe6b36fc8e Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Mon, 25 Jul 2016 14:34:22 +0100 Subject: sched/core: Introduce SD_ASYM_CPUCAPACITY sched_domain topology flag Add a topology flag to the sched_domain hierarchy indicating the lowest domain level where the full range of CPU capacities is represented by the domain members for asymmetric capacity topologies (e.g. ARM big.LITTLE). The flag is intended to indicate that extra care should be taken when placing tasks on CPUs and this level spans all the different types of CPUs found in the system (no need to look further up the domain hierarchy). This information is currently only available through iterating through the capacities of all the CPUs at parent levels in the sched_domain hierarchy. 
SD 2 [ 0 1 2 3] SD_ASYM_CPUCAPACITY SD 1 [ 0 1] [ 2 3] !SD_ASYM_CPUCAPACITY CPU: 0 1 2 3 capacity: 756 756 1024 1024 If the topology in the example above is duplicated to create an eight CPU example with third sched_domain level on top (SD 3), this level should not have the flag set (!SD_ASYM_CPUCAPACITY) as its two group would both have all CPU capacities represented within them. Signed-off-by: Morten Rasmussen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dietmar.eggemann@arm.com Cc: freedom.tan@mediatek.com Cc: keita.kobayashi.ym@renesas.com Cc: mgalbraith@suse.de Cc: sgurrappadi@nvidia.com Cc: vincent.guittot@linaro.org Cc: yuyang.du@intel.com Link: http://lkml.kernel.org/r/1469453670-2660-6-git-send-email-morten.rasmussen@arm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 7f64e89a5873..d75024053e9b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1022,6 +1022,7 @@ extern void wake_up_q(struct wake_q_head *head); #define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ #define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ #define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ +#define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */ #define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */ #define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */ #define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ -- cgit v1.2.3 From fe7bd58f5d25d5d655b1da4a084cc4ef6f085fee Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Wed, 17 Aug 2016 16:00:33 +0800 Subject: x86/ioapic: Change prototype of acpi_ioapic_add() Change the argument of acpi_ioapic_add() to a generic ACPI handle, and move its prototype from drivers/acpi/internal.h to include/linux/acpi.h so 
that it can be called from outside the pci_root driver. Signed-off-by: Rui Wang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bhelgaas@google.com Cc: helgaas@kernel.org Cc: linux-acpi@vger.kernel.org Cc: linux-pci@vger.kernel.org Cc: rjw@rjwysocki.net Cc: tony.luck@intel.com Link: http://lkml.kernel.org/r/1471420837-31003-2-git-send-email-rui.y.wang@intel.com Signed-off-by: Ingo Molnar --- include/linux/acpi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4d8452c2384b..c9a596b9535c 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -751,6 +751,12 @@ static inline int acpi_reconfig_notifier_unregister(struct notifier_block *nb) #endif /* !CONFIG_ACPI */ +#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC +int acpi_ioapic_add(acpi_handle root); +#else +static inline int acpi_ioapic_add(acpi_handle root) { return 0; } +#endif + #ifdef CONFIG_ACPI void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state, u32 pm1a_ctrl, u32 pm1b_ctrl)); -- cgit v1.2.3 From 5090cc6ae2f79ee779e5faf7c8a28edf42b7d738 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 17 Aug 2016 21:08:01 +0200 Subject: spi: introduce max_message_size hook in spi_master Recently a maximum transfer size was introduced in struct spi_master. However there are also spi controllers with a maximum message size, e.g. fsl-espi has a max message size of 64KB. Introduce a hook max_message_size to deal with such limitations. Also make sure that spi_max_transfer_size doesn't return greater values than spi_max_message_size, even if hook max_transfer_size is not set. 
Signed-off-by: Heiner Kallweit Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 072cb2aa2413..f2d3960cc3c3 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -312,6 +312,8 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @flags: other constraints relevant to this driver * @max_transfer_size: function that returns the max transfer size for * a &spi_device; may be %NULL, so the default %SIZE_MAX will be used. + * @max_message_size: function that returns the max message size for + * a &spi_device; may be %NULL, so the default %SIZE_MAX will be used. * @io_mutex: mutex for physical bus access * @bus_lock_spinlock: spinlock for SPI bus locking * @bus_lock_mutex: mutex for exclusion of multiple callers @@ -442,10 +444,11 @@ struct spi_master { #define SPI_MASTER_MUST_TX BIT(4) /* requires tx */ /* - * on some hardware transfer size may be constrained + * on some hardware transfer / message size may be constrained * the limit may depend on device transfer settings */ size_t (*max_transfer_size)(struct spi_device *spi); + size_t (*max_message_size)(struct spi_device *spi); /* I/O mutex */ struct mutex io_mutex; @@ -905,12 +908,26 @@ extern int spi_async_locked(struct spi_device *spi, struct spi_message *message); static inline size_t -spi_max_transfer_size(struct spi_device *spi) +spi_max_message_size(struct spi_device *spi) { struct spi_master *master = spi->master; - if (!master->max_transfer_size) + if (!master->max_message_size) return SIZE_MAX; - return master->max_transfer_size(spi); + return master->max_message_size(spi); +} + +static inline size_t +spi_max_transfer_size(struct spi_device *spi) +{ + struct spi_master *master = spi->master; + size_t tr_max = SIZE_MAX; + size_t msg_max = spi_max_message_size(spi); + + if (master->max_transfer_size) 
+ tr_max = master->max_transfer_size(spi); + + /* transfer size limit must not be greater than message size limit */ + return min(tr_max, msg_max); } /*---------------------------------------------------------------------------*/ -- cgit v1.2.3 From 3942a9bd7b5842a924e99ee6ec1350b8006c94ec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Aug 2016 18:54:13 +0200 Subject: locking, rcu, cgroup: Avoid synchronize_sched() in __cgroup_procs_write() The current percpu-rwsem read side is entirely free of serializing insns at the cost of having a synchronize_sched() in the write path. The latency of the synchronize_sched() is too high for cgroups. The commit 1ed1328792ff talks about the write path being a fairly cold path but this is not the case for Android which moves tasks to the foreground cgroup and back around binder IPC calls from foreground processes to background processes, so it is significantly hotter than human initiated operations. Switch cgroup_threadgroup_rwsem into the slow mode for now to avoid the problem, hopefully it should not be that slow after another commit: 80127a39681b ("locking/percpu-rwsem: Optimize readers and reduce global impact"). We could just add rcu_sync_enter() into cgroup_init() but we do not want another synchronize_sched() at boot time, so this patch adds the new helper which doesn't block but currently can only be called before the first use. Reported-by: John Stultz Reported-by: Dmitry Shmidt Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Colin Cross Cc: Linus Torvalds Cc: Paul E.
McKenney Cc: Peter Zijlstra Cc: Rom Lemarchand Cc: Tejun Heo Cc: Thomas Gleixner Cc: Todd Kjos Link: http://lkml.kernel.org/r/20160811165413.GA22807@redhat.com Signed-off-by: Ingo Molnar --- include/linux/rcu_sync.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index a63a33e6196e..ece7ed9a4a70 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -59,6 +59,7 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) } extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); +extern void rcu_sync_enter_start(struct rcu_sync *); extern void rcu_sync_enter(struct rcu_sync *); extern void rcu_sync_exit(struct rcu_sync *); extern void rcu_sync_dtor(struct rcu_sync *); -- cgit v1.2.3 From 83b502a12e82d0ae97907d415496fbafe044f0ce Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Thu, 4 Aug 2016 17:32:02 +0300 Subject: net/mlx5: Modify RQ bitmask from mlx5 ifc Use mlx5 ifc MODIFY_BITMASK_VSD in mlx5e_modify_rq_vsd and expose counter set capability bit in hca caps structure. 
Signed-off-by: Alex Vesker Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 4 ---- include/linux/mlx5/mlx5_ifc.h | 9 ++++++++- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ebe57abf3324..0ea78b5edbb2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -48,10 +48,6 @@ #include #include -enum { - MLX5_RQ_BITMASK_VSD = 1 << 1, -}; - enum { MLX5_BOARD_ID_LEN = 64, MLX5_MAX_NAME_LEN = 16, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3766110e13ea..e1f8e3491867 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -779,7 +779,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 out_of_seq_cnt[0x1]; u8 vport_counters[0x1]; u8 retransmission_q_counters[0x1]; - u8 reserved_at_183[0x3]; + u8 reserved_at_183[0x1]; + u8 modify_rq_counter_set_id[0x1]; + u8 reserved_at_185[0x1]; u8 max_qp_cnt[0xa]; u8 pkey_table_size[0x10]; @@ -4750,6 +4752,11 @@ struct mlx5_ifc_modify_rq_out_bits { u8 reserved_at_40[0x40]; }; +enum { + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD = 1ULL << 1, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID = 1ULL << 3, +}; + struct mlx5_ifc_modify_rq_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; -- cgit v1.2.3 From 2e353b3468ecb1d12a44aaf35888f7de47d5c047 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Sun, 3 Jul 2016 14:57:33 +0300 Subject: net/mlx5: Update struct mlx5_ifc_xrqc_bits Update struct mlx5_ifc_xrqc_bits according to last specification Signed-off-by: Artemy Kovalyov Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index e1f8e3491867..5f150c849a8f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ 
b/include/linux/mlx5/mlx5_ifc.h @@ -2829,7 +2829,7 @@ struct mlx5_ifc_xrqc_bits { struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context; - u8 reserved_at_180[0x180]; + u8 reserved_at_180[0x200]; struct mlx5_ifc_wq_bits wq; }; -- cgit v1.2.3 From 8cca30a7f914fe363fa9700715619ca5c8cb38cc Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sun, 26 Jun 2016 12:43:24 +0300 Subject: net/mlx5: Expose mlx5e_link_mode The mlx5e_link_mode enumeration will also be used in mlx5_ib for RoCE. This patch moves the enumeration to the mlx5 driver port header file. Signed-off-by: Noa Osherovich Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/port.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index e3012cc64b8a..6f876a4770f6 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -61,6 +61,39 @@ enum mlx5_an_status { #define MLX5_I2C_ADDR_HIGH 0x51 #define MLX5_EEPROM_PAGE_LENGTH 256 +enum mlx5e_link_mode { + MLX5E_1000BASE_CX_SGMII = 0, + MLX5E_1000BASE_KX = 1, + MLX5E_10GBASE_CX4 = 2, + MLX5E_10GBASE_KX4 = 3, + MLX5E_10GBASE_KR = 4, + MLX5E_20GBASE_KR2 = 5, + MLX5E_40GBASE_CR4 = 6, + MLX5E_40GBASE_KR4 = 7, + MLX5E_56GBASE_R4 = 8, + MLX5E_10GBASE_CR = 12, + MLX5E_10GBASE_SR = 13, + MLX5E_10GBASE_ER = 14, + MLX5E_40GBASE_SR4 = 15, + MLX5E_40GBASE_LR4 = 16, + MLX5E_50GBASE_SR2 = 18, + MLX5E_100GBASE_CR4 = 20, + MLX5E_100GBASE_SR4 = 21, + MLX5E_100GBASE_KR4 = 22, + MLX5E_100GBASE_LR4 = 23, + MLX5E_100BASE_TX = 24, + MLX5E_1000BASE_T = 25, + MLX5E_10GBASE_T = 26, + MLX5E_25GBASE_CR = 27, + MLX5E_25GBASE_KR = 28, + MLX5E_25GBASE_SR = 29, + MLX5E_50GBASE_CR2 = 30, + MLX5E_50GBASE_KR2 = 31, + MLX5E_LINK_MODES_NUMBER, +}; + +#define MLX5E_PROT_MASK(link_mode) (1 << link_mode) + int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int 
mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port); -- cgit v1.2.3 From d5beb7f2aff4a60237fd97a98d49a78c9045b8f2 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Thu, 2 Jun 2016 10:47:53 +0300 Subject: net/mlx5: Separate query_port_proto_oper for IB and EN Replaced mlx5_query_port_proto_oper with separate functions per link type. The functions should take different arguments so no point in trying to unite them. Signed-off-by: Noa Osherovich Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/port.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 6f876a4770f6..b3065acd20b4 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -103,9 +103,10 @@ int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, u32 *proto_admin, int proto_mask); int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, u8 *link_width_oper, u8 local_port); -int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, - u8 *proto_oper, int proto_mask, - u8 local_port); +int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, + u8 *proto_oper, u8 local_port); +int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev, + u32 *proto_oper, u8 local_port); int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, u32 proto_admin, int proto_mask); void mlx5_toggle_port_link(struct mlx5_core_dev *dev); -- cgit v1.2.3 From 84df61ebc69bdc466180e02d654e9b0284781288 Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Tue, 10 May 2016 13:47:50 +0300 Subject: net/mlx5: Add HW interfaces used by LAG Exposed LAG commands enum and layouts: - CREATE_LAG HW enters LAG mode: RoCE traffic from port two is received on PF0 core dev. Allows to set tx_affinity (tx port) for QPs and TISes. 
Allows to port remap QPs and TISes, overriding their tx_affinity behavior. - MODIFY_LAG Remap QPs and TISes to another port. - QUERY_LAG Query whether LAG mode is active. - DESTROY_LAG HW exits LAG mode, returning to non-LAG behavior. - CREATE_VPORT_LAG Merge Ethernet flow steering, such that traffic received on port two jumps to PF0 root flow table. Available only in LAG mode. - DESTROY_VPORT_LAG Ethernet flow steering returns to non-LAG behavior. Caps added: - lag_master Driver is in charge of managing the LAG. This is currently the only option. - num_lag_ports LAG is supported only if this field's value is 2. Other fields: - QP/TIS tx port affinity During LAG, this field controls on which port a QP or TIS resides. - TIS strict tx affinity When this field is set, the TIS will not be subject to port remap by CREATE_LAG/MODIFY_LAG. - LAG demux flow table Flow table used for redirecting non user-space traffic back to PF1 root flow table, if the packet was received on port two. Signed-off-by: Aviv Heller Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 166 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 159 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5f150c849a8f..043d5256b754 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -174,6 +174,12 @@ enum { MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY = 0x82b, MLX5_CMD_OP_SET_WOL_ROL = 0x830, MLX5_CMD_OP_QUERY_WOL_ROL = 0x831, + MLX5_CMD_OP_CREATE_LAG = 0x840, + MLX5_CMD_OP_MODIFY_LAG = 0x841, + MLX5_CMD_OP_QUERY_LAG = 0x842, + MLX5_CMD_OP_DESTROY_LAG = 0x843, + MLX5_CMD_OP_CREATE_VPORT_LAG = 0x844, + MLX5_CMD_OP_DESTROY_VPORT_LAG = 0x845, MLX5_CMD_OP_CREATE_TIR = 0x900, MLX5_CMD_OP_MODIFY_TIR = 0x901, MLX5_CMD_OP_DESTROY_TIR = 0x902, @@ -884,7 +890,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 pad_tx_eth_packet[0x1]; u8 reserved_at_263[0x8]; u8 
log_bf_reg_size[0x5]; - u8 reserved_at_270[0x10]; + + u8 reserved_at_270[0xb]; + u8 lag_master[0x1]; + u8 num_lag_ports[0x4]; u8 reserved_at_280[0x10]; u8 max_wqe_sz_sq[0x10]; @@ -1918,7 +1927,7 @@ enum { struct mlx5_ifc_qpc_bits { u8 state[0x4]; - u8 reserved_at_4[0x4]; + u8 lag_tx_port_affinity[0x4]; u8 st[0x8]; u8 reserved_at_10[0x3]; u8 pm_state[0x2]; @@ -2167,7 +2176,11 @@ struct mlx5_ifc_traffic_counter_bits { }; struct mlx5_ifc_tisc_bits { - u8 reserved_at_0[0xc]; + u8 strict_lag_tx_port_affinity[0x1]; + u8 reserved_at_1[0x3]; + u8 lag_tx_port_affinity[0x04]; + + u8 reserved_at_8[0x4]; u8 prio[0x4]; u8 reserved_at_10[0x10]; @@ -4617,7 +4630,9 @@ struct mlx5_ifc_modify_tis_out_bits { struct mlx5_ifc_modify_tis_bitmask_bits { u8 reserved_at_0[0x20]; - u8 reserved_at_20[0x1f]; + u8 reserved_at_20[0x1d]; + u8 lag_tx_port_affinity[0x1]; + u8 strict_lag_tx_port_affinity[0x1]; u8 prio[0x1]; }; @@ -6215,7 +6230,10 @@ struct mlx5_ifc_create_flow_table_in_bits { u8 reserved_at_e0[0x8]; u8 table_miss_id[0x18]; - u8 reserved_at_100[0x100]; + u8 reserved_at_100[0x8]; + u8 lag_master_next_table_id[0x18]; + + u8 reserved_at_120[0x80]; }; struct mlx5_ifc_create_flow_group_out_bits { @@ -7669,7 +7687,8 @@ struct mlx5_ifc_set_flow_table_root_in_bits { }; enum { - MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID = 0x1, + MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID = (1UL << 0), + MLX5_MODIFY_FLOW_TABLE_LAG_NEXT_TABLE_ID = (1UL << 15), }; struct mlx5_ifc_modify_flow_table_out_bits { @@ -7708,7 +7727,10 @@ struct mlx5_ifc_modify_flow_table_in_bits { u8 reserved_at_e0[0x8]; u8 table_miss_id[0x18]; - u8 reserved_at_100[0x100]; + u8 reserved_at_100[0x8]; + u8 lag_master_next_table_id[0x18]; + + u8 reserved_at_120[0x80]; }; struct mlx5_ifc_ets_tcn_config_reg_bits { @@ -7816,4 +7838,134 @@ struct mlx5_ifc_dcbx_param_bits { u8 error[0x8]; u8 reserved_at_a0[0x160]; }; + +struct mlx5_ifc_lagc_bits { + u8 reserved_at_0[0x1d]; + u8 lag_state[0x3]; + + u8 reserved_at_20[0x14]; + u8 tx_remap_affinity_2[0x4]; 
+ u8 reserved_at_38[0x4]; + u8 tx_remap_affinity_1[0x4]; +}; + +struct mlx5_ifc_create_lag_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_create_lag_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + struct mlx5_ifc_lagc_bits ctx; +}; + +struct mlx5_ifc_modify_lag_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_modify_lag_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x20]; + u8 field_select[0x20]; + + struct mlx5_ifc_lagc_bits ctx; +}; + +struct mlx5_ifc_query_lag_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_lagc_bits ctx; +}; + +struct mlx5_ifc_query_lag_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_destroy_lag_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_destroy_lag_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_create_vport_lag_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_create_vport_lag_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_destroy_vport_lag_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_destroy_vport_lag_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 
op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + #endif /* MLX5_IFC_H */ -- cgit v1.2.3 From 7907f23adc186700efbe56c032527e47485c86ab Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Sun, 17 Apr 2016 16:57:32 +0300 Subject: net/mlx5: Implement RoCE LAG feature Available on dual port cards only, this feature keeps track, using netdev LAG events, of the bonding and link status of each port's PF netdev. When both of the card's PF netdevs are enslaved to the same bond/team master, and only them, LAG state is active. During LAG, only one IB device is present for both ports. In addition to the above, this commit includes FW commands used for managing the LAG, new facilities for adding and removing a single device by interface, and port remap functionality according to bond events. Please note that this feature is currently used only for mimicking Ethernet bonding for RoCE - netdevs functionality is not altered, and their bonding continues to be managed solely by bond/team driver. Signed-off-by: Aviv Heller Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0ea78b5edbb2..ed983b8c3213 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -477,6 +477,7 @@ struct mlx5_fc_stats { }; struct mlx5_eswitch; +struct mlx5_lag; struct mlx5_rl_entry { u32 rate; @@ -550,6 +551,7 @@ struct mlx5_priv { struct mlx5_flow_steering *steering; struct mlx5_eswitch *eswitch; struct mlx5_core_sriov sriov; + struct mlx5_lag *lag; unsigned long pci_dev_data; struct mlx5_fc_stats fc_stats; struct mlx5_rl_table rl_table; @@ -942,6 +944,8 @@ int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); +bool mlx5_lag_is_active(struct mlx5_core_dev *dev); + 
struct mlx5_profile { u64 mask; u8 log_max_qp; -- cgit v1.2.3 From 6a32047a441b870dd2570fe0831dada5e9ce40f6 Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Mon, 9 May 2016 11:06:44 +0000 Subject: net/mlx5: Get RoCE netdev Used by IB driver for determining the IB bond device's netdev, when LAG is active. Returns PF0's netdev if mode is not active-backup, or the PF netdev of the active slave when mode is active-backup. Signed-off-by: Aviv Heller Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ed983b8c3213..c568dd927330 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -945,6 +945,7 @@ void mlx5_unregister_interface(struct mlx5_interface *intf); int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); bool mlx5_lag_is_active(struct mlx5_core_dev *dev); +struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); struct mlx5_profile { u64 mask; -- cgit v1.2.3 From aaff1bea16bb7f259a263c3ae4633d092e2da799 Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Mon, 9 May 2016 09:57:05 +0000 Subject: net/mlx5: LAG demux flow table support Add interfaces to allow the creation and destruction of a LAG demux flow table. It is a special flow table used during LAG for redirecting non user-mode packets from PF0 to PF1 root ft, if a packet was received on phys port two. 
Signed-off-by: Aviv Heller Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index e036d6030867..7edfe0b8f1ec 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -106,6 +106,9 @@ mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, u32 level, u16 vport); +struct mlx5_flow_table *mlx5_create_lag_demux_flow_table( + struct mlx5_flow_namespace *ns, + int prio, u32 level); int mlx5_destroy_flow_table(struct mlx5_flow_table *ft); /* inbox should be set with the following values: -- cgit v1.2.3 From 3e75d4ebaae7aac5ba82fc7a6e0e6fb56dac1916 Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Mon, 9 May 2016 10:02:29 +0000 Subject: net/mlx5: Add LAG flow steering namespace This namespace is used for LAG demux flowtable. The idea is to position the LAG demux ft between bypass and kernel flowtables, allowing raw-eth traffic from both ports to be received by the PF0 IB device. 
Signed-off-by: Aviv Heller Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 7edfe0b8f1ec..8803212fc3aa 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -54,6 +54,7 @@ static inline void build_leftovers_ft_param(int *priority, enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_BYPASS, + MLX5_FLOW_NAMESPACE_LAG, MLX5_FLOW_NAMESPACE_OFFLOADS, MLX5_FLOW_NAMESPACE_ETHTOOL, MLX5_FLOW_NAMESPACE_KERNEL, -- cgit v1.2.3 From 3bc34f3bcb087764796d9a6eaa476e270114eb8f Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Mon, 9 May 2016 10:38:42 +0000 Subject: net/mlx5: Vport LAG creation support Add interfaces for issuing CREATE_VPORT_LAG and DESTROY_VPORT_LAG commands. Used for receiving PF1's eth traffic on PF0's root ft. Signed-off-by: Aviv Heller Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c568dd927330..5cb9fa7aec61 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -944,6 +944,8 @@ int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); +int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev); +int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); -- cgit v1.2.3 From cea824d416522ce63d83b45fc0dc53c0f5b68cee Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 31 May 2016 14:09:09 +0300 Subject: net/mlx5: Introduce sniffer steering hardware capabilities Define needed hardware 
capabilities for sniffer RX and TX flow tables. Add the following capabilities: 1. Sniffer RX flow table capabilities. 2. Sniffer TX flow table capabilities. 3. If same TIR can be used by multiple flow tables of different types. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 12 ++++++++++++ include/linux/mlx5/mlx5_ifc.h | 4 +++- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 2575070c836e..77c141797152 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -964,6 +964,18 @@ enum mlx5_cap_type { #define MLX5_CAP_FLOWTABLE_NIC_RX_MAX(mdev, cap) \ MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive.cap) +#define MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) \ + MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive_sniffer.cap) + +#define MLX5_CAP_FLOWTABLE_SNIFFER_RX_MAX(mdev, cap) \ + MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive_sniffer.cap) + +#define MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) \ + MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit_sniffer.cap) + +#define MLX5_CAP_FLOWTABLE_SNIFFER_TX_MAX(mdev, cap) \ + MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit_sniffer.cap) + #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ mdev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 043d5256b754..73a720f74a69 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -483,7 +483,9 @@ struct mlx5_ifc_ads_bits { struct mlx5_ifc_flow_table_nic_cap_bits { u8 nic_rx_multi_path_tirs[0x1]; - u8 reserved_at_1[0x1ff]; + u8 nic_rx_multi_path_tirs_fts[0x1]; + u8 allow_sniffer_and_nic_rx_shared_tir[0x1]; + u8 reserved_at_3[0x1fd]; struct mlx5_ifc_flow_table_prop_layout_bits 
flow_table_properties_nic_receive; -- cgit v1.2.3 From 87d22483ce68e609818d61e3a65361f5634c6cd6 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 6 Jun 2016 18:09:35 +0300 Subject: net/mlx5: Add sniffer namespaces Add sniffer TX and RX namespaces to receive incoming and outgoing traffic. Each outgoing/incoming packet is duplicated and steered to the sniffer TX/RX namespace in addition to the regular flow. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 8803212fc3aa..93ebc5e21334 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -63,6 +63,8 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_FDB, MLX5_FLOW_NAMESPACE_ESW_EGRESS, MLX5_FLOW_NAMESPACE_ESW_INGRESS, + MLX5_FLOW_NAMESPACE_SNIFFER_RX, + MLX5_FLOW_NAMESPACE_SNIFFER_TX, }; struct mlx5_flow_table; -- cgit v1.2.3 From eec097d43100a8195fd4f678671ecd5d986dd675 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 13 Jun 2016 11:01:51 -0500 Subject: PCI: Add pci_enable_ptm() for drivers to enable PTM on endpoints Add a pci_enable_ptm() interface so drivers can enable PTM. The PCI core enables PTM on PTM Roots and switches automatically, but we don't enable PTM on endpoints unless a driver requests it.
Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 96c509fa9d46..9e4b6d6f3c8d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1407,6 +1407,13 @@ static inline void pci_disable_ats(struct pci_dev *d) { } static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; } #endif +#ifdef CONFIG_PCIE_PTM +int pci_enable_ptm(struct pci_dev *dev, u8 *granularity); +#else +static inline int pci_enable_ptm(struct pci_dev *dev, u8 *granularity) +{ return -EINVAL; } +#endif + void pci_cfg_access_lock(struct pci_dev *dev); bool pci_cfg_access_trylock(struct pci_dev *dev); void pci_cfg_access_unlock(struct pci_dev *dev); -- cgit v1.2.3 From 255e732c61dbb6a0bf9e0a3d6bc45f202853c880 Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Wed, 17 Aug 2016 20:58:28 -0400 Subject: livepatch: use arch_klp_init_object_loaded() to finish arch-specific tasks Introduce arch_klp_init_object_loaded() to complete any additional arch-specific tasks during patching. Architecture code may override this function. 
Signed-off-by: Jessica Yu Reviewed-by: Petr Mladek Acked-by: Miroslav Benes Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index a93a0b23dc8d..9072f04db616 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -116,6 +116,9 @@ int klp_unregister_patch(struct klp_patch *); int klp_enable_patch(struct klp_patch *); int klp_disable_patch(struct klp_patch *); +void arch_klp_init_object_loaded(struct klp_patch *patch, + struct klp_object *obj); + /* Called from the module loader during module coming/going states */ int klp_module_coming(struct module *mod); void klp_module_going(struct module *mod); -- cgit v1.2.3 From d194fd265e78ca1b2a4607918778446de44818b2 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 19 Aug 2016 08:34:57 +0300 Subject: qed*: Fix pause setting When moving into using ethtool's link_ksetting, qed started supplying its own bitmask of speed/capabilities, but qede is still checking for the SUPPORTED value to determine whether it supports pause. Fixes: 054c67d1c82a ("qed*: Add support for ethtool link_ksettings callbacks") Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_if.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 3ed7d20e3811..d8dc5c2243d5 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -318,9 +318,11 @@ struct qed_link_params { struct qed_link_output { bool link_up; - u32 supported_caps; /* In SUPPORTED defs */ - u32 advertised_caps; /* In ADVERTISED defs */ - u32 lp_caps; /* In ADVERTISED defs */ + /* In QED_LM_* defs */ + u32 supported_caps; + u32 advertised_caps; + u32 lp_caps; + u32 speed; /* In Mb/s */ u8 duplex; /* In DUPLEX defs */ u8 port; /* In PORT defs */ -- cgit v1.2.3 From f6a66927692e30bdc1792e7a1fc2107d4dfcf42d Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 17 Aug 2016 13:36:11 +0300 Subject: flow_dissector: Get vlan priority in addition to vlan id Add vlan priority check to the flow dissector by adding new flow dissector struct, flow_dissector_key_vlan which includes vlan tag fields. vlan_id and flow_label fields were under the same struct (flow_dissector_key_tags). It was a convenient setting since struct flow_dissector_key_tags is used by struct flow_keys and by setting vlan_id and flow_label under the same struct, we get precisely 24 or 48 bytes in flow_keys from flow_dissector_key_basic. Now, when adding vlan priority support, the code will be cleaner if flow_label and vlan tag won't be under the same struct anymore. Signed-off-by: Hadar Hen Zion Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/linux/if_vlan.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index a5f6ce6b578c..49d4aef1f789 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -81,6 +81,7 @@ static inline bool is_vlan_dev(const struct net_device *dev) #define skb_vlan_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) #define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) #define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) +#define skb_vlan_tag_get_prio(__skb) ((__skb)->vlan_tci & VLAN_PRIO_MASK) /** * struct vlan_pcpu_stats - VLAN percpu rx/tx stats -- cgit v1.2.3 From 1cb94db3d1bfe0075bde78fb2989f17e0a8a3936 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 17 Aug 2016 23:00:30 +0200 Subject: net: bgmac: support Ethernet core on BCM53573 SoCs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BCM53573 is a new series of Broadcom's SoCs. It's based on ARM and can be found in two packages (versions): BCM53573 and BCM47189. It shares some code with the Northstar family, but also requires some new quirks. First of all there can be up to 2 Ethernet cores on this SoC. If that is the case, they are connected to two different switch ports allowing some more complex/optimized setups. It seems the second unit doesn't come fully configured and requires some IRQ quirk. Other than that only the first core is connected to the PHY. For the second one we have to register fixed PHY (similarly to the Northstar), otherwise generic PHY driver would get some invalid info. This has been successfully tested on Tenda AC9 (BCM47189B0). Signed-off-by: Rafał Miłecki Signed-off-by: David S. 
Miller --- include/linux/bcma/bcma.h | 3 +++ include/linux/bcma/bcma_regs.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 3db25df396cb..8eeedb2db924 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -205,6 +205,9 @@ struct bcma_host_ops { #define BCMA_PKG_ID_BCM4709 0 #define BCMA_CHIP_ID_BCM47094 53030 #define BCMA_CHIP_ID_BCM53018 53018 +#define BCMA_CHIP_ID_BCM53573 53573 +#define BCMA_PKG_ID_BCM53573 0 +#define BCMA_PKG_ID_BCM47189 1 /* Board types (on PCI usually equals to the subsystem dev id) */ /* BCM4313 */ diff --git a/include/linux/bcma/bcma_regs.h b/include/linux/bcma/bcma_regs.h index ebd5c1fcdea4..c607fce6aadd 100644 --- a/include/linux/bcma/bcma_regs.h +++ b/include/linux/bcma/bcma_regs.h @@ -23,6 +23,7 @@ #define BCMA_CLKCTLST_4328A0_HAVEALP 0x00020000 /* 4328a0 has reversed bits */ /* Agent registers (common for every core) */ +#define BCMA_OOB_SEL_OUT_A30 0x0100 #define BCMA_IOCTL 0x0408 /* IO control */ #define BCMA_IOCTL_CLK 0x0001 #define BCMA_IOCTL_FGC 0x0002 -- cgit v1.2.3 From 5293efe62df81908f2e90c9820c7edcc8e61f5e9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 18 Aug 2016 01:00:39 +0200 Subject: bpf: add bpf_skb_change_tail helper This work adds a bpf_skb_change_tail() helper for tc BPF programs. The basic idea is to expand or shrink the skb in a controlled manner. The eBPF program can then rewrite the rest via helpers like bpf_skb_store_bytes(), bpf_lX_csum_replace() and others rather than passing a raw buffer for writing here. bpf_skb_change_tail() is really a slow path helper and intended for replies with f.e. ICMP control messages. Concept is similar to other helpers like bpf_skb_change_proto() helper to keep the helper without protocol specifics and let the BPF program mangle the remaining parts. A flags field has been added and is reserved for now should we extend the helper in future. 
Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/skbuff.h | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0f665cb26b50..7047448e8129 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2295,7 +2295,7 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len) int ___pskb_trim(struct sk_buff *skb, unsigned int len); -static inline void __skb_trim(struct sk_buff *skb, unsigned int len) +static inline void __skb_set_length(struct sk_buff *skb, unsigned int len) { if (unlikely(skb_is_nonlinear(skb))) { WARN_ON(1); @@ -2305,6 +2305,11 @@ static inline void __skb_trim(struct sk_buff *skb, unsigned int len) skb_set_tail_pointer(skb, len); } +static inline void __skb_trim(struct sk_buff *skb, unsigned int len) +{ + __skb_set_length(skb, len); +} + void skb_trim(struct sk_buff *skb, unsigned int len); static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) @@ -2335,6 +2340,20 @@ static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len) BUG_ON(err); } +static inline int __skb_grow(struct sk_buff *skb, unsigned int len) +{ + unsigned int diff = len - skb->len; + + if (skb_tailroom(skb) < diff) { + int ret = pskb_expand_head(skb, 0, diff - skb_tailroom(skb), + GFP_ATOMIC); + if (ret) + return ret; + } + __skb_set_length(skb, len); + return 0; +} + /** * skb_orphan - orphan a buffer * @skb: buffer to orphan @@ -2938,6 +2957,21 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) return __pskb_trim(skb, len); } +static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; + __skb_trim(skb, len); + return 0; +} + +static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len) +{ + if 
(skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; + return __skb_grow(skb, len); +} + #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ skb != (struct sk_buff *)(queue); \ @@ -3726,6 +3760,13 @@ static inline bool skb_is_gso_v6(const struct sk_buff *skb) return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; } +static inline void skb_gso_reset(struct sk_buff *skb) +{ + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_segs = 0; + skb_shinfo(skb)->gso_type = 0; +} + void __skb_warn_lro_forwarding(const struct sk_buff *skb); static inline bool skb_warn_if_lro(const struct sk_buff *skb) -- cgit v1.2.3 From 246779dd090bd1b74d2652b3a6ca7759f593b27a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 18 Aug 2016 16:50:56 +0800 Subject: rhashtable: Remove GFP flag from rhashtable_walk_init The commit 8f6fd83c6c5ec66a4a70c728535ddcdfef4f3697 ("rhashtable: accept GFP flags in rhashtable_walk_init") added a GFP flag argument to rhashtable_walk_init because some users wish to use the walker in an unsleepable context. In fact we don't need to allocate memory in rhashtable_walk_init at all. The walker is always paired with an iterator so we could just stash ourselves there. This patch does that by introducing a new enter function to replace the existing init function. This way we don't have to churn all the existing users again. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/linux/rhashtable.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 3eef0802a0cd..8b72ee710f95 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -173,7 +173,7 @@ struct rhashtable_walker { struct rhashtable_iter { struct rhashtable *ht; struct rhash_head *p; - struct rhashtable_walker *walker; + struct rhashtable_walker walker; unsigned int slot; unsigned int skip; }; @@ -346,8 +346,8 @@ struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, struct bucket_table *old_tbl); int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl); -int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter, - gfp_t gfp); +void rhashtable_walk_enter(struct rhashtable *ht, + struct rhashtable_iter *iter); void rhashtable_walk_exit(struct rhashtable_iter *iter); int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); void *rhashtable_walk_next(struct rhashtable_iter *iter); @@ -906,4 +906,12 @@ static inline int rhashtable_replace_fast( return err; } +/* Obsolete function, do not use in new code. */ +static inline int rhashtable_walk_init(struct rhashtable *ht, + struct rhashtable_iter *iter, gfp_t gfp) +{ + rhashtable_walk_enter(ht, iter); + return 0; +} + #endif /* _LINUX_RHASHTABLE_H */ -- cgit v1.2.3 From 05fafbfb3d77f43ae18341ddc61eb5c477896778 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 19 Aug 2016 09:33:31 +0300 Subject: qed: utilize FW 8.10.10.0 This new firmware for the qed* adapters fixes several issues: - Better blocking of malicious VFs. - After FLR, Tx-switching [internal routing] of packets might be incorrect. - Deletion of unicast MAC filters would sometimes have side-effect of corrupting the MAC filters configured for a device. 
It also contains fixes for future qed* drivers that *hopefully* would be sent for review in the near future. In addition, it would allow driver some new functionality, including: - Allowing PF/VF driver compaitibility with old drivers [running pre-8.10.5.0 firmware]. - Better debug facilities. This would also bump the qed* driver versions to 8.10.9.20. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 361 +++++++++++++++++++++++++++++++++++---- include/linux/qed/eth_common.h | 155 ++++++++++------- include/linux/qed/iscsi_common.h | 28 +-- include/linux/qed/qed_chain.h | 13 -- include/linux/qed/tcp_common.h | 16 +- 5 files changed, 442 insertions(+), 131 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 40c0ada01806..d306e0b55581 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -5,28 +5,83 @@ * (GPL) Version 2, available from the file COPYING in the main directory of * this source tree. */ +#ifndef _COMMON_HSI_H +#define _COMMON_HSI_H +#include +#include +#include +#include + +/* dma_addr_t manip */ +#define DMA_LO(x) ((u32)(((dma_addr_t)(x)) & 0xffffffff)) +#define DMA_HI(x) ((u32)(((dma_addr_t)(x)) >> 32)) + +#define DMA_LO_LE(x) cpu_to_le32(DMA_LO(x)) +#define DMA_HI_LE(x) cpu_to_le32(DMA_HI(x)) + +/* It's assumed that whoever includes this has previously included an hsi + * file defining the regpair. 
+ */ +#define DMA_REGPAIR_LE(x, val) (x).hi = DMA_HI_LE((val)); \ + (x).lo = DMA_LO_LE((val)) + +#define HILO_GEN(hi, lo, type) ((((type)(hi)) << 32) + (lo)) +#define HILO_DMA(hi, lo) HILO_GEN(hi, lo, dma_addr_t) +#define HILO_64(hi, lo) HILO_GEN(hi, lo, u64) +#define HILO_DMA_REGPAIR(regpair) (HILO_DMA(regpair.hi, regpair.lo)) +#define HILO_64_REGPAIR(regpair) (HILO_64(regpair.hi, regpair.lo)) #ifndef __COMMON_HSI__ #define __COMMON_HSI__ -#define CORE_SPQE_PAGE_SIZE_BYTES 4096 #define X_FINAL_CLEANUP_AGG_INT 1 + +#define EVENT_RING_PAGE_SIZE_BYTES 4096 + #define NUM_OF_GLOBAL_QUEUES 128 +#define COMMON_QUEUE_ENTRY_MAX_BYTE_SIZE 64 + +#define ISCSI_CDU_TASK_SEG_TYPE 0 +#define RDMA_CDU_TASK_SEG_TYPE 1 + +#define FW_ASSERT_GENERAL_ATTN_IDX 32 + +#define MAX_PINNED_CCFC 32 /* Queue Zone sizes in bytes */ #define TSTORM_QZONE_SIZE 8 -#define MSTORM_QZONE_SIZE 0 +#define MSTORM_QZONE_SIZE 16 #define USTORM_QZONE_SIZE 8 #define XSTORM_QZONE_SIZE 8 #define YSTORM_QZONE_SIZE 0 #define PSTORM_QZONE_SIZE 0 -#define ETH_MAX_NUM_RX_QUEUES_PER_VF 16 +#define MSTORM_VF_ZONE_DEFAULT_SIZE_LOG 7 +#define ETH_MAX_NUM_RX_QUEUES_PER_VF_DEFAULT 16 +#define ETH_MAX_NUM_RX_QUEUES_PER_VF_DOUBLE 48 +#define ETH_MAX_NUM_RX_QUEUES_PER_VF_QUAD 112 + +/********************************/ +/* CORE (LIGHT L2) FW CONSTANTS */ +/********************************/ + +#define CORE_LL2_MAX_RAMROD_PER_CON 8 +#define CORE_LL2_TX_BD_PAGE_SIZE_BYTES 4096 +#define CORE_LL2_RX_BD_PAGE_SIZE_BYTES 4096 +#define CORE_LL2_RX_CQE_PAGE_SIZE_BYTES 4096 +#define CORE_LL2_RX_NUM_NEXT_PAGE_BDS 1 + +#define CORE_LL2_TX_MAX_BDS_PER_PACKET 12 + +#define CORE_SPQE_PAGE_SIZE_BYTES 4096 + +#define MAX_NUM_LL2_RX_QUEUES 32 +#define MAX_NUM_LL2_TX_STATS_COUNTERS 32 #define FW_MAJOR_VERSION 8 #define FW_MINOR_VERSION 10 -#define FW_REVISION_VERSION 5 +#define FW_REVISION_VERSION 10 #define FW_ENGINEERING_VERSION 0 /***********************/ @@ -83,6 +138,17 @@ #define NUM_OF_LCIDS (320) #define NUM_OF_LTIDS (320) +/* Clock 
values */ +#define MASTER_CLK_FREQ_E4 (375e6) +#define STORM_CLK_FREQ_E4 (1000e6) +#define CLK25M_CLK_FREQ_E4 (25e6) + +/* Global PXP windows (GTT) */ +#define NUM_OF_GTT 19 +#define GTT_DWORD_SIZE_BITS 10 +#define GTT_BYTE_SIZE_BITS (GTT_DWORD_SIZE_BITS + 2) +#define GTT_DWORD_SIZE BIT(GTT_DWORD_SIZE_BITS) + /*****************/ /* CDU CONSTANTS */ /*****************/ @@ -90,6 +156,8 @@ #define CDU_SEG_TYPE_OFFSET_REG_TYPE_SHIFT (17) #define CDU_SEG_TYPE_OFFSET_REG_OFFSET_MASK (0x1ffff) +#define CDU_VF_FL_SEG_TYPE_OFFSET_REG_TYPE_SHIFT (12) +#define CDU_VF_FL_SEG_TYPE_OFFSET_REG_OFFSET_MASK (0xfff) /*****************/ /* DQ CONSTANTS */ /*****************/ @@ -115,6 +183,11 @@ #define DQ_XCM_ETH_TX_BD_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD3 #define DQ_XCM_ETH_TX_BD_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 #define DQ_XCM_ETH_GO_TO_BD_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD5 +#define DQ_XCM_ISCSI_SQ_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD3 +#define DQ_XCM_ISCSI_SQ_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 +#define DQ_XCM_ISCSI_MORE_TO_SEND_SEQ_CMD DQ_XCM_AGG_VAL_SEL_REG3 +#define DQ_XCM_ISCSI_EXP_STAT_SN_CMD DQ_XCM_AGG_VAL_SEL_REG6 +#define DQ_XCM_ROCE_SQ_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 /* UCM agg val selection (HW) */ #define DQ_UCM_AGG_VAL_SEL_WORD0 0 @@ -159,13 +232,16 @@ #define DQ_XCM_AGG_FLG_SHIFT_CF23 7 /* XCM agg counter flag selection */ -#define DQ_XCM_CORE_DQ_CF_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF18) -#define DQ_XCM_CORE_TERMINATE_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF19) -#define DQ_XCM_CORE_SLOW_PATH_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF22) -#define DQ_XCM_ETH_DQ_CF_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF18) -#define DQ_XCM_ETH_TERMINATE_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF19) -#define DQ_XCM_ETH_SLOW_PATH_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF22) -#define DQ_XCM_ETH_TPH_EN_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF23) +#define DQ_XCM_CORE_DQ_CF_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF18) +#define DQ_XCM_CORE_TERMINATE_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF19) +#define DQ_XCM_CORE_SLOW_PATH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF22) +#define 
DQ_XCM_ETH_DQ_CF_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF18) +#define DQ_XCM_ETH_TERMINATE_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF19) +#define DQ_XCM_ETH_SLOW_PATH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF22) +#define DQ_XCM_ETH_TPH_EN_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF23) +#define DQ_XCM_ISCSI_DQ_FLUSH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF19) +#define DQ_XCM_ISCSI_SLOW_PATH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF22) +#define DQ_XCM_ISCSI_PROC_ONLY_CLEANUP_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF23) /* UCM agg counter flag selection (HW) */ #define DQ_UCM_AGG_FLG_SHIFT_CF0 0 @@ -178,9 +254,45 @@ #define DQ_UCM_AGG_FLG_SHIFT_RULE1EN 7 /* UCM agg counter flag selection (FW) */ -#define DQ_UCM_ETH_PMD_TX_ARM_CMD (1 << DQ_UCM_AGG_FLG_SHIFT_CF4) -#define DQ_UCM_ETH_PMD_RX_ARM_CMD (1 << DQ_UCM_AGG_FLG_SHIFT_CF5) - +#define DQ_UCM_ETH_PMD_TX_ARM_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF4) +#define DQ_UCM_ETH_PMD_RX_ARM_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF5) +#define DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF4) +#define DQ_UCM_ROCE_CQ_ARM_CF_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF5) + +/* TCM agg counter flag selection (HW) */ +#define DQ_TCM_AGG_FLG_SHIFT_CF0 0 +#define DQ_TCM_AGG_FLG_SHIFT_CF1 1 +#define DQ_TCM_AGG_FLG_SHIFT_CF2 2 +#define DQ_TCM_AGG_FLG_SHIFT_CF3 3 +#define DQ_TCM_AGG_FLG_SHIFT_CF4 4 +#define DQ_TCM_AGG_FLG_SHIFT_CF5 5 +#define DQ_TCM_AGG_FLG_SHIFT_CF6 6 +#define DQ_TCM_AGG_FLG_SHIFT_CF7 7 +/* TCM agg counter flag selection (FW) */ +#define DQ_TCM_ISCSI_FLUSH_Q0_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF1) +#define DQ_TCM_ISCSI_TIMER_STOP_ALL_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF3) + +/* PWM address mapping */ +#define DQ_PWM_OFFSET_DPM_BASE 0x0 +#define DQ_PWM_OFFSET_DPM_END 0x27 +#define DQ_PWM_OFFSET_XCM16_BASE 0x40 +#define DQ_PWM_OFFSET_XCM32_BASE 0x44 +#define DQ_PWM_OFFSET_UCM16_BASE 0x48 +#define DQ_PWM_OFFSET_UCM32_BASE 0x4C +#define DQ_PWM_OFFSET_UCM16_4 0x50 +#define DQ_PWM_OFFSET_TCM16_BASE 0x58 +#define DQ_PWM_OFFSET_TCM32_BASE 0x5C +#define DQ_PWM_OFFSET_XCM_FLAGS 0x68 +#define DQ_PWM_OFFSET_UCM_FLAGS 0x69 +#define 
DQ_PWM_OFFSET_TCM_FLAGS 0x6B + +#define DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD (DQ_PWM_OFFSET_XCM16_BASE + 2) +#define DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT (DQ_PWM_OFFSET_UCM32_BASE) +#define DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_16BIT (DQ_PWM_OFFSET_UCM16_4) +#define DQ_PWM_OFFSET_UCM_RDMA_INT_TIMEOUT (DQ_PWM_OFFSET_UCM16_BASE + 2) +#define DQ_PWM_OFFSET_UCM_RDMA_ARM_FLAGS (DQ_PWM_OFFSET_UCM_FLAGS) +#define DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD (DQ_PWM_OFFSET_TCM16_BASE + 1) +#define DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD (DQ_PWM_OFFSET_TCM16_BASE + 3) #define DQ_REGION_SHIFT (12) /* DPM */ @@ -214,15 +326,17 @@ */ #define CM_TX_PQ_BASE 0x200 +/* number of global Vport/QCN rate limiters */ +#define MAX_QM_GLOBAL_RLS 256 /* QM registers data */ #define QM_LINE_CRD_REG_WIDTH 16 -#define QM_LINE_CRD_REG_SIGN_BIT (1 << (QM_LINE_CRD_REG_WIDTH - 1)) +#define QM_LINE_CRD_REG_SIGN_BIT BIT((QM_LINE_CRD_REG_WIDTH - 1)) #define QM_BYTE_CRD_REG_WIDTH 24 -#define QM_BYTE_CRD_REG_SIGN_BIT (1 << (QM_BYTE_CRD_REG_WIDTH - 1)) +#define QM_BYTE_CRD_REG_SIGN_BIT BIT((QM_BYTE_CRD_REG_WIDTH - 1)) #define QM_WFQ_CRD_REG_WIDTH 32 -#define QM_WFQ_CRD_REG_SIGN_BIT (1 << (QM_WFQ_CRD_REG_WIDTH - 1)) +#define QM_WFQ_CRD_REG_SIGN_BIT BIT((QM_WFQ_CRD_REG_WIDTH - 1)) #define QM_RL_CRD_REG_WIDTH 32 -#define QM_RL_CRD_REG_SIGN_BIT (1 << (QM_RL_CRD_REG_WIDTH - 1)) +#define QM_RL_CRD_REG_SIGN_BIT BIT((QM_RL_CRD_REG_WIDTH - 1)) /*****************/ /* CAU CONSTANTS */ @@ -287,6 +401,17 @@ /* PXP CONSTANTS */ /*****************/ +/* Bars for Blocks */ +#define PXP_BAR_GRC 0 +#define PXP_BAR_TSDM 0 +#define PXP_BAR_USDM 0 +#define PXP_BAR_XSDM 0 +#define PXP_BAR_MSDM 0 +#define PXP_BAR_YSDM 0 +#define PXP_BAR_PSDM 0 +#define PXP_BAR_IGU 0 +#define PXP_BAR_DQ 1 + /* PTT and GTT */ #define PXP_NUM_PF_WINDOWS 12 #define PXP_PER_PF_ENTRY_SIZE 8 @@ -334,6 +459,52 @@ (PXP_EXTERNAL_BAR_GLOBAL_WINDOW_START + \ PXP_EXTERNAL_BAR_GLOBAL_WINDOW_LENGTH - 1) +/* PF BAR */ +#define PXP_BAR0_START_GRC 0x0000 +#define PXP_BAR0_GRC_LENGTH 0x1C00000 
+#define PXP_BAR0_END_GRC (PXP_BAR0_START_GRC + \ + PXP_BAR0_GRC_LENGTH - 1) + +#define PXP_BAR0_START_IGU 0x1C00000 +#define PXP_BAR0_IGU_LENGTH 0x10000 +#define PXP_BAR0_END_IGU (PXP_BAR0_START_IGU + \ + PXP_BAR0_IGU_LENGTH - 1) + +#define PXP_BAR0_START_TSDM 0x1C80000 +#define PXP_BAR0_SDM_LENGTH 0x40000 +#define PXP_BAR0_SDM_RESERVED_LENGTH 0x40000 +#define PXP_BAR0_END_TSDM (PXP_BAR0_START_TSDM + \ + PXP_BAR0_SDM_LENGTH - 1) + +#define PXP_BAR0_START_MSDM 0x1D00000 +#define PXP_BAR0_END_MSDM (PXP_BAR0_START_MSDM + \ + PXP_BAR0_SDM_LENGTH - 1) + +#define PXP_BAR0_START_USDM 0x1D80000 +#define PXP_BAR0_END_USDM (PXP_BAR0_START_USDM + \ + PXP_BAR0_SDM_LENGTH - 1) + +#define PXP_BAR0_START_XSDM 0x1E00000 +#define PXP_BAR0_END_XSDM (PXP_BAR0_START_XSDM + \ + PXP_BAR0_SDM_LENGTH - 1) + +#define PXP_BAR0_START_YSDM 0x1E80000 +#define PXP_BAR0_END_YSDM (PXP_BAR0_START_YSDM + \ + PXP_BAR0_SDM_LENGTH - 1) + +#define PXP_BAR0_START_PSDM 0x1F00000 +#define PXP_BAR0_END_PSDM (PXP_BAR0_START_PSDM + \ + PXP_BAR0_SDM_LENGTH - 1) + +#define PXP_BAR0_FIRST_INVALID_ADDRESS (PXP_BAR0_END_PSDM + 1) + +/* VF BAR */ +#define PXP_VF_BAR0 0 + +#define PXP_VF_BAR0_START_GRC 0x3E00 +#define PXP_VF_BAR0_GRC_LENGTH 0x200 +#define PXP_VF_BAR0_END_GRC (PXP_VF_BAR0_START_GRC + \ + PXP_VF_BAR0_GRC_LENGTH - 1) #define PXP_VF_BAR0_START_IGU 0 #define PXP_VF_BAR0_IGU_LENGTH 0x3000 @@ -399,6 +570,20 @@ #define PXP_NUM_ILT_RECORDS_BB 7600 #define PXP_NUM_ILT_RECORDS_K2 11000 #define MAX_NUM_ILT_RECORDS MAX(PXP_NUM_ILT_RECORDS_BB, PXP_NUM_ILT_RECORDS_K2) +#define PXP_QUEUES_ZONE_MAX_NUM 320 +/*****************/ +/* PRM CONSTANTS */ +/*****************/ +#define PRM_DMA_PAD_BYTES_NUM 2 +/******************/ +/* SDMs CONSTANTS */ +/******************/ +#define SDM_OP_GEN_TRIG_NONE 0 +#define SDM_OP_GEN_TRIG_WAKE_THREAD 1 +#define SDM_OP_GEN_TRIG_AGG_INT 2 +#define SDM_OP_GEN_TRIG_LOADER 4 +#define SDM_OP_GEN_TRIG_INDICATE_ERROR 6 +#define SDM_OP_GEN_TRIG_RELEASE_THREAD 7 #define SDM_COMP_TYPE_NONE 0 
#define SDM_COMP_TYPE_WAKE_THREAD 1 @@ -424,6 +609,8 @@ /* PRS CONSTANTS */ /*****************/ +#define PRS_GFT_CAM_LINES_NO_MATCH 31 + /* Async data KCQ CQE */ struct async_data { __le32 cid; @@ -440,20 +627,6 @@ struct coalescing_timeset { #define COALESCING_TIMESET_VALID_SHIFT 7 }; -struct common_prs_pf_msg_info { - __le32 value; -#define COMMON_PRS_PF_MSG_INFO_NPAR_DEFAULT_PF_MASK 0x1 -#define COMMON_PRS_PF_MSG_INFO_NPAR_DEFAULT_PF_SHIFT 0 -#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_1_MASK 0x1 -#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_1_SHIFT 1 -#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_2_MASK 0x1 -#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_2_SHIFT 2 -#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_3_MASK 0x1 -#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_3_SHIFT 3 -#define COMMON_PRS_PF_MSG_INFO_RESERVED_MASK 0xFFFFFFF -#define COMMON_PRS_PF_MSG_INFO_RESERVED_SHIFT 4 -}; - struct common_queue_zone { __le16 ring_drv_data_consumer; __le16 reserved; @@ -473,6 +646,19 @@ struct vf_pf_channel_eqe_data { struct regpair msg_addr; }; +struct iscsi_eqe_data { + __le32 cid; + __le16 conn_id; + u8 error_code; + u8 error_pdu_opcode_reserved; +#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_MASK 0x3F +#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_SHIFT 0 +#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_VALID_MASK 0x1 +#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_VALID_SHIFT 6 +#define ISCSI_EQE_DATA_RESERVED0_MASK 0x1 +#define ISCSI_EQE_DATA_RESERVED0_SHIFT 7 +}; + struct malicious_vf_eqe_data { u8 vf_id; u8 err_id; @@ -488,6 +674,7 @@ struct initial_cleanup_eqe_data { union event_ring_data { u8 bytes[8]; struct vf_pf_channel_eqe_data vf_pf_channel; + struct iscsi_eqe_data iscsi_info; struct malicious_vf_eqe_data malicious_vf; struct initial_cleanup_eqe_data vf_init_cleanup; }; @@ -616,6 +803,52 @@ enum db_dest { MAX_DB_DEST }; +/* Enum of doorbell DPM types */ +enum db_dpm_type { + DPM_LEGACY, + DPM_ROCE, + DPM_L2_INLINE, + DPM_L2_BD, + MAX_DB_DPM_TYPE +}; + +/* Structure for doorbell data, in L2 DPM mode, for 1st db in 
a DPM burst */ +struct db_l2_dpm_data { + __le16 icid; + __le16 bd_prod; + __le32 params; +#define DB_L2_DPM_DATA_SIZE_MASK 0x3F +#define DB_L2_DPM_DATA_SIZE_SHIFT 0 +#define DB_L2_DPM_DATA_DPM_TYPE_MASK 0x3 +#define DB_L2_DPM_DATA_DPM_TYPE_SHIFT 6 +#define DB_L2_DPM_DATA_NUM_BDS_MASK 0xFF +#define DB_L2_DPM_DATA_NUM_BDS_SHIFT 8 +#define DB_L2_DPM_DATA_PKT_SIZE_MASK 0x7FF +#define DB_L2_DPM_DATA_PKT_SIZE_SHIFT 16 +#define DB_L2_DPM_DATA_RESERVED0_MASK 0x1 +#define DB_L2_DPM_DATA_RESERVED0_SHIFT 27 +#define DB_L2_DPM_DATA_SGE_NUM_MASK 0x7 +#define DB_L2_DPM_DATA_SGE_NUM_SHIFT 28 +#define DB_L2_DPM_DATA_RESERVED1_MASK 0x1 +#define DB_L2_DPM_DATA_RESERVED1_SHIFT 31 +}; + +/* Structure for SGE in a DPM doorbell of type DPM_L2_BD */ +struct db_l2_dpm_sge { + struct regpair addr; + __le16 nbytes; + __le16 bitfields; +#define DB_L2_DPM_SGE_TPH_ST_INDEX_MASK 0x1FF +#define DB_L2_DPM_SGE_TPH_ST_INDEX_SHIFT 0 +#define DB_L2_DPM_SGE_RESERVED0_MASK 0x3 +#define DB_L2_DPM_SGE_RESERVED0_SHIFT 9 +#define DB_L2_DPM_SGE_ST_VALID_MASK 0x1 +#define DB_L2_DPM_SGE_ST_VALID_SHIFT 11 +#define DB_L2_DPM_SGE_RESERVED1_MASK 0xF +#define DB_L2_DPM_SGE_RESERVED1_SHIFT 12 + __le32 reserved2; +}; + /* Structure for doorbell address, in legacy mode */ struct db_legacy_addr { __le32 addr; @@ -627,6 +860,49 @@ struct db_legacy_addr { #define DB_LEGACY_ADDR_ICID_SHIFT 5 }; +/* Structure for doorbell address, in PWM mode */ +struct db_pwm_addr { + __le32 addr; +#define DB_PWM_ADDR_RESERVED0_MASK 0x7 +#define DB_PWM_ADDR_RESERVED0_SHIFT 0 +#define DB_PWM_ADDR_OFFSET_MASK 0x7F +#define DB_PWM_ADDR_OFFSET_SHIFT 3 +#define DB_PWM_ADDR_WID_MASK 0x3 +#define DB_PWM_ADDR_WID_SHIFT 10 +#define DB_PWM_ADDR_DPI_MASK 0xFFFF +#define DB_PWM_ADDR_DPI_SHIFT 12 +#define DB_PWM_ADDR_RESERVED1_MASK 0xF +#define DB_PWM_ADDR_RESERVED1_SHIFT 28 +}; + +/* Parameters to RoCE firmware, passed in EDPM doorbell */ +struct db_roce_dpm_params { + __le32 params; +#define DB_ROCE_DPM_PARAMS_SIZE_MASK 0x3F +#define 
DB_ROCE_DPM_PARAMS_SIZE_SHIFT 0 +#define DB_ROCE_DPM_PARAMS_DPM_TYPE_MASK 0x3 +#define DB_ROCE_DPM_PARAMS_DPM_TYPE_SHIFT 6 +#define DB_ROCE_DPM_PARAMS_OPCODE_MASK 0xFF +#define DB_ROCE_DPM_PARAMS_OPCODE_SHIFT 8 +#define DB_ROCE_DPM_PARAMS_WQE_SIZE_MASK 0x7FF +#define DB_ROCE_DPM_PARAMS_WQE_SIZE_SHIFT 16 +#define DB_ROCE_DPM_PARAMS_RESERVED0_MASK 0x1 +#define DB_ROCE_DPM_PARAMS_RESERVED0_SHIFT 27 +#define DB_ROCE_DPM_PARAMS_COMPLETION_FLG_MASK 0x1 +#define DB_ROCE_DPM_PARAMS_COMPLETION_FLG_SHIFT 28 +#define DB_ROCE_DPM_PARAMS_S_FLG_MASK 0x1 +#define DB_ROCE_DPM_PARAMS_S_FLG_SHIFT 29 +#define DB_ROCE_DPM_PARAMS_RESERVED1_MASK 0x3 +#define DB_ROCE_DPM_PARAMS_RESERVED1_SHIFT 30 +}; + +/* Structure for doorbell data, in ROCE DPM mode, for 1st db in a DPM burst */ +struct db_roce_dpm_data { + __le16 icid; + __le16 prod_val; + struct db_roce_dpm_params params; +}; + /* Igu interrupt command */ enum igu_int_cmd { IGU_INT_ENABLE = 0, @@ -764,6 +1040,19 @@ struct pxp_ptt_entry { struct pxp_pretend_cmd pretend; }; +/* VF Zone A Permission Register. 
*/ +struct pxp_vf_zone_a_permission { + __le32 control; +#define PXP_VF_ZONE_A_PERMISSION_VFID_MASK 0xFF +#define PXP_VF_ZONE_A_PERMISSION_VFID_SHIFT 0 +#define PXP_VF_ZONE_A_PERMISSION_VALID_MASK 0x1 +#define PXP_VF_ZONE_A_PERMISSION_VALID_SHIFT 8 +#define PXP_VF_ZONE_A_PERMISSION_RESERVED0_MASK 0x7F +#define PXP_VF_ZONE_A_PERMISSION_RESERVED0_SHIFT 9 +#define PXP_VF_ZONE_A_PERMISSION_RESERVED1_MASK 0xFFFF +#define PXP_VF_ZONE_A_PERMISSION_RESERVED1_SHIFT 16 +}; + /* RSS hash type */ struct rdif_task_context { __le32 initial_ref_tag; @@ -831,6 +1120,7 @@ struct rdif_task_context { __le32 reserved2; }; +/* RSS hash type */ enum rss_hash_type { RSS_HASH_TYPE_DEFAULT = 0, RSS_HASH_TYPE_IPV4 = 1, @@ -942,7 +1232,7 @@ struct tdif_task_context { }; struct timers_context { - __le32 logical_client0; + __le32 logical_client_0; #define TIMERS_CONTEXT_EXPIRATIONTIMELC0_MASK 0xFFFFFFF #define TIMERS_CONTEXT_EXPIRATIONTIMELC0_SHIFT 0 #define TIMERS_CONTEXT_VALIDLC0_MASK 0x1 @@ -951,7 +1241,7 @@ struct timers_context { #define TIMERS_CONTEXT_ACTIVELC0_SHIFT 29 #define TIMERS_CONTEXT_RESERVED0_MASK 0x3 #define TIMERS_CONTEXT_RESERVED0_SHIFT 30 - __le32 logical_client1; + __le32 logical_client_1; #define TIMERS_CONTEXT_EXPIRATIONTIMELC1_MASK 0xFFFFFFF #define TIMERS_CONTEXT_EXPIRATIONTIMELC1_SHIFT 0 #define TIMERS_CONTEXT_VALIDLC1_MASK 0x1 @@ -960,7 +1250,7 @@ struct timers_context { #define TIMERS_CONTEXT_ACTIVELC1_SHIFT 29 #define TIMERS_CONTEXT_RESERVED1_MASK 0x3 #define TIMERS_CONTEXT_RESERVED1_SHIFT 30 - __le32 logical_client2; + __le32 logical_client_2; #define TIMERS_CONTEXT_EXPIRATIONTIMELC2_MASK 0xFFFFFFF #define TIMERS_CONTEXT_EXPIRATIONTIMELC2_SHIFT 0 #define TIMERS_CONTEXT_VALIDLC2_MASK 0x1 @@ -978,3 +1268,4 @@ struct timers_context { #define TIMERS_CONTEXT_RESERVED3_SHIFT 29 }; #endif /* __COMMON_HSI__ */ +#endif diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h index b5ebc697d05f..1aa0727c4136 100644 --- a/include/linux/qed/eth_common.h 
+++ b/include/linux/qed/eth_common.h @@ -13,9 +13,12 @@ /* ETH FW CONSTANTS */ /********************/ #define ETH_HSI_VER_MAJOR 3 -#define ETH_HSI_VER_MINOR 0 -#define ETH_CACHE_LINE_SIZE 64 +#define ETH_HSI_VER_MINOR 10 + +#define ETH_HSI_VER_NO_PKT_LEN_TUNN 5 +#define ETH_CACHE_LINE_SIZE 64 +#define ETH_RX_CQE_GAP 32 #define ETH_MAX_RAMROD_PER_CON 8 #define ETH_TX_BD_PAGE_SIZE_BYTES 4096 #define ETH_RX_BD_PAGE_SIZE_BYTES 4096 @@ -24,15 +27,25 @@ #define ETH_TX_MIN_BDS_PER_NON_LSO_PKT 1 #define ETH_TX_MAX_BDS_PER_NON_LSO_PACKET 18 +#define ETH_TX_MAX_BDS_PER_LSO_PACKET 255 #define ETH_TX_MAX_LSO_HDR_NBD 4 #define ETH_TX_MIN_BDS_PER_LSO_PKT 3 #define ETH_TX_MIN_BDS_PER_TUNN_IPV6_WITH_EXT_PKT 3 #define ETH_TX_MIN_BDS_PER_IPV6_WITH_EXT_PKT 2 #define ETH_TX_MIN_BDS_PER_PKT_W_LOOPBACK_MODE 2 -#define ETH_TX_MAX_NON_LSO_PKT_LEN (9700 - (4 + 12 + 8)) +#define ETH_TX_MAX_NON_LSO_PKT_LEN (9700 - (4 + 4 + 12 + 8)) #define ETH_TX_MAX_LSO_HDR_BYTES 510 +#define ETH_TX_LSO_WINDOW_BDS_NUM (18 - 1) +#define ETH_TX_LSO_WINDOW_MIN_LEN 9700 +#define ETH_TX_MAX_LSO_PAYLOAD_LEN 0xFE000 +#define ETH_TX_NUM_SAME_AS_LAST_ENTRIES 320 +#define ETH_TX_INACTIVE_SAME_AS_LAST 0xFFFF #define ETH_NUM_STATISTIC_COUNTERS MAX_NUM_VPORTS +#define ETH_NUM_STATISTIC_COUNTERS_DOUBLE_VF_ZONE \ + (ETH_NUM_STATISTIC_COUNTERS - MAX_NUM_VFS / 2) +#define ETH_NUM_STATISTIC_COUNTERS_QUAD_VF_ZONE \ + (ETH_NUM_STATISTIC_COUNTERS - 3 * MAX_NUM_VFS / 4) /* Maximum number of buffers, used for RX packet placement */ #define ETH_RX_MAX_BUFF_PER_PKT 5 @@ -59,6 +72,8 @@ #define ETH_TPA_CQE_CONT_LEN_LIST_SIZE 6 #define ETH_TPA_CQE_END_LEN_LIST_SIZE 4 +/* Control frame check constants */ +#define ETH_CTL_FRAME_ETH_TYPE_NUM 4 struct eth_tx_1st_bd_flags { u8 bitfields; @@ -82,10 +97,10 @@ struct eth_tx_1st_bd_flags { /* The parsing information data fo rthe first tx bd of a given packet. 
*/ struct eth_tx_data_1st_bd { - __le16 vlan; - u8 nbds; - struct eth_tx_1st_bd_flags bd_flags; - __le16 bitfields; + __le16 vlan; + u8 nbds; + struct eth_tx_1st_bd_flags bd_flags; + __le16 bitfields; #define ETH_TX_DATA_1ST_BD_TUNN_FLAG_MASK 0x1 #define ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT 0 #define ETH_TX_DATA_1ST_BD_RESERVED0_MASK 0x1 @@ -96,7 +111,7 @@ struct eth_tx_data_1st_bd { /* The parsing information data for the second tx bd of a given packet. */ struct eth_tx_data_2nd_bd { - __le16 tunn_ip_size; + __le16 tunn_ip_size; __le16 bitfields1; #define ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_MASK 0xF #define ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_SHIFT 0 @@ -125,9 +140,14 @@ struct eth_tx_data_2nd_bd { #define ETH_TX_DATA_2ND_BD_RESERVED0_SHIFT 13 }; +/* Firmware data for L2-EDPM packet. */ +struct eth_edpm_fw_data { + struct eth_tx_data_1st_bd data_1st_bd; + struct eth_tx_data_2nd_bd data_2nd_bd; + __le32 reserved; +}; + struct eth_fast_path_cqe_fw_debug { - u8 reserved0; - u8 reserved1; __le16 reserved2; }; @@ -148,6 +168,17 @@ struct eth_tunnel_parsing_flags { #define ETH_TUNNEL_PARSING_FLAGS_IPV4_OPTIONS_SHIFT 7 }; +/* PMD flow control bits */ +struct eth_pmd_flow_flags { + u8 flags; +#define ETH_PMD_FLOW_FLAGS_VALID_MASK 0x1 +#define ETH_PMD_FLOW_FLAGS_VALID_SHIFT 0 +#define ETH_PMD_FLOW_FLAGS_TOGGLE_MASK 0x1 +#define ETH_PMD_FLOW_FLAGS_TOGGLE_SHIFT 1 +#define ETH_PMD_FLOW_FLAGS_RESERVED_MASK 0x3F +#define ETH_PMD_FLOW_FLAGS_RESERVED_SHIFT 2 +}; + /* Regular ETH Rx FP CQE. 
*/ struct eth_fast_path_rx_reg_cqe { u8 type; @@ -166,64 +197,63 @@ struct eth_fast_path_rx_reg_cqe { u8 placement_offset; struct eth_tunnel_parsing_flags tunnel_pars_flags; u8 bd_num; - u8 reserved[7]; + u8 reserved[9]; struct eth_fast_path_cqe_fw_debug fw_debug; u8 reserved1[3]; - u8 flags; -#define ETH_FAST_PATH_RX_REG_CQE_VALID_MASK 0x1 -#define ETH_FAST_PATH_RX_REG_CQE_VALID_SHIFT 0 -#define ETH_FAST_PATH_RX_REG_CQE_VALID_TOGGLE_MASK 0x1 -#define ETH_FAST_PATH_RX_REG_CQE_VALID_TOGGLE_SHIFT 1 -#define ETH_FAST_PATH_RX_REG_CQE_RESERVED2_MASK 0x3F -#define ETH_FAST_PATH_RX_REG_CQE_RESERVED2_SHIFT 2 + struct eth_pmd_flow_flags pmd_flags; }; /* TPA-continue ETH Rx FP CQE. */ struct eth_fast_path_rx_tpa_cont_cqe { - u8 type; - u8 tpa_agg_index; - __le16 len_list[ETH_TPA_CQE_CONT_LEN_LIST_SIZE]; - u8 reserved[5]; - u8 reserved1; - __le16 reserved2[ETH_TPA_CQE_CONT_LEN_LIST_SIZE]; + u8 type; + u8 tpa_agg_index; + __le16 len_list[ETH_TPA_CQE_CONT_LEN_LIST_SIZE]; + u8 reserved; + u8 reserved1; + __le16 reserved2[ETH_TPA_CQE_CONT_LEN_LIST_SIZE]; + u8 reserved3[3]; + struct eth_pmd_flow_flags pmd_flags; }; /* TPA-end ETH Rx FP CQE. */ struct eth_fast_path_rx_tpa_end_cqe { - u8 type; - u8 tpa_agg_index; - __le16 total_packet_len; - u8 num_of_bds; - u8 end_reason; - __le16 num_of_coalesced_segs; - __le32 ts_delta; - __le16 len_list[ETH_TPA_CQE_END_LEN_LIST_SIZE]; - u8 reserved1[3]; - u8 reserved2; - __le16 reserved3[ETH_TPA_CQE_END_LEN_LIST_SIZE]; + u8 type; + u8 tpa_agg_index; + __le16 total_packet_len; + u8 num_of_bds; + u8 end_reason; + __le16 num_of_coalesced_segs; + __le32 ts_delta; + __le16 len_list[ETH_TPA_CQE_END_LEN_LIST_SIZE]; + __le16 reserved3[ETH_TPA_CQE_END_LEN_LIST_SIZE]; + __le16 reserved1; + u8 reserved2; + struct eth_pmd_flow_flags pmd_flags; }; /* TPA-start ETH Rx FP CQE. 
*/ struct eth_fast_path_rx_tpa_start_cqe { - u8 type; - u8 bitfields; + u8 type; + u8 bitfields; #define ETH_FAST_PATH_RX_TPA_START_CQE_RSS_HASH_TYPE_MASK 0x7 #define ETH_FAST_PATH_RX_TPA_START_CQE_RSS_HASH_TYPE_SHIFT 0 #define ETH_FAST_PATH_RX_TPA_START_CQE_TC_MASK 0xF #define ETH_FAST_PATH_RX_TPA_START_CQE_TC_SHIFT 3 #define ETH_FAST_PATH_RX_TPA_START_CQE_RESERVED0_MASK 0x1 #define ETH_FAST_PATH_RX_TPA_START_CQE_RESERVED0_SHIFT 7 - __le16 seg_len; + __le16 seg_len; struct parsing_and_err_flags pars_flags; - __le16 vlan_tag; - __le32 rss_hash; - __le16 len_on_first_bd; - u8 placement_offset; + __le16 vlan_tag; + __le32 rss_hash; + __le16 len_on_first_bd; + u8 placement_offset; struct eth_tunnel_parsing_flags tunnel_pars_flags; - u8 tpa_agg_index; - u8 header_len; - __le16 ext_bd_len_list[ETH_TPA_CQE_START_LEN_LIST_SIZE]; + u8 tpa_agg_index; + u8 header_len; + __le16 ext_bd_len_list[ETH_TPA_CQE_START_LEN_LIST_SIZE]; struct eth_fast_path_cqe_fw_debug fw_debug; + u8 reserved; + struct eth_pmd_flow_flags pmd_flags; }; /* The L4 pseudo checksum mode for Ethernet */ @@ -245,15 +275,7 @@ struct eth_slow_path_rx_cqe { u8 reserved[25]; __le16 echo; u8 reserved1; - u8 flags; -/* for PMD mode - valid indication */ -#define ETH_SLOW_PATH_RX_CQE_VALID_MASK 0x1 -#define ETH_SLOW_PATH_RX_CQE_VALID_SHIFT 0 -/* for PMD mode - valid toggle indication */ -#define ETH_SLOW_PATH_RX_CQE_VALID_TOGGLE_MASK 0x1 -#define ETH_SLOW_PATH_RX_CQE_VALID_TOGGLE_SHIFT 1 -#define ETH_SLOW_PATH_RX_CQE_RESERVED2_MASK 0x3F -#define ETH_SLOW_PATH_RX_CQE_RESERVED2_SHIFT 2 + struct eth_pmd_flow_flags pmd_flags; }; /* union for all ETH Rx CQE types */ @@ -276,6 +298,11 @@ enum eth_rx_cqe_type { MAX_ETH_RX_CQE_TYPE }; +struct eth_rx_pmd_cqe { + union eth_rx_cqe cqe; + u8 reserved[ETH_RX_CQE_GAP]; +}; + enum eth_rx_tunn_type { ETH_RX_NO_TUNN, ETH_RX_TUNN_GENEVE, @@ -313,8 +340,8 @@ struct eth_tx_2nd_bd { /* The parsing information data for the third tx bd of a given packet. 
*/ struct eth_tx_data_3rd_bd { - __le16 lso_mss; - __le16 bitfields; + __le16 lso_mss; + __le16 bitfields; #define ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_MASK 0xF #define ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_SHIFT 0 #define ETH_TX_DATA_3RD_BD_HDR_NBD_MASK 0xF @@ -323,8 +350,8 @@ struct eth_tx_data_3rd_bd { #define ETH_TX_DATA_3RD_BD_START_BD_SHIFT 8 #define ETH_TX_DATA_3RD_BD_RESERVED0_MASK 0x7F #define ETH_TX_DATA_3RD_BD_RESERVED0_SHIFT 9 - u8 tunn_l4_hdr_start_offset_w; - u8 tunn_hdr_size_w; + u8 tunn_l4_hdr_start_offset_w; + u8 tunn_hdr_size_w; }; /* The third tx bd of a given packet */ @@ -355,10 +382,10 @@ struct eth_tx_bd { }; union eth_tx_bd_types { - struct eth_tx_1st_bd first_bd; - struct eth_tx_2nd_bd second_bd; - struct eth_tx_3rd_bd third_bd; - struct eth_tx_bd reg_bd; + struct eth_tx_1st_bd first_bd; + struct eth_tx_2nd_bd second_bd; + struct eth_tx_3rd_bd third_bd; + struct eth_tx_bd reg_bd; }; /* Mstorm Queue Zone */ @@ -389,8 +416,8 @@ struct eth_db_data { #define ETH_DB_DATA_RESERVED_SHIFT 5 #define ETH_DB_DATA_AGG_VAL_SEL_MASK 0x3 #define ETH_DB_DATA_AGG_VAL_SEL_SHIFT 6 - u8 agg_flags; - __le16 bd_prod; + u8 agg_flags; + __le16 bd_prod; }; #endif /* __ETH_COMMON__ */ diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h index b3c0feb15ae9..8f64b1223c2f 100644 --- a/include/linux/qed/iscsi_common.h +++ b/include/linux/qed/iscsi_common.h @@ -311,7 +311,7 @@ struct iscsi_login_req_hdr { #define ISCSI_LOGIN_REQ_HDR_DATA_SEG_LEN_SHIFT 0 #define ISCSI_LOGIN_REQ_HDR_TOTAL_AHS_LEN_MASK 0xFF #define ISCSI_LOGIN_REQ_HDR_TOTAL_AHS_LEN_SHIFT 24 - __le32 isid_TABC; + __le32 isid_tabc; __le16 tsih; __le16 isid_d; __le32 itt; @@ -464,7 +464,7 @@ struct iscsi_login_response_hdr { #define ISCSI_LOGIN_RESPONSE_HDR_DATA_SEG_LEN_SHIFT 0 #define ISCSI_LOGIN_RESPONSE_HDR_TOTAL_AHS_LEN_MASK 0xFF #define ISCSI_LOGIN_RESPONSE_HDR_TOTAL_AHS_LEN_SHIFT 24 - __le32 isid_TABC; + __le32 isid_tabc; __le16 tsih; __le16 isid_d; __le32 itt; @@ -688,8 +688,7 @@ 
union iscsi_cqe { enum iscsi_cqes_type { ISCSI_CQE_TYPE_SOLICITED = 1, ISCSI_CQE_TYPE_UNSOLICITED, - ISCSI_CQE_TYPE_SOLICITED_WITH_SENSE - , + ISCSI_CQE_TYPE_SOLICITED_WITH_SENSE, ISCSI_CQE_TYPE_TASK_CLEANUP, ISCSI_CQE_TYPE_DUMMY, MAX_ISCSI_CQES_TYPE @@ -769,9 +768,9 @@ enum iscsi_eqe_opcode { ISCSI_EVENT_TYPE_UPDATE_CONN, ISCSI_EVENT_TYPE_CLEAR_SQ, ISCSI_EVENT_TYPE_TERMINATE_CONN, + ISCSI_EVENT_TYPE_MAC_UPDATE_CONN, ISCSI_EVENT_TYPE_ASYN_CONNECT_COMPLETE, ISCSI_EVENT_TYPE_ASYN_TERMINATE_DONE, - RESERVED8, RESERVED9, ISCSI_EVENT_TYPE_START_OF_ERROR_TYPES = 10, ISCSI_EVENT_TYPE_ASYN_ABORT_RCVD, @@ -867,6 +866,7 @@ enum iscsi_ramrod_cmd_id { ISCSI_RAMROD_CMD_ID_UPDATE_CONN = 4, ISCSI_RAMROD_CMD_ID_TERMINATION_CONN = 5, ISCSI_RAMROD_CMD_ID_CLEAR_SQ = 6, + ISCSI_RAMROD_CMD_ID_MAC_UPDATE = 7, MAX_ISCSI_RAMROD_CMD_ID }; @@ -883,6 +883,16 @@ union iscsi_seq_num { __le16 r2t_sn; }; +struct iscsi_spe_conn_mac_update { + struct iscsi_slow_path_hdr hdr; + __le16 conn_id; + __le32 fw_cid; + __le16 remote_mac_addr_lo; + __le16 remote_mac_addr_mid; + __le16 remote_mac_addr_hi; + u8 reserved0[2]; +}; + struct iscsi_spe_conn_offload { struct iscsi_slow_path_hdr hdr; __le16 conn_id; @@ -1302,14 +1312,6 @@ struct mstorm_iscsi_stats_drv { struct regpair iscsi_rx_dropped_pdus_task_not_valid; }; -struct ooo_opaque { - __le32 cid; - u8 drop_isle; - u8 drop_size; - u8 ooo_opcode; - u8 ooo_isle; -}; - struct pstorm_iscsi_stats_drv { struct regpair iscsi_tx_bytes_cnt; struct regpair iscsi_tx_packet_cnt; diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 7e441bdeabdc..72d88cf3ca25 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -16,19 +16,6 @@ #include #include -/* dma_addr_t manip */ -#define DMA_LO_LE(x) cpu_to_le32(lower_32_bits(x)) -#define DMA_HI_LE(x) cpu_to_le32(upper_32_bits(x)) -#define DMA_REGPAIR_LE(x, val) do { \ - (x).hi = DMA_HI_LE((val)); \ - (x).lo = DMA_LO_LE((val)); \ - } while (0) - -#define HILO_GEN(hi, lo, 
type) ((((type)(hi)) << 32) + (lo)) -#define HILO_64(hi, lo) HILO_GEN((le32_to_cpu(hi)), (le32_to_cpu(lo)), u64) -#define HILO_64_REGPAIR(regpair) (HILO_64(regpair.hi, regpair.lo)) -#define HILO_DMA_REGPAIR(regpair) ((dma_addr_t)HILO_64_REGPAIR(regpair)) - enum qed_chain_mode { /* Each Page contains a next pointer at its end */ QED_CHAIN_MODE_NEXT_PTR, diff --git a/include/linux/qed/tcp_common.h b/include/linux/qed/tcp_common.h index accba0e6b704..dc3889d1bbe6 100644 --- a/include/linux/qed/tcp_common.h +++ b/include/linux/qed/tcp_common.h @@ -11,6 +11,14 @@ #define TCP_INVALID_TIMEOUT_VAL -1 +struct ooo_opaque { + __le32 cid; + u8 drop_isle; + u8 drop_size; + u8 ooo_opcode; + u8 ooo_isle; +}; + enum tcp_connect_mode { TCP_CONNECT_ACTIVE, TCP_CONNECT_PASSIVE, @@ -18,14 +26,10 @@ enum tcp_connect_mode { }; struct tcp_init_params { - __le32 max_cwnd; - __le16 dup_ack_threshold; + __le32 two_msl_timer; __le16 tx_sws_timer; - __le16 min_rto; - __le16 min_rto_rt; - __le16 max_rto; u8 maxfinrt; - u8 reserved[1]; + u8 reserved[9]; }; enum tcp_ip_version { -- cgit v1.2.3 From adca058b56108eb3458165c6a9e5d78558be8b52 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Fri, 19 Aug 2016 20:17:02 -0700 Subject: iio: buffer-callback: allow getting underlying iio_dev Add iio_channel_cb_get_iio_dev function to allow getting the underlying iio_dev. This is useful for setting the trigger of the consumer ADC device. 
Signed-off-by: Matt Ranostay Signed-off-by: Jonathan Cameron --- include/linux/iio/consumer.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index 3d672f72e7ec..9edccfba1ffb 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -164,6 +164,18 @@ void iio_channel_stop_all_cb(struct iio_cb_buffer *cb_buff); struct iio_channel *iio_channel_cb_get_channels(const struct iio_cb_buffer *cb_buffer); +/** + * iio_channel_cb_get_iio_dev() - get access to the underlying device. + * @cb_buffer: The callback buffer from whom we want the device + * information. + * + * This function allows one to obtain information about the device. + * The primary aim is to allow drivers that are consuming a device to query + * things like current trigger. + */ +struct iio_dev +*iio_channel_cb_get_iio_dev(const struct iio_cb_buffer *cb_buffer); + /** * iio_read_channel_raw() - read from a given channel * @chan: The channel being queried. -- cgit v1.2.3 From 7f6cf7414538181f4091b06e905d19a23a451108 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 15 Aug 2016 12:12:47 -0700 Subject: iio: hid-sensors: use asynchronous resume Some platforms power off sensor hubs during S3 suspend, which will require longer time to resume. This hurts system resume time, so resume asynchronously. 
Signed-off-by: Srinivas Pandruvada Signed-off-by: Jonathan Cameron --- include/linux/hid-sensor-hub.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index c02b5ce6c5cd..dd85f3503410 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -236,6 +236,7 @@ struct hid_sensor_common { struct hid_sensor_hub_attribute_info report_state; struct hid_sensor_hub_attribute_info power_state; struct hid_sensor_hub_attribute_info sensitivity; + struct work_struct work; }; /* Convert from hid unit expo to regular exponent */ -- cgit v1.2.3 From 7ec99de36f402618ae44147ac7fa9a07e4757a5f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 30 Jun 2016 13:58:26 -0700 Subject: rcu: Provide exact CPU-online tracking for RCU Up to now, RCU has assumed that the CPU-online process makes it from CPU_UP_PREPARE to set_cpu_online() within one jiffy. Given the recent rise of virtualized environments, this assumption is very clearly obsolete. Failing to meet this deadline can result in RCU paying attention to an incoming CPU for one jiffy, then ignoring it until the grace period following the one in which that CPU sets itself online. This situation might prove to be fatally disappointing to any RCU read-side critical sections that had the misfortune to execute during the time in which RCU was ignoring the slow-to-come-online CPU. This commit therefore updates RCU's internal CPU state-tracking information at notify_cpu_starting() time, thus providing RCU with an exact transition of the CPU's state from offline to online. Note that this means that incoming CPUs must not use RCU read-side critical section (other than those of SRCU) until notify_cpu_starting() time. Note also that the CPU_STARTING notifiers -are- allowed to use RCU read-side critical sections. (Of course, CPU-hotplug notifiers are rapidly becoming obsolete, so you need to act fast!) 
If a given architecture or CPU family needs to use RCU read-side critical sections earlier, the call to rcu_cpu_starting() from notify_cpu_starting() will need to be architecture-specific, with architectures that need early use being required to hand-place the call to rcu_cpu_starting() at some point preceding the call to notify_cpu_starting(). Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 1aa62e1a761b..321f9ed552a9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -334,6 +334,7 @@ void rcu_sched_qs(void); void rcu_bh_qs(void); void rcu_check_callbacks(int user); void rcu_report_dead(unsigned int cpu); +void rcu_cpu_starting(unsigned int cpu); #ifndef CONFIG_TINY_RCU void rcu_end_inkernel_boot(void); -- cgit v1.2.3 From 489bb3d252d41392ce52590e49f0ae8782fb016e Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 21 Aug 2016 16:54:40 +0900 Subject: torture: TOROUT_STRING(): Insert a space between flag and message The TOROUT_STRING() macro does not insert a space between the flag and the message. In contrast, other similar torture-test dmesg messages consistently supply a single space character. This difference makes the output hard to read and to mechanically parse. This commit therefore adds a space character between flag and message in TOROUT_STRING() output. Signed-off-by: SeongJae Park Signed-off-by: Paul E. 
McKenney --- include/linux/torture.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 6685a73736a2..a45702eb3e7b 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -43,7 +43,7 @@ #define TORTURE_FLAG "-torture:" #define TOROUT_STRING(s) \ - pr_alert("%s" TORTURE_FLAG s "\n", torture_type) + pr_alert("%s" TORTURE_FLAG " %s\n", torture_type, s) #define VERBOSE_TOROUT_STRING(s) \ do { if (verbose) pr_alert("%s" TORTURE_FLAG " %s\n", torture_type, s); } while (0) #define VERBOSE_TOROUT_ERRSTRING(s) \ -- cgit v1.2.3 From d8c2c7e3404e5bcaeae4af78d6935e5b8fcc97ee Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 22 Aug 2016 13:25:11 +0300 Subject: qed*: Add support for VFs over legacy PFs Modern VFs can't run on old non-compatible PFs as the fastpath HSI is slightly changed - but as the HSI is actually very close [basically, a single bit whose meaning flipped] this can be supported with small modifications. The major differences would be in: - Recognizing that VF is running on top of a legacy PF. - Returning some slowpath configurations that are no longer needed on top of modern PFs, but would be required when working over the legacy ones. Signed-off-by: Yuval Mintz Signed-off-by: David S.
Miller --- include/linux/qed/qed_eth_if.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 4475a9d8ae15..33c24ebc9b7f 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -23,6 +23,9 @@ struct qed_dev_eth_info { u8 port_mac[ETH_ALEN]; u8 num_vlan_filters; + + /* Legacy VF - this affects the datapath, so qede has to know */ + bool is_legacy; }; struct qed_update_vport_rss_params { -- cgit v1.2.3 From f1ff8666ed87b0013e45ce2d335085407bb38a60 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Tue, 23 Aug 2016 07:19:50 +0300 Subject: qed: Fix address macros Last FW submission reverted various macros into an older form, where they generate compilation warnings on some architectures. Bring back the newer macros instead. Fixes: 05fafbfb3d77 ("qed: utilize FW 8.10.10.0") Reported-by: kbuild test robot Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index d306e0b55581..70b30e4d3cc4 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -13,23 +13,17 @@ #include /* dma_addr_t manip */ -#define DMA_LO(x) ((u32)(((dma_addr_t)(x)) & 0xffffffff)) -#define DMA_HI(x) ((u32)(((dma_addr_t)(x)) >> 32)) - -#define DMA_LO_LE(x) cpu_to_le32(DMA_LO(x)) -#define DMA_HI_LE(x) cpu_to_le32(DMA_HI(x)) - -/* It's assumed that whoever includes this has previously included an hsi - * file defining the regpair. 
- */ -#define DMA_REGPAIR_LE(x, val) (x).hi = DMA_HI_LE((val)); \ - (x).lo = DMA_LO_LE((val)) +#define DMA_LO_LE(x) cpu_to_le32(lower_32_bits(x)) +#define DMA_HI_LE(x) cpu_to_le32(upper_32_bits(x)) +#define DMA_REGPAIR_LE(x, val) do { \ + (x).hi = DMA_HI_LE((val)); \ + (x).lo = DMA_LO_LE((val)); \ + } while (0) #define HILO_GEN(hi, lo, type) ((((type)(hi)) << 32) + (lo)) -#define HILO_DMA(hi, lo) HILO_GEN(hi, lo, dma_addr_t) -#define HILO_64(hi, lo) HILO_GEN(hi, lo, u64) -#define HILO_DMA_REGPAIR(regpair) (HILO_DMA(regpair.hi, regpair.lo)) +#define HILO_64(hi, lo) HILO_GEN((le32_to_cpu(hi)), (le32_to_cpu(lo)), u64) #define HILO_64_REGPAIR(regpair) (HILO_64(regpair.hi, regpair.lo)) +#define HILO_DMA_REGPAIR(regpair) ((dma_addr_t)HILO_64_REGPAIR(regpair)) #ifndef __COMMON_HSI__ #define __COMMON_HSI__ -- cgit v1.2.3 From 30d1de08c87ddde6f73936c3350e7e153988fe02 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 23 Aug 2016 12:17:48 -0700 Subject: hv_netvsc: make inline functions static Several new functions were introduced into hyperv.h but only used in one file. Move them and let compiler decide on inline. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/hyperv.h | 84 -------------------------------------------------- 1 file changed, 84 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index b10954a66939..a6bc974def8f 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1422,88 +1422,4 @@ static inline bool hv_need_to_signal_on_read(struct hv_ring_buffer_info *rbi) return false; } -/* - * An API to support in-place processing of incoming VMBUS packets. 
- */ -#define VMBUS_PKT_TRAILER 8 - -static inline struct vmpacket_descriptor * -get_next_pkt_raw(struct vmbus_channel *channel) -{ - struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 read_loc = ring_info->priv_read_index; - void *ring_buffer = hv_get_ring_buffer(ring_info); - struct vmpacket_descriptor *cur_desc; - u32 packetlen; - u32 dsize = ring_info->ring_datasize; - u32 delta = read_loc - ring_info->ring_buffer->read_index; - u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta); - - if (bytes_avail_toread < sizeof(struct vmpacket_descriptor)) - return NULL; - - if ((read_loc + sizeof(*cur_desc)) > dsize) - return NULL; - - cur_desc = ring_buffer + read_loc; - packetlen = cur_desc->len8 << 3; - - /* - * If the packet under consideration is wrapping around, - * return failure. - */ - if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > (dsize - 1)) - return NULL; - - return cur_desc; -} - -/* - * A helper function to step through packets "in-place" - * This API is to be called after each successful call - * get_next_pkt_raw(). - */ -static inline void put_pkt_raw(struct vmbus_channel *channel, - struct vmpacket_descriptor *desc) -{ - struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 read_loc = ring_info->priv_read_index; - u32 packetlen = desc->len8 << 3; - u32 dsize = ring_info->ring_datasize; - - if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > dsize) - BUG(); - /* - * Include the packet trailer. - */ - ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER; -} - -/* - * This call commits the read index and potentially signals the host. 
- * Here is the pattern for using the "in-place" consumption APIs: - * - * while (get_next_pkt_raw() { - * process the packet "in-place"; - * put_pkt_raw(); - * } - * if (packets processed in place) - * commit_rd_index(); - */ -static inline void commit_rd_index(struct vmbus_channel *channel) -{ - struct hv_ring_buffer_info *ring_info = &channel->inbound; - /* - * Make sure all reads are done before we update the read index since - * the writer may start writing to the read area once the read index - * is updated. - */ - virt_rmb(); - ring_info->ring_buffer->read_index = ring_info->priv_read_index; - - if (hv_need_to_signal_on_read(ring_info)) - vmbus_set_event(channel); -} - - #endif /* _HYPERV_H */ -- cgit v1.2.3 From e3f74b841d482e962b9f5a907eeb25eeeb09aa60 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 23 Aug 2016 12:17:56 -0700 Subject: hv_netvsc: report vmbus name in ethtool Make netvsc on vmbus behave more like PCI. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/linux/hyperv.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index a6bc974def8f..b01c8c3dd531 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1114,6 +1114,13 @@ int __must_check __vmbus_driver_register(struct hv_driver *hv_driver, const char *mod_name); void vmbus_driver_unregister(struct hv_driver *hv_driver); +static inline const char *vmbus_dev_name(const struct hv_device *device_obj) +{ + const struct kobject *kobj = &device_obj->device.kobj; + + return kobj->name; +} + void vmbus_hvsock_device_unregister(struct vmbus_channel *channel); int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, -- cgit v1.2.3 From 3a1281848830fcb3202cfd7ffe62d19641471d05 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 9 Aug 2016 17:36:02 -0700 Subject: soc: qcom: smd: Correct compile stub prototypes The prototypes for the compile stubs were not properly marked as static inline; this patch corrects this.
Fixes: f79a917e69e1 ("Merge tag 'qcom-soc-for-4.7-2' into net-next") Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross --- include/linux/soc/qcom/smd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/smd.h b/include/linux/soc/qcom/smd.h index cbb0f06c41b2..910ce1d9ba89 100644 --- a/include/linux/soc/qcom/smd.h +++ b/include/linux/soc/qcom/smd.h @@ -83,14 +83,14 @@ qcom_smd_open_channel(struct qcom_smd_channel *channel, return NULL; } -void *qcom_smd_get_drvdata(struct qcom_smd_channel *channel) +static inline void *qcom_smd_get_drvdata(struct qcom_smd_channel *channel) { /* This shouldn't be possible */ WARN_ON(1); return NULL; } -void qcom_smd_set_drvdata(struct qcom_smd_channel *channel, void *data) +static inline void qcom_smd_set_drvdata(struct qcom_smd_channel *channel, void *data) { /* This shouldn't be possible */ WARN_ON(1); -- cgit v1.2.3 From 0a0c08cae01b33b29abd24608d3800986546f0af Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 9 Aug 2016 17:39:19 -0700 Subject: soc: qcom: smd: Simplify multi channel handling Multi-channel clients split between several drivers need a way to close individual channels, as these drivers might be removed individually. With this in place, the responsibility for closing additionally opened channels moves to the client as well, leaving smd concerned only with the primary channel. With this approach we will only trigger removal of SMD devices based on the state of the primary channel; however, we get in sync with how rpmsg works.
Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross --- include/linux/soc/qcom/smd.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/smd.h b/include/linux/soc/qcom/smd.h index 910ce1d9ba89..324b1decfffb 100644 --- a/include/linux/soc/qcom/smd.h +++ b/include/linux/soc/qcom/smd.h @@ -55,6 +55,7 @@ void qcom_smd_driver_unregister(struct qcom_smd_driver *drv); struct qcom_smd_channel *qcom_smd_open_channel(struct qcom_smd_channel *channel, const char *name, qcom_smd_cb_t cb); +void qcom_smd_close_channel(struct qcom_smd_channel *channel); void *qcom_smd_get_drvdata(struct qcom_smd_channel *channel); void qcom_smd_set_drvdata(struct qcom_smd_channel *channel, void *data); int qcom_smd_send(struct qcom_smd_channel *channel, const void *data, int len); @@ -83,6 +84,12 @@ qcom_smd_open_channel(struct qcom_smd_channel *channel, return NULL; } +static inline void qcom_smd_close_channel(struct qcom_smd_channel *channel) +{ + /* This shouldn't be possible */ + WARN_ON(1); +} + static inline void *qcom_smd_get_drvdata(struct qcom_smd_channel *channel) { /* This shouldn't be possible */ -- cgit v1.2.3 From da0573026c2d3d445c39385024bfc3ce6beebe09 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 15 Aug 2016 11:15:57 -0700 Subject: soc: qcom: smd: Represent smd edges as devices By representing each edge as its own device the channels are no longer tied to being parented by the same smd device and as such an edge can live as children of e.g. remoteproc instances. 
Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross --- include/linux/soc/qcom/smd.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/smd.h b/include/linux/soc/qcom/smd.h index 324b1decfffb..f148e0ffbec7 100644 --- a/include/linux/soc/qcom/smd.h +++ b/include/linux/soc/qcom/smd.h @@ -61,6 +61,10 @@ void qcom_smd_set_drvdata(struct qcom_smd_channel *channel, void *data); int qcom_smd_send(struct qcom_smd_channel *channel, const void *data, int len); +struct qcom_smd_edge *qcom_smd_register_edge(struct device *parent, + struct device_node *node); +int qcom_smd_unregister_edge(struct qcom_smd_edge *edge); + #else static inline int qcom_smd_driver_register(struct qcom_smd_driver *drv) @@ -111,6 +115,20 @@ static inline int qcom_smd_send(struct qcom_smd_channel *channel, return -ENXIO; } +static inline struct qcom_smd_edge * +qcom_smd_register_edge(struct device *parent, + struct device_node *node) +{ + return ERR_PTR(-ENXIO); +} + +static inline int qcom_smd_unregister_edge(struct qcom_smd_edge *edge) +{ + /* This shouldn't be possible */ + WARN_ON(1); + return -ENXIO; +} + #endif #define module_qcom_smd_driver(__smd_driver) \ -- cgit v1.2.3 From ba14a194a434ccc8f733e263ad2ce941e35e5787 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 11 Aug 2016 02:35:21 -0700 Subject: fork: Add generic vmalloced stack support If CONFIG_VMAP_STACK=y is selected, kernel stacks are allocated with __vmalloc_node_range(). Grsecurity has had a similar feature (called GRKERNSEC_KSTACKOVERFLOW=y) for a long time. Signed-off-by: Andy Lutomirski Acked-by: Michal Hocko Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/14c07d4fd173a5b117f51e8b939f9f4323e39899.1470907718.git.luto@kernel.org [ Minor edits. ] Signed-off-by: Ingo Molnar --- include/linux/sched.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 62c68e513e39..20f9f47bcfd0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1923,6 +1923,9 @@ struct task_struct { #ifdef CONFIG_MMU struct task_struct *oom_reaper_list; #endif +#ifdef CONFIG_VMAP_STACK + struct vm_struct *stack_vm_area; +#endif /* CPU-specific state of this task */ struct thread_struct thread; /* @@ -1939,6 +1942,18 @@ extern int arch_task_struct_size __read_mostly; # define arch_task_struct_size (sizeof(struct task_struct)) #endif +#ifdef CONFIG_VMAP_STACK +static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) +{ + return t->stack_vm_area; +} +#else +static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) +{ + return NULL; +} +#endif + /* Future-safe accessor for struct task_struct's cpus_allowed. */ #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) -- cgit v1.2.3 From daa460a88c09b26b68e8b017de589c217e901afb Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 19 Aug 2016 06:52:56 -0500 Subject: ftrace: Only allocate the ret_stack 'fp' field when needed This saves some memory when HAVE_FUNCTION_GRAPH_FP_TEST isn't defined. On x86_64 with newer versions of gcc which have -mfentry, it saves 400 bytes per task. Signed-off-by: Josh Poimboeuf Acked-by: Steven Rostedt Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Byungchul Park Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Nilay Vaish Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/5c7747d9ea7b5cb47ef0a8ce8a6cea6bf7aa94bf.1471607358.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 7d565afe35d2..4ad9ccc60e38 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -795,7 +795,9 @@ struct ftrace_ret_stack { unsigned long func; unsigned long long calltime; unsigned long long subtime; +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST unsigned long fp; +#endif }; /* -- cgit v1.2.3 From 9a7c348ba6a46f6270d4fe49577649dad5664fe7 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 19 Aug 2016 06:52:57 -0500 Subject: ftrace: Add return address pointer to ftrace_ret_stack Storing this value will help prevent unwinders from getting out of sync with the function graph tracer ret_stack. Now instead of needing a stateful iterator, they can compare the return address pointer to find the right ret_stack entry. Note that an array of 50 ftrace_ret_stack structs is allocated for every task. So when an arch implements this, it will add either 200 or 400 bytes of memory usage per task (depending on whether it's a 32-bit or 64-bit platform). Signed-off-by: Josh Poimboeuf Acked-by: Steven Rostedt Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Byungchul Park Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Nilay Vaish Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/a95cfcc39e8f26b89a430c56926af0bb217bc0a1.1471607358.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4ad9ccc60e38..483e02a50d37 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -798,6 +798,9 @@ struct ftrace_ret_stack { #ifdef HAVE_FUNCTION_GRAPH_FP_TEST unsigned long fp; #endif +#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR + unsigned long *retp; +#endif }; /* @@ -809,7 +812,7 @@ extern void return_to_handler(void); extern int ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, - unsigned long frame_pointer); + unsigned long frame_pointer, unsigned long *retp); /* * Sometimes we don't want to trace a function with the function -- cgit v1.2.3 From 223918e32a87c79ac55ca4aa513ba405ba4d57cd Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 19 Aug 2016 06:52:58 -0500 Subject: ftrace: Add ftrace_graph_ret_addr() stack unwinding helpers When function graph tracing is enabled for a function, ftrace modifies the stack by replacing the original return address with the address of a hook function (return_to_handler). Stack unwinders need a way to get the original return address. Add an arch-independent helper function for that named ftrace_graph_ret_addr(). This adds two variations of the function: one depends on HAVE_FUNCTION_GRAPH_RET_ADDR_PTR, and the other relies on an index state variable. The former is recommended because, in some cases, the latter can cause problems when the unwinder skips stack frames. It can get out of sync with the ret_stack index and wrong addresses can be reported for the stack trace. Once all arches have been ported to use HAVE_FUNCTION_GRAPH_RET_ADDR_PTR, we can get rid of the distinction. 
Signed-off-by: Josh Poimboeuf Acked-by: Steven Rostedt Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Byungchul Park Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Nilay Vaish Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/36bd90f762fc5e5af3929e3797a68a64906421cf.1471607358.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 483e02a50d37..6f93ac46e7f0 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -814,6 +814,9 @@ extern int ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, unsigned long frame_pointer, unsigned long *retp); +unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, + unsigned long ret, unsigned long *retp); + /* * Sometimes we don't want to trace a function with the function * graph tracer but we want them to keep traced by the usual function @@ -875,6 +878,13 @@ static inline int task_curr_ret_stack(struct task_struct *tsk) return -1; } +static inline unsigned long +ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, + unsigned long *retp) +{ + return ret; +} + static inline void pause_graph_tracing(void) { } static inline void unpause_graph_tracing(void) { } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- cgit v1.2.3 From ed0bd721c9322e8e6ee953884706080d40106a6e Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 18 Aug 2016 13:37:21 +0100 Subject: hwrng: core - Improve description of the ->read() interface Currently, very few RNG drivers support single byte reads using the ->read() interface. Of the 14 drivers in drivers/char/hw_random that support this interface only three of these actually support max == 1. 
The other behaviours vary between return 0, return 2, return 4 and return -EIO. This is not a problem in practice because the core hw_random code never performs a read shorter than 16 bytes. The documentation for this function already constrains the alignment of the buffer pointer, so let's also guarantee that the buffer is at least as large as its alignment. This constraint is intended to be the weakest guarantee necessary to allow driver writers to safely simplify their code. Signed-off-by: Daniel Thompson Signed-off-by: Herbert Xu --- include/linux/hw_random.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 4f7d8f4b1e9a..34a0dc18f327 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -29,7 +29,9 @@ * Returns the number of lower random bytes in "data". * Must not be NULL. *OBSOLETE* * @read: New API. drivers can fill up to max bytes of data - * into the buffer. The buffer is aligned for any type. + * into the buffer. The buffer is aligned for any type + * and max is guaranteed to be >= to that alignment + * (either 4 or 8 depending on architecture). * @priv: Private data, for use by the RNG driver. * @quality: Estimation of true entropy in RNG's bitstream * (per mill). -- cgit v1.2.3 From f01d5cb24ea92494f9e093e6ee411364a686be99 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 2 Jun 2016 16:45:08 +0200 Subject: libceph: rename ceph_entity_name_encode() -> ceph_auth_entity_name_encode() Clear up EntityName vs entity_name_t confusion.
Signed-off-by: Ilya Dryomov Reviewed-by: Mike Christie Reviewed-by: Alex Elder --- include/linux/ceph/auth.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index 1563265d2097..374bb1c4ef52 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -104,7 +104,7 @@ extern int ceph_auth_build_hello(struct ceph_auth_client *ac, extern int ceph_handle_auth_reply(struct ceph_auth_client *ac, void *buf, size_t len, void *reply_buf, size_t reply_len); -extern int ceph_entity_name_encode(const char *name, void **p, void *end); +int ceph_auth_entity_name_encode(const char *name, void **p, void *end); extern int ceph_build_auth(struct ceph_auth_client *ac, void *msg_buf, size_t msg_len); -- cgit v1.2.3 From a4ed38d7a180f184a6e7aedd09db9ca4b1e6a71c Mon Sep 17 00:00:00 2001 From: Douglas Fuller Date: Fri, 17 Jul 2015 13:18:07 -0700 Subject: libceph: support for CEPH_OSD_OP_LIST_WATCHERS Add support for this Ceph OSD op, needed to support the RBD exclusive lock feature. 
Signed-off-by: Douglas Fuller [idryomov@gmail.com: refactor, misc fixes throughout] Signed-off-by: Ilya Dryomov Reviewed-by: Mike Christie Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 858932304260..19821a191732 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -120,6 +120,9 @@ struct ceph_osd_req_op { struct ceph_osd_data request_data; struct ceph_osd_data response_data; } notify; + struct { + struct ceph_osd_data response_data; + } list_watchers; struct { u64 expected_object_size; u64 expected_write_size; @@ -249,6 +252,12 @@ struct ceph_osd_linger_request { size_t *preply_len; }; +struct ceph_watch_item { + struct ceph_entity_name name; + u64 cookie; + struct ceph_entity_addr addr; +}; + struct ceph_osd_client { struct ceph_client *client; @@ -346,7 +355,6 @@ extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages); - extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, const char *class, const char *method); @@ -434,5 +442,10 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, size_t *preply_len); int ceph_osdc_watch_check(struct ceph_osd_client *osdc, struct ceph_osd_linger_request *lreq); +int ceph_osdc_list_watchers(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + struct ceph_watch_item **watchers, + u32 *num_watchers); #endif -- cgit v1.2.3 From 428a715811fe74e8a8f09b830c8d3b5245096f8d Mon Sep 17 00:00:00 2001 From: Douglas Fuller Date: Wed, 17 Jun 2015 14:49:45 -0400 Subject: libceph: add ceph_osdc_call() single-page helper Add a convenience function to osd_client to send Ceph OSD 'class' ops. 
The interface assumes that the request and reply data each consist of single pages. Signed-off-by: Douglas Fuller Signed-off-by: Ilya Dryomov Reviewed-by: Mike Christie Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 19821a191732..96337b15a60d 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -397,6 +397,14 @@ extern void ceph_osdc_sync(struct ceph_osd_client *osdc); extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc); void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc); +int ceph_osdc_call(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + const char *class, const char *method, + unsigned int flags, + struct page *req_page, size_t req_len, + struct page *resp_page, size_t *resp_len); + extern int ceph_osdc_readpages(struct ceph_osd_client *osdc, struct ceph_vino vino, struct ceph_file_layout *layout, -- cgit v1.2.3 From f66241cb99dac861aa2cedb9f05ffa98d70cbc6e Mon Sep 17 00:00:00 2001 From: Douglas Fuller Date: Thu, 18 Jun 2015 13:06:10 -0700 Subject: libceph: support for advisory locking on RADOS objects This patch adds support for rados lock, unlock and break lock. Based heavily on code by Mike Christie . 
Signed-off-by: Douglas Fuller Signed-off-by: Ilya Dryomov Reviewed-by: Mike Christie Reviewed-by: Alex Elder --- include/linux/ceph/cls_lock_client.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/linux/ceph/cls_lock_client.h (limited to 'include/linux') diff --git a/include/linux/ceph/cls_lock_client.h b/include/linux/ceph/cls_lock_client.h new file mode 100644 index 000000000000..4e4dffef22bb --- /dev/null +++ b/include/linux/ceph/cls_lock_client.h @@ -0,0 +1,27 @@ +#ifndef _LINUX_CEPH_CLS_LOCK_CLIENT_H +#define _LINUX_CEPH_CLS_LOCK_CLIENT_H + +#include + +enum ceph_cls_lock_type { + CEPH_CLS_LOCK_NONE = 0, + CEPH_CLS_LOCK_EXCLUSIVE = 1, + CEPH_CLS_LOCK_SHARED = 2, +}; + +int ceph_cls_lock(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + char *lock_name, u8 type, char *cookie, + char *tag, char *desc, u8 flags); +int ceph_cls_unlock(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + char *lock_name, char *cookie); +int ceph_cls_break_lock(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + char *lock_name, char *cookie, + struct ceph_entity_name *locker); + +#endif -- cgit v1.2.3 From d4ed4a530562881cc5225050e42d96034f405aae Mon Sep 17 00:00:00 2001 From: Douglas Fuller Date: Mon, 29 Jun 2015 16:55:42 -0700 Subject: libceph: support for lock.lock_info Add an interface for the Ceph OSD lock.lock_info method and associated data structures. Based heavily on code by Mike Christie . 
Signed-off-by: Douglas Fuller [idryomov@gmail.com: refactor, misc fixes throughout] Signed-off-by: Ilya Dryomov Reviewed-by: Mike Christie Reviewed-by: Alex Elder --- include/linux/ceph/cls_lock_client.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/cls_lock_client.h b/include/linux/ceph/cls_lock_client.h index 4e4dffef22bb..84884d8d4710 100644 --- a/include/linux/ceph/cls_lock_client.h +++ b/include/linux/ceph/cls_lock_client.h @@ -9,6 +9,20 @@ enum ceph_cls_lock_type { CEPH_CLS_LOCK_SHARED = 2, }; +struct ceph_locker_id { + struct ceph_entity_name name; /* locker's client name */ + char *cookie; /* locker's cookie */ +}; + +struct ceph_locker_info { + struct ceph_entity_addr addr; /* locker's address */ +}; + +struct ceph_locker { + struct ceph_locker_id id; + struct ceph_locker_info info; +}; + int ceph_cls_lock(struct ceph_osd_client *osdc, struct ceph_object_id *oid, struct ceph_object_locator *oloc, @@ -24,4 +38,12 @@ int ceph_cls_break_lock(struct ceph_osd_client *osdc, char *lock_name, char *cookie, struct ceph_entity_name *locker); +void ceph_free_lockers(struct ceph_locker *lockers, u32 num_lockers); + +int ceph_cls_lock_info(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + char *lock_name, u8 *type, char **tag, + struct ceph_locker **lockers, u32 *num_lockers); + #endif -- cgit v1.2.3 From 6305a3b415157759bfe4b50a643fac22ea229f5c Mon Sep 17 00:00:00 2001 From: Douglas Fuller Date: Wed, 22 Jul 2015 20:59:52 -0400 Subject: libceph: support for blacklisting clients Reuse ceph_mon_generic_request infrastructure for sending monitor commands. In particular, add support for 'blacklist add' to prevent other, non-responsive clients from making further updates. 
Signed-off-by: Douglas Fuller [idryomov@gmail.com: refactor, misc fixes throughout] Signed-off-by: Ilya Dryomov Reviewed-by: Mike Christie Reviewed-by: Alex Elder --- include/linux/ceph/ceph_fs.h | 11 +++++++++++ include/linux/ceph/mon_client.h | 3 +++ 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 7868d602c0a0..c086e63dcee1 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -138,6 +138,9 @@ struct ceph_dir_layout { #define CEPH_MSG_POOLOP_REPLY 48 #define CEPH_MSG_POOLOP 49 +/* mon commands */ +#define CEPH_MSG_MON_COMMAND 50 +#define CEPH_MSG_MON_COMMAND_ACK 51 /* osd */ #define CEPH_MSG_OSD_MAP 41 @@ -176,6 +179,14 @@ struct ceph_mon_statfs_reply { struct ceph_statfs st; } __attribute__ ((packed)); +struct ceph_mon_command { + struct ceph_mon_request_header monhdr; + struct ceph_fsid fsid; + __le32 num_strs; /* always 1 */ + __le32 str_len; + char str[]; +} __attribute__ ((packed)); + struct ceph_osd_getmap { struct ceph_mon_request_header monhdr; struct ceph_fsid fsid; diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index 24d704d1ea5c..d5a3ecea578d 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -141,6 +141,9 @@ int ceph_monc_get_version(struct ceph_mon_client *monc, const char *what, int ceph_monc_get_version_async(struct ceph_mon_client *monc, const char *what, ceph_monc_callback_t cb, u64 private_data); +int ceph_monc_blacklist_add(struct ceph_mon_client *monc, + struct ceph_entity_addr *client_addr); + extern int ceph_monc_open_session(struct ceph_mon_client *monc); extern int ceph_monc_validate_auth(struct ceph_mon_client *monc); -- cgit v1.2.3 From 033268a5f01270f0ef20d1a9a078b157f4af97f8 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 12 Aug 2016 14:59:58 +0200 Subject: libceph: rename ceph_client_id() -> ceph_client_gid() It's gid / global_id in other 
places. Signed-off-by: Ilya Dryomov Reviewed-by: Mike Christie Reviewed-by: Alex Elder --- include/linux/ceph/libceph.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 83fc1fff7061..b4cffff70e44 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -264,7 +264,7 @@ extern struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, u64 supported_features, u64 required_features); -extern u64 ceph_client_id(struct ceph_client *client); +u64 ceph_client_gid(struct ceph_client *client); extern void ceph_destroy_client(struct ceph_client *client); extern int __ceph_open_session(struct ceph_client *client, unsigned long started); -- cgit v1.2.3 From 005a07bf0a92e7f0e73fc9a6c9acc992c5dbd00c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 18 Aug 2016 18:38:43 +0200 Subject: rbd: add 'client_addr' sysfs rbd device attribute Export client addr/nonce, so userspace can check if an image is being blacklisted. Signed-off-by: Mike Christie [idryomov@gmail.com: ceph_client_addr(), endianness fix] Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index b4cffff70e44..1816c5e26581 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -264,6 +264,7 @@ extern struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, u64 supported_features, u64 required_features); +struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client); u64 ceph_client_gid(struct ceph_client *client); extern void ceph_destroy_client(struct ceph_client *client); extern int __ceph_open_session(struct ceph_client *client, -- cgit v1.2.3 From 16b114a6d7973cf027e4c2b23eae1076eaf98c25 Mon Sep 17 00:00:00 2001 From: "Felipe F. 
Tonello" Date: Mon, 8 Aug 2016 21:30:04 +0100 Subject: usb: gadget: fix usb_ep_align_maybe endianness and new usb_ep_align USB spec specifies wMaxPacketSize to be little endian (as other properties), so when using this variable in the driver we should convert to the current CPU endianness if necessary. This patch also introduces usb_ep_align() which always returns the aligned buffer size for an endpoint. This is useful to be used by USB requests allocator functions. Signed-off-by: Felipe F. Tonello Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 612dbdfa388e..3cc93237ff98 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -417,9 +417,21 @@ static inline struct usb_gadget *dev_to_usb_gadget(struct device *dev) #define gadget_for_each_ep(tmp, gadget) \ list_for_each_entry(tmp, &(gadget)->ep_list, ep_list) +/** + * usb_ep_align - returns @len aligned to ep's maxpacketsize. + * @ep: the endpoint whose maxpacketsize is used to align @len + * @len: buffer size's length to align to @ep's maxpacketsize + * + * This helper is used to align buffer's size to an ep's maxpacketsize. + */ +static inline size_t usb_ep_align(struct usb_ep *ep, size_t len) +{ + return round_up(len, (size_t)le16_to_cpu(ep->desc->wMaxPacketSize)); +} + /** * usb_ep_align_maybe - returns @len aligned to ep's maxpacketsize if gadget - * requires quirk_ep_out_aligned_size, otherwise reguens len. + * requires quirk_ep_out_aligned_size, otherwise returns len. 
* @g: controller to check for quirk * @ep: the endpoint whose maxpacketsize is used to align @len * @len: buffer size's length to align to @ep's maxpacketsize @@ -430,8 +442,7 @@ static inline struct usb_gadget *dev_to_usb_gadget(struct device *dev) static inline size_t usb_ep_align_maybe(struct usb_gadget *g, struct usb_ep *ep, size_t len) { - return !g->quirk_ep_out_aligned_size ? len : - round_up(len, (size_t)ep->desc->wMaxPacketSize); + return g->quirk_ep_out_aligned_size ? usb_ep_align(ep, len) : len; } /** -- cgit v1.2.3 From 1a00b457a5482c3822bfc0fd64c088b2dba93e26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20H=C3=A4dicke?= Date: Wed, 22 Jun 2016 01:12:08 +0200 Subject: usb: gadget: composite: let USB functions process ctrl reqs in cfg0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It can sometimes be necessary for gadget drivers to process non-standard control requests, which host devices can send without having sent USB_REQ_SET_CONFIGURATION. Therefore, the req_match() usb_function method is enhanced with the new parameter "config0". When a USB configuration is active, this parameter is false. When a non-core control request is processed in composite_setup(), without an active configuration, req_match() of the USB functions of all available configurations which implement this function, is called with config0=true. Then the control request gets processed by the first usb_function instance whose req_match() returns true. 
Signed-off-by: Felix Hädicke Signed-off-by: Felipe Balbi --- include/linux/usb/composite.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 2b81b24eb5aa..4616a49a1c2e 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -220,7 +220,8 @@ struct usb_function { int (*setup)(struct usb_function *, const struct usb_ctrlrequest *); bool (*req_match)(struct usb_function *, - const struct usb_ctrlrequest *); + const struct usb_ctrlrequest *, + bool config0); void (*suspend)(struct usb_function *); void (*resume)(struct usb_function *); -- cgit v1.2.3 From 8b2ec318eece89be5e33d5313a25461a55a3177a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Sun, 12 Jun 2016 16:26:40 -0500 Subject: PCI: Add PTM clock granularity information The PTM Control register (PCIe r3.1, sec 7.32.3) contains an Effective Granularity field: This provides information relating to the expected accuracy of the PTM clock, but does not otherwise affect the PTM mechanism. Set the Effective Granularity based on the PTM Root and any intervening PTM Time Sources. This does not set Effective Granularity for Root Complex Integrated Endpoints because I don't know how to figure out clock granularity for them. The spec says: ... system software must set [Effective Granularity] to the value reported in the Local Clock Granularity field by the associated PTM Time Source. but I don't know how to identify the associated PTM Time Source. Normally it's the upstream bridge, but an integrated endpoint has no upstream bridge. 
Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 9e4b6d6f3c8d..7256f33b6a15 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -371,6 +371,7 @@ struct pci_dev { #ifdef CONFIG_PCIE_PTM unsigned int ptm_root:1; unsigned int ptm_enabled:1; + u8 ptm_granularity; #endif #ifdef CONFIG_PCI_MSI const struct attribute_group **msi_irq_groups; -- cgit v1.2.3 From 9379e6b8e0f995365dc6158a1463c8dab4f2c8da Mon Sep 17 00:00:00 2001 From: Shaun Tancheff Date: Sun, 21 Aug 2016 23:23:18 -0500 Subject: libata: Safely overwrite attached page in WRITE SAME xlat Safely overwriting the attached page to ATA format from the SCSI formatted variant. Signed-off-by: Shaun Tancheff Reviewed-by: Hannes Reinecke Acked-by: Tejun Heo --- include/linux/ata.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index adbc812c009b..45a1d71c55f1 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -1071,32 +1071,6 @@ static inline void ata_id_to_hd_driveid(u16 *id) #endif } -/* - * Write LBA Range Entries to the buffer that will cover the extent from - * sector to sector + count. This is used for TRIM and for ADD LBA(S) - * TO NV CACHE PINNED SET. - */ -static inline unsigned ata_set_lba_range_entries(void *_buffer, - unsigned num, u64 sector, unsigned long count) -{ - __le64 *buffer = _buffer; - unsigned i = 0, used_bytes; - - while (i < num) { - u64 entry = sector | - ((u64)(count > 0xffff ? 
0xffff : count) << 48); - buffer[i++] = __cpu_to_le64(entry); - if (count <= 0xffff) - break; - count -= 0xffff; - sector += 0xffff; - } - - used_bytes = ALIGN(i * 8, 512); - memset(buffer + i, 0, used_bytes - i * 8); - return used_bytes; -} - static inline bool ata_ok(u8 status) { return ((status & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | ATA_ERR)) -- cgit v1.2.3 From 7b20309428598df00ffeb0b01f5948dea6aaf1f7 Mon Sep 17 00:00:00 2001 From: Shaun Tancheff Date: Sun, 21 Aug 2016 23:23:19 -0500 Subject: libata: Add support for SCT Write Same SATA drives may support write same via SCT. This is useful for setting the drive contents to a specific pattern (0's). Translate a SCSI WRITE SAME 16 command to be either a DSM TRIM command or an SCT Write Same command. Based on the UNMAP flag: - When set translate to DSM TRIM - When not set translate to SCT Write Same Signed-off-by: Shaun Tancheff Reviewed-by: Hannes Reinecke Acked-by: Tejun Heo --- include/linux/ata.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 45a1d71c55f1..fdb180367ba1 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -105,6 +105,7 @@ enum { ATA_ID_CFA_KEY_MGMT = 162, ATA_ID_CFA_MODES = 163, ATA_ID_DATA_SET_MGMT = 169, + ATA_ID_SCT_CMD_XPORT = 206, ATA_ID_ROT_SPEED = 217, ATA_ID_PIO4 = (1 << 1), @@ -788,6 +789,48 @@ static inline bool ata_id_sense_reporting_enabled(const u16 *id) return id[ATA_ID_COMMAND_SET_4] & (1 << 6); } +/** + * + * Word: 206 - SCT Command Transport + * 15:12 - Vendor Specific + * 11:6 - Reserved + * 5 - SCT Command Transport Data Tables supported + * 4 - SCT Command Transport Features Control supported + * 3 - SCT Command Transport Error Recovery Control supported + * 2 - SCT Command Transport Write Same supported + * 1 - SCT Command Transport Long Sector Access supported + * 0 - SCT Command Transport supported + */ +static inline bool 
ata_id_sct_data_tables(const u16 *id) +{ + return id[ATA_ID_SCT_CMD_XPORT] & (1 << 5) ? true : false; +} + +static inline bool ata_id_sct_features_ctrl(const u16 *id) +{ + return id[ATA_ID_SCT_CMD_XPORT] & (1 << 4) ? true : false; +} + +static inline bool ata_id_sct_error_recovery_ctrl(const u16 *id) +{ + return id[ATA_ID_SCT_CMD_XPORT] & (1 << 3) ? true : false; +} + +static inline bool ata_id_sct_write_same(const u16 *id) +{ + return id[ATA_ID_SCT_CMD_XPORT] & (1 << 2) ? true : false; +} + +static inline bool ata_id_sct_long_sector_access(const u16 *id) +{ + return id[ATA_ID_SCT_CMD_XPORT] & (1 << 1) ? true : false; +} + +static inline bool ata_id_sct_supported(const u16 *id) +{ + return id[ATA_ID_SCT_CMD_XPORT] & (1 << 0) ? true : false; +} + /** * ata_id_major_version - get ATA level of drive * @id: Identify data -- cgit v1.2.3 From fb79e09a74faf39fbeee975e0ead442b7b7f6aac Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 29 Jun 2016 15:04:03 +0200 Subject: i2c: add i2c_trylock_bus wrapper, use it This unifies usage with i2c_lock_bus and i2c_unlock_bus, and paves the way for the next patch which looks a bit saner with this preparatory work taken care of beforehand. 
Signed-off-by: Peter Rosin Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index fffdc270ca18..c1f60a345db7 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -600,6 +600,20 @@ i2c_lock_bus(struct i2c_adapter *adapter, unsigned int flags) adapter->lock_bus(adapter, flags); } +/** + * i2c_trylock_bus - Try to get exclusive access to an I2C bus segment + * @adapter: Target I2C bus segment + * @flags: I2C_LOCK_ROOT_ADAPTER tries to locks the root i2c adapter, + * I2C_LOCK_SEGMENT tries to lock only this branch in the adapter tree + * + * Return: true if the I2C bus segment is locked, false otherwise + */ +static inline int +i2c_trylock_bus(struct i2c_adapter *adapter, unsigned int flags) +{ + return adapter->trylock_bus(adapter, flags); +} + /** * i2c_unlock_bus - Release exclusive access to an I2C bus segment * @adapter: Target I2C bus segment -- cgit v1.2.3 From b2d57b56047b99f2992fc958d7ac5ead283e9088 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Sat, 9 Jul 2016 21:53:42 +0200 Subject: i2c: mux: add support for 'i2c-mux', 'i2c-arb' and 'i2c-gate' DT subnodes Backwards compatibility is preserved; the subnodes are in practice optional. However, the mux core needs to know what subnode it should examine, so add a couple of new flags for i2c_mux_alloc for this purpose. The rule is that if the mux core finds a 'reg' property in the appropriate subnode, e.g. if 'reg' exists in the 'i2c-mux' subnode, then the mux core will assume that this is an old style entry and not an i2c-mux subnode (correspondingly for arbitrators and gates with 'i2c-arb' and 'i2c-gate'). 
Reviewed-by: Wolfram Sang Signed-off-by: Peter Rosin --- include/linux/i2c-mux.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c-mux.h b/include/linux/i2c-mux.h index d4c1d12f900d..bd74d5706f3b 100644 --- a/include/linux/i2c-mux.h +++ b/include/linux/i2c-mux.h @@ -32,7 +32,9 @@ struct i2c_mux_core { struct i2c_adapter *parent; struct device *dev; - bool mux_locked; + unsigned int mux_locked:1; + unsigned int arbitrator:1; + unsigned int gate:1; void *priv; @@ -51,7 +53,9 @@ struct i2c_mux_core *i2c_mux_alloc(struct i2c_adapter *parent, int (*deselect)(struct i2c_mux_core *, u32)); /* flags for i2c_mux_alloc */ -#define I2C_MUX_LOCKED BIT(0) +#define I2C_MUX_LOCKED BIT(0) +#define I2C_MUX_ARBITRATOR BIT(1) +#define I2C_MUX_GATE BIT(2) static inline void *i2c_mux_priv(struct i2c_mux_core *muxc) { -- cgit v1.2.3 From 5853b22d96fa786365ff11fe9f008a68a533f043 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Fri, 12 Aug 2016 17:02:53 +0300 Subject: i2c: core: Add function for finding the bus speed from ACPI, take 2 ACPI 5 specification doesn't have property for the I2C bus speed but I2cSerialBus resource descriptor which define each controller-slave connection define the maximum speed supported by that connection. Thus finding the maximum safe speed for the bus is to walk through all I2cSerialBus resources that are associated to I2C controller and use the speed of slowest connection. Add function i2c_acpi_find_bus_speed() to the i2c-core that adapter drivers can call prior registering itself to core. This implies two-step walk through the I2cSerialBus resources: call to i2c_acpi_find_bus_speed() does the first scan and finds the safe bus speed that adapter drivers can set up. Adapter driver registration does the second scan when i2c-core creates the I2C slaves by calling the i2c_acpi_register_devices(). 
In that way the bus speed is set in case slave device probe gets called during registration and does communication. Previous version commit 55d38d060e99 ("i2c: core: Add function for finding the bus speed from ACPI") got reverted due to merge conflicts from commit 525e6fabeae2 ("i2c / ACPI: add support for ACPI reconfigure notifications"). This version is a bit bigger than previous version but is still sharing the lowest and complicated part of I2cSerialBus lookup routines with the existing code. Signed-off-by: Jarkko Nikula Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index c1f60a345db7..5b4a3cbe5d7d 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -780,4 +780,13 @@ static inline struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node } #endif /* CONFIG_OF */ +#if IS_ENABLED(CONFIG_ACPI) +u32 i2c_acpi_find_bus_speed(struct device *dev); +#else +static inline u32 i2c_acpi_find_bus_speed(struct device *dev) +{ + return 0; +} +#endif /* CONFIG_ACPI */ + #endif /* _LINUX_I2C_H */ -- cgit v1.2.3 From d391e552293399396c131544f5b1c2f9b1fb0baa Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 17 Aug 2016 13:50:25 +0100 Subject: cpu/hotplug: Allow suspend/resume CPU to be specified disable_nonboot_cpus() assumes that the lowest numbered online CPU is the boot CPU, and that this is the correct CPU to run any power management code on. On x86 this is always correct, as CPU0 cannot (easily) be taken offline. On arm64 CPU0 can be taken offline. For hibernate/resume this means we may hibernate on a CPU other than CPU0. If the system is rebooted with kexec 'CPU0' will be assigned to a different physical CPU. This complicates hibernate/resume as now we can't trust the CPU numbers. 
Arch code can find the correct physical CPU, and ensure it is online before resume from hibernate begins, but also needs to influence disable_nonboot_cpus()'s choice of CPU. Rename disable_nonboot_cpus() as freeze_secondary_cpus() and add an argument indicating which CPU should be left standing. Follow the logic in migrate_to_reboot_cpu() to use the lowest numbered online CPU if the requested CPU is not online. Add disable_nonboot_cpus() as an inline function that has the existing behaviour. Cc: Rafael J. Wysocki Reviewed-by: Thomas Gleixner Signed-off-by: James Morse Signed-off-by: Will Deacon --- include/linux/cpu.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 797d9c8e9a1b..ad4f1f33a74e 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -228,7 +228,11 @@ static inline void cpu_hotplug_done(void) {} #endif /* CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_PM_SLEEP_SMP -extern int disable_nonboot_cpus(void); +extern int freeze_secondary_cpus(int primary); +static inline int disable_nonboot_cpus(void) +{ + return freeze_secondary_cpus(0); +} extern void enable_nonboot_cpus(void); #else /* !CONFIG_PM_SLEEP_SMP */ static inline int disable_nonboot_cpus(void) { return 0; } -- cgit v1.2.3 From 5ca8cc5bf11faed257c762018aea9106d529232f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 24 Aug 2016 12:31:31 +0200 Subject: rhashtable: add rhashtable_lookup_get_insert_key() This patch modifies __rhashtable_insert_fast() so it returns the existing object that clashes with the one that you want to insert. In case the object is successfully inserted, NULL is returned. Otherwise, you get an error via ERR_PTR(). This patch adapts the existing callers of __rhashtable_insert_fast() so they handle this new logic, and it adds a new rhashtable_lookup_get_insert_key() interface to fetch this existing object. 
nf_tables needs this change to improve handling of EEXIST cases via honoring the NLM_F_EXCL flag and by checking if the data part of the mapping matches what we have. Cc: Herbert Xu Cc: Thomas Graf Signed-off-by: Pablo Neira Ayuso Acked-by: Herbert Xu --- include/linux/rhashtable.h | 70 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 57 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 3eef0802a0cd..26b7a059c65e 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -343,7 +343,8 @@ int rhashtable_init(struct rhashtable *ht, struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj, - struct bucket_table *old_tbl); + struct bucket_table *old_tbl, + void **data); int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl); int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter, @@ -563,8 +564,11 @@ restart: return NULL; } -/* Internal function, please use rhashtable_insert_fast() instead */ -static inline int __rhashtable_insert_fast( +/* Internal function, please use rhashtable_insert_fast() instead. This + * function returns the existing element already in hashes in there is a clash, + * otherwise it returns an error via ERR_PTR(). 
+ */ +static inline void *__rhashtable_insert_fast( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { @@ -577,6 +581,7 @@ static inline int __rhashtable_insert_fast( spinlock_t *lock; unsigned int elasticity; unsigned int hash; + void *data = NULL; int err; restart: @@ -601,11 +606,14 @@ restart: new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (unlikely(new_tbl)) { - tbl = rhashtable_insert_slow(ht, key, obj, new_tbl); + tbl = rhashtable_insert_slow(ht, key, obj, new_tbl, &data); if (!IS_ERR_OR_NULL(tbl)) goto slow_path; err = PTR_ERR(tbl); + if (err == -EEXIST) + err = 0; + goto out; } @@ -619,25 +627,25 @@ slow_path: err = rhashtable_insert_rehash(ht, tbl); rcu_read_unlock(); if (err) - return err; + return ERR_PTR(err); goto restart; } - err = -EEXIST; + err = 0; elasticity = ht->elasticity; rht_for_each(head, tbl, hash) { if (key && unlikely(!(params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, head)) : - rhashtable_compare(&arg, rht_obj(ht, head))))) + rhashtable_compare(&arg, rht_obj(ht, head))))) { + data = rht_obj(ht, head); goto out; + } if (!--elasticity) goto slow_path; } - err = 0; - head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); RCU_INIT_POINTER(obj->next, head); @@ -652,7 +660,7 @@ out: spin_unlock_bh(lock); rcu_read_unlock(); - return err; + return err ? ERR_PTR(err) : data; } /** @@ -675,7 +683,13 @@ static inline int rhashtable_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { - return __rhashtable_insert_fast(ht, NULL, obj, params); + void *ret; + + ret = __rhashtable_insert_fast(ht, NULL, obj, params); + if (IS_ERR(ret)) + return PTR_ERR(ret); + + return ret == NULL ? 
0 : -EEXIST; } /** @@ -704,11 +718,15 @@ static inline int rhashtable_lookup_insert_fast( const struct rhashtable_params params) { const char *key = rht_obj(ht, obj); + void *ret; BUG_ON(ht->p.obj_hashfn); - return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, - params); + ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params); + if (IS_ERR(ret)) + return PTR_ERR(ret); + + return ret == NULL ? 0 : -EEXIST; } /** @@ -736,6 +754,32 @@ static inline int rhashtable_lookup_insert_fast( static inline int rhashtable_lookup_insert_key( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) +{ + void *ret; + + BUG_ON(!ht->p.obj_hashfn || !key); + + ret = __rhashtable_insert_fast(ht, key, obj, params); + if (IS_ERR(ret)) + return PTR_ERR(ret); + + return ret == NULL ? 0 : -EEXIST; +} + +/** + * rhashtable_lookup_get_insert_key - lookup and insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * @data: pointer to element data already in hashes + * + * Just like rhashtable_lookup_insert_key(), but this function returns the + * object if it exists, NULL if it does not and the insertion was successful, + * and an ERR_PTR otherwise. + */ +static inline void *rhashtable_lookup_get_insert_key( + struct rhashtable *ht, const void *key, struct rhash_head *obj, + const struct rhashtable_params params) { BUG_ON(!ht->p.obj_hashfn || !key); -- cgit v1.2.3 From d42c047245ae09ebc2f506c5d1730ce3113278ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 26 Aug 2016 14:58:07 +0200 Subject: clk: return unsigned int in dummy non-OF of_clk_get_parent_count() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the commit 929e7f3bc7b82 ("clk: Make of_clk_get_parent_count() return unsigned ints") of_clk_get_parent_count has been modified to return unsigned int. 
There is also a dummy implementation of the same function for configs without CONFIG_OF. For consistency it should be updated as well. Signed-off-by: Rafał Miłecki Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index f403b8a5f8ca..37b8fdce0e49 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -854,7 +854,7 @@ of_clk_hw_onecell_get(struct of_phandle_args *clkspec, void *data) { return ERR_PTR(-ENOENT); } -static inline int of_clk_get_parent_count(struct device_node *np) +static inline unsigned int of_clk_get_parent_count(struct device_node *np) { return 0; } -- cgit v1.2.3 From 6bc506b4fb065eac3d89ca1ce37082e174493d9e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 25 Aug 2016 18:42:37 +0200 Subject: bridge: switchdev: Add forward mark support for stacked devices switchdev_port_fwd_mark_set() is used to set the 'offload_fwd_mark' of port netdevs so that packets being flooded by the device won't be flooded twice. It works by assigning a unique identifier (the ifindex of the first bridge port) to bridge ports sharing the same parent ID. This prevents packets from being flooded twice by the same switch, but will flood packets through bridge ports belonging to a different switch. This method is problematic when stacked devices are taken into account, such as VLANs. In such cases, a physical port netdev can have upper devices being members in two different bridges, thus requiring two different 'offload_fwd_mark's to be configured on the port netdev, which is impossible. The main problem is that packet and netdev marking is performed at the physical netdev level, whereas flooding occurs between bridge ports, which are not necessarily port netdevs. 
Instead, packet and netdev marking should really be done in the bridge driver with the switch driver only telling it which packets it already forwarded. The bridge driver will mark such packets using the mark assigned to the ingress bridge port and will prevent the packet from being forwarded through any bridge port sharing the same mark (i.e. having the same parent ID). Remove the current switchdev 'offload_fwd_mark' implementation and instead implement the proposed method. In addition, make rocker - the sole user of the mark - use the proposed method. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ----- include/linux/skbuff.h | 13 +++++-------- 2 files changed, 5 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 794bb0733799..d122be9345c7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1562,8 +1562,6 @@ enum netdev_priv_flags { * * @xps_maps: XXX: need comments on this one * - * @offload_fwd_mark: Offload device fwding mark - * * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers @@ -1814,9 +1812,6 @@ struct net_device { #ifdef CONFIG_NET_CLS_ACT struct tcf_proto __rcu *egress_cl_list; #endif -#ifdef CONFIG_NET_SWITCHDEV - u32 offload_fwd_mark; -#endif /* These may be needed for future network-power-down code. 
*/ struct timer_list watchdog_timer; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7047448e8129..cfb7219be665 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -612,7 +612,6 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking - * @offload_fwd_mark: fwding offload mark * @mark: Generic packet mark * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information @@ -730,7 +729,10 @@ struct sk_buff { __u8 ipvs_property:1; __u8 inner_protocol_type:1; __u8 remcsum_offload:1; - /* 3 or 5 bit hole */ +#ifdef CONFIG_NET_SWITCHDEV + __u8 offload_fwd_mark:1; +#endif + /* 2, 4 or 5 bit hole */ #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ @@ -757,14 +759,9 @@ struct sk_buff { unsigned int sender_cpu; }; #endif - union { #ifdef CONFIG_NETWORK_SECMARK - __u32 secmark; + __u32 secmark; #endif -#ifdef CONFIG_NET_SWITCHDEV - __u32 offload_fwd_mark; -#endif - }; union { __u32 mark; -- cgit v1.2.3 From 0294b625ad5a6d1fb50632d67cf384862d8a4a46 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 28 Aug 2016 14:43:17 -0700 Subject: net: Add read_sock proto_op Add new function in proto_ops structure. This includes moving the typedef got sk_read_actor into net.h and removing the definition from tcp.h. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/linux/net.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index b9f0ff4d489c..cd0c8bd0a1de 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -128,6 +129,9 @@ struct page; struct sockaddr; struct msghdr; struct module; +struct sk_buff; +typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, + unsigned int, size_t); struct proto_ops { int family; @@ -186,6 +190,8 @@ struct proto_ops { struct pipe_inode_info *pipe, size_t len, unsigned int flags); int (*set_peek_off)(struct sock *sk, int val); int (*peek_len)(struct socket *sock); + int (*read_sock)(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor); }; #define DECLARE_SOCKADDR(type, dst, src) \ -- cgit v1.2.3 From 0680b0cabcd0a2264f0ad8ac569caf928f65afb6 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 24 Aug 2016 12:10:17 +0300 Subject: memory: omap-gpmc: Fix build with CONFIG_OMAP_GPMC disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following build failure if omap-gpmc.h is used with CONFIG_OMAP_GPMC disabled. 
./include/linux/omap-gpmc.h:32:1: error: unknown type name ‘gpmc_nand_ops’ Signed-off-by: Roger Quadros --- include/linux/omap-gpmc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index 9e9d79e8efa5..35d0fd7a4948 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -29,8 +29,8 @@ struct gpmc_nand_regs; struct gpmc_nand_ops *gpmc_omap_get_nand_ops(struct gpmc_nand_regs *regs, int cs); #else -static inline gpmc_nand_ops *gpmc_omap_get_nand_ops(struct gpmc_nand_regs *regs, - int cs) +static inline struct gpmc_nand_ops *gpmc_omap_get_nand_ops(struct gpmc_nand_regs *regs, + int cs) { return NULL; } -- cgit v1.2.3 From 474fd6e80fe529e9adeeb7ea9d4e5d6c4da0b7fe Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 23 Aug 2016 13:30:24 +0200 Subject: RAID/s390: add SIMD implementation for raid6 gen/xor Using vector registers is slightly faster: raid6: vx128x8 gen() 19705 MB/s raid6: vx128x8 xor() 11886 MB/s raid6: using algorithm vx128x8 gen() 19705 MB/s raid6: .... xor() 11886 MB/s, rmw enabled vs the software algorithms: raid6: int64x1 gen() 3018 MB/s raid6: int64x1 xor() 1429 MB/s raid6: int64x2 gen() 4661 MB/s raid6: int64x2 xor() 3143 MB/s raid6: int64x4 gen() 5392 MB/s raid6: int64x4 xor() 3509 MB/s raid6: int64x8 gen() 4441 MB/s raid6: int64x8 xor() 3207 MB/s raid6: using algorithm int64x4 gen() 5392 MB/s raid6: .... 
xor() 3509 MB/s, rmw enabled Signed-off-by: Martin Schwidefsky --- include/linux/raid/pq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index a0118d5929a9..c032a6a408a6 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -103,6 +103,7 @@ extern const struct raid6_calls raid6_avx2x1; extern const struct raid6_calls raid6_avx2x2; extern const struct raid6_calls raid6_avx2x4; extern const struct raid6_calls raid6_tilegx8; +extern const struct raid6_calls raid6_s390vx8; struct raid6_recov_calls { void (*data2)(int, size_t, int, int, void **); -- cgit v1.2.3 From f72b8792d180948b4b3898374998f5ac8c02e539 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Aug 2016 15:51:50 -0600 Subject: workqueue: add cancel_work() Like cancel_delayed_work(), but for regular work. Signed-off-by: Jens Axboe Mehed-by: Tejun Heo Acked-by: Tejun Heo --- include/linux/workqueue.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 26cc1df280d6..fc6e22186405 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -442,6 +442,7 @@ extern int schedule_on_each_cpu(work_func_t func); int execute_in_process_context(work_func_t fn, struct execute_work *); extern bool flush_work(struct work_struct *work); +extern bool cancel_work(struct work_struct *work); extern bool cancel_work_sync(struct work_struct *work); extern bool flush_delayed_work(struct delayed_work *dwork); -- cgit v1.2.3 From ee63cfa7fc197b63669623721b8009cce5b0659b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Aug 2016 15:52:48 -0600 Subject: block: add kblockd_schedule_work_on() Add a helper to schedule a regular struct work on a particular CPU. 
Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e79055c8b577..69aae720f4ef 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1440,8 +1440,8 @@ static inline bool req_gap_front_merge(struct request *req, struct bio *bio) return bio_will_gap(req->q, bio, req->bio); } -struct work_struct; int kblockd_schedule_work(struct work_struct *work); +int kblockd_schedule_work_on(int cpu, struct work_struct *work); int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay); int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); -- cgit v1.2.3 From 27489a3c827b7eebba26eda0320bb0f100bef167 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Aug 2016 15:54:25 -0600 Subject: blk-mq: turn hctx->run_work into a regular work struct We don't need the larger delayed work struct, since we always run it immediately. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index e43bbffb5b7a..d579252e6463 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -25,7 +25,7 @@ struct blk_mq_hw_ctx { } ____cacheline_aligned_in_smp; unsigned long state; /* BLK_MQ_S_* flags */ - struct delayed_work run_work; + struct work_struct run_work; struct delayed_work delay_work; cpumask_var_t cpumask; int next_cpu; -- cgit v1.2.3 From 8d354f133e86dd03ea7885a91df398c55ff699ff Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 25 Aug 2016 08:00:28 -0600 Subject: blk-mq: improve layout of blk_mq_hw_ctx Various cache line optimizations: - Move delay_work towards the end. It's huge, and we don't use it a lot (only SCSI). - Move the atomic state into the same cacheline as the dispatch list and lock.
- Rearrange a few members to pack it better. - Shrink the max-order for dispatch accounting from 10 to 7. This means that ->dispatched[] and ->run now take up their own cacheline. This shrinks struct blk_mq_hw_ctx down to 8 cachelines. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index d579252e6463..e1544f0f8c21 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -22,11 +22,10 @@ struct blk_mq_hw_ctx { struct { spinlock_t lock; struct list_head dispatch; + unsigned long state; /* BLK_MQ_S_* flags */ } ____cacheline_aligned_in_smp; - unsigned long state; /* BLK_MQ_S_* flags */ struct work_struct run_work; - struct delayed_work delay_work; cpumask_var_t cpumask; int next_cpu; int next_cpu_batch; @@ -40,8 +39,8 @@ struct blk_mq_hw_ctx { struct blk_mq_ctxmap ctx_map; - unsigned int nr_ctx; struct blk_mq_ctx **ctxs; + unsigned int nr_ctx; atomic_t wait_index; @@ -49,7 +48,7 @@ struct blk_mq_hw_ctx { unsigned long queued; unsigned long run; -#define BLK_MQ_MAX_DISPATCH_ORDER 10 +#define BLK_MQ_MAX_DISPATCH_ORDER 7 unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; unsigned int numa_node; @@ -57,6 +56,8 @@ struct blk_mq_hw_ctx { atomic_t nr_active; + struct delayed_work delay_work; + struct blk_mq_cpu_notifier cpu_notifier; struct kobject kobj; -- cgit v1.2.3 From ba9c8dd3c22275e46feef429f343b85e9cf3924c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 22 Aug 2016 19:28:37 -0700 Subject: acpi, nfit: add dimm device notification support Per "ACPI 6.1 Section 9.20.3" NVDIMM devices, children of the ACPI0012 NVDIMM Root device, can receive health event notifications. Given that these devices are precluded from registering a notification handler via acpi_driver.acpi_device_ops (due to no _HID), we use acpi_install_notify_handler() directly. 
The registered handler, acpi_nvdimm_notify(), triggers a poll(2) event on the nmemX/nfit/flags sysfs attribute when a health event notification is received. Cc: Rafael J. Wysocki Tested-by: Toshi Kani Reviewed-by: Vishal Verma Acked-by: Rafael J. Wysocki Reviewed-by: Toshi Kani Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index b519e137b9b7..ad18d0531b6e 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -139,6 +139,7 @@ struct nd_blk_region *to_nd_blk_region(struct device *dev); struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus); const char *nvdimm_name(struct nvdimm *nvdimm); +struct kobject *nvdimm_kobj(struct nvdimm *nvdimm); unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm); void *nvdimm_provider_data(struct nvdimm *nvdimm); struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, -- cgit v1.2.3 From d1ed7985b9a6b85ea38a330108c51ec83381c01b Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Thu, 25 Aug 2016 23:07:01 +0200 Subject: i2c: move locking operations to their own struct This makes it trivial to constify them, so do that. 
Signed-off-by: Peter Rosin Acked-by: Daniel Vetter Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 5b4a3cbe5d7d..4a4099d3a4b9 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -426,6 +426,20 @@ struct i2c_algorithm { #endif }; +/** + * struct i2c_lock_operations - represent I2C locking operations + * @lock_bus: Get exclusive access to an I2C bus segment + * @trylock_bus: Try to get exclusive access to an I2C bus segment + * @unlock_bus: Release exclusive access to an I2C bus segment + * + * The main operations are wrapped by i2c_lock_bus and i2c_unlock_bus. + */ +struct i2c_lock_operations { + void (*lock_bus)(struct i2c_adapter *, unsigned int flags); + int (*trylock_bus)(struct i2c_adapter *, unsigned int flags); + void (*unlock_bus)(struct i2c_adapter *, unsigned int flags); +}; + /** * struct i2c_timings - I2C timing information * @bus_freq_hz: the bus frequency in Hz @@ -536,6 +550,7 @@ struct i2c_adapter { void *algo_data; /* data fields that are valid for all devices */ + const struct i2c_lock_operations *lock_ops; struct rt_mutex bus_lock; struct rt_mutex mux_lock; @@ -552,10 +567,6 @@ struct i2c_adapter { struct i2c_bus_recovery_info *bus_recovery_info; const struct i2c_adapter_quirks *quirks; - - void (*lock_bus)(struct i2c_adapter *, unsigned int flags); - int (*trylock_bus)(struct i2c_adapter *, unsigned int flags); - void (*unlock_bus)(struct i2c_adapter *, unsigned int flags); }; #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev) @@ -597,7 +608,7 @@ int i2c_for_each_dev(void *data, int (*fn)(struct device *, void *)); static inline void i2c_lock_bus(struct i2c_adapter *adapter, unsigned int flags) { - adapter->lock_bus(adapter, flags); + adapter->lock_ops->lock_bus(adapter, flags); } /** @@ -611,7 +622,7 @@ i2c_lock_bus(struct i2c_adapter *adapter, 
unsigned int flags) static inline int i2c_trylock_bus(struct i2c_adapter *adapter, unsigned int flags) { - return adapter->trylock_bus(adapter, flags); + return adapter->lock_ops->trylock_bus(adapter, flags); } /** @@ -623,7 +634,7 @@ i2c_trylock_bus(struct i2c_adapter *adapter, unsigned int flags) static inline void i2c_unlock_bus(struct i2c_adapter *adapter, unsigned int flags) { - adapter->unlock_bus(adapter, flags); + adapter->lock_ops->unlock_bus(adapter, flags); } static inline void -- cgit v1.2.3 From daae45caf5a042a0c7d147749ed1e4c970fc86d8 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 28 Jul 2016 02:25:41 +0200 Subject: ACPI / bus: Make acpi_get_first_physical_node() public Following the fwnode of a device is currently a one-way road: We provide ACPI_COMPANION() to obtain the fwnode but there's no (public) method to do the reverse. Granted, there may be multiple physical_nodes, but often the first one in the list is sufficient. A handy function to obtain it was introduced with commit 3b95bd160547 ("ACPI: introduce a function to find the first physical device"), but currently it's only available internally. We're about to add an EFI Device Path parser which needs this function. Consider the following device path: ACPI(PNP0A03,0)/PCI(28,2)/PCI(0,0) The PCI root is encoded as an ACPI device in the path, so the parser has to find the corresponding ACPI device, then find its physical node, find the PCI bridge in slot 1c (decimal 28), function 2 below it and finally find the PCI device in slot 0, function 0. To this end, make acpi_get_first_physical_node() public. Signed-off-by: Lukas Wunner Signed-off-by: Rafael J. 
Wysocki --- include/linux/acpi.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4d8452c2384b..536c12ed2669 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -85,6 +85,8 @@ static inline const char *acpi_dev_name(struct acpi_device *adev) return dev_name(&adev->dev); } +struct device *acpi_get_first_physical_node(struct acpi_device *adev); + enum acpi_irq_model_id { ACPI_IRQ_MODEL_PIC = 0, ACPI_IRQ_MODEL_IOAPIC, @@ -634,6 +636,11 @@ static inline const char *acpi_dev_name(struct acpi_device *adev) return NULL; } +static inline struct device *acpi_get_first_physical_node(struct acpi_device *adev) +{ + return NULL; +} + static inline void acpi_early_init(void) { } static inline void acpi_subsystem_init(void) { } -- cgit v1.2.3 From 60e7396f820fa67a007f2a2eb5d97d3e77a74881 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 22 Aug 2016 17:48:25 +0900 Subject: usb: gadget: add a new quirk to avoid skb_reserve in u_ether.c Some platforms (e.g. USB-DMAC on R-Car SoCs) has memory alignment restriction. If memory alignment is not match, the usb peripheral driver decides not to use the DMA controller. Then, the performance is not good. In the case of u_ether.c, since it calls skb_reserve() in rx_submit(), it is possible to cause memory alignment mismatch. So, this patch adds a new quirk "quirk_avoids_skb_reserve" to avoid skb_reserve() calling in u_ether.c to improve performance. A peripheral driver will set this flag and network gadget drivers (e.g. f_ncm.c) will reference the flag via gadget_avoids_skb_reserve(). 
Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 3cc93237ff98..8e81f9eb95e4 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -346,6 +346,8 @@ struct usb_gadget_ops { * or B-Peripheral wants to take host role. * @quirk_ep_out_aligned_size: epout requires buffer size to be aligned to * MaxPacketSize. + * @quirk_avoids_skb_reserve: udc/platform wants to avoid skb_reserve() in + * u_ether.c to improve performance. * @is_selfpowered: if the gadget is self-powered. * @deactivated: True if gadget is deactivated - in deactivated state it cannot * be connected. @@ -398,6 +400,7 @@ struct usb_gadget { unsigned quirk_altset_not_supp:1; unsigned quirk_stall_not_supp:1; unsigned quirk_zlp_not_supp:1; + unsigned quirk_avoids_skb_reserve:1; unsigned is_selfpowered:1; unsigned deactivated:1; unsigned connected:1; @@ -473,6 +476,16 @@ static inline int gadget_is_zlp_supported(struct usb_gadget *g) return !g->quirk_zlp_not_supp; } +/** + * gadget_avoids_skb_reserve - return true iff the hardware would like to avoid + * skb_reserve to improve performance. + * @g: controller to check for quirk + */ +static inline int gadget_avoids_skb_reserve(struct usb_gadget *g) +{ + return g->quirk_avoids_skb_reserve; +} + /** * gadget_is_dualspeed - return true iff the hardware handles high speed * @g: controller that might support both high and full speeds -- cgit v1.2.3 From 6f1d912b687d3d17c1731f5bda3b5d6703bce4a0 Mon Sep 17 00:00:00 2001 From: Vic Yang Date: Wed, 10 Aug 2016 19:05:24 +0200 Subject: mfd: cros_ec: Add MKBP event support Newer revisions of the ChromeOS EC add more events besides the keyboard ones. So handle interrupts in the MFD driver and let consumers register for notifications for the events they might care. 
To keep backward compatibility, if the EC doesn't support MKBP event, we fall back to the old MKBP key matrix host command. Cc: Randall Spangler Cc: Vincent Palatin Cc: Benson Leung Signed-off-by: Vic Yang Signed-off-by: Tomeu Vizoso Tested-by: Enric Balletbo i Serra Acked-by: Olof Johansson Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec.h | 18 ++++++++++++++++++ include/linux/mfd/cros_ec_commands.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index d641a18abacb..76f7ef4d3a0d 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -109,6 +109,10 @@ struct cros_ec_command { * should check msg.result for the EC's result code. * @pkt_xfer: send packet to EC and get response * @lock: one transaction at a time + * @mkbp_event_supported: true if this EC supports the MKBP event protocol. + * @event_notifier: interrupt event notifier for transport devices. + * @event_data: raw payload transferred with the MKBP event. + * @event_size: size in bytes of the event data. 
*/ struct cros_ec_device { @@ -137,6 +141,11 @@ struct cros_ec_device { int (*pkt_xfer)(struct cros_ec_device *ec, struct cros_ec_command *msg); struct mutex lock; + bool mkbp_event_supported; + struct blocking_notifier_head event_notifier; + + struct ec_response_get_next_event event_data; + int event_size; }; /* struct cros_ec_platform - ChromeOS EC platform information @@ -269,6 +278,15 @@ int cros_ec_register(struct cros_ec_device *ec_dev); */ int cros_ec_query_all(struct cros_ec_device *ec_dev); +/** + * cros_ec_get_next_event - Fetch next event from the ChromeOS EC + * + * @ec_dev: Device to fetch event from + * + * Returns: 0 on success, Linux error number on failure + */ +int cros_ec_get_next_event(struct cros_ec_device *ec_dev); + /* sysfs stuff */ extern struct attribute_group cros_ec_attr_group; extern struct attribute_group cros_ec_lightbar_attr_group; diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 7e7a8d4b4551..76728ff37d01 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -1793,6 +1793,40 @@ struct ec_result_keyscan_seq_ctrl { }; } __packed; +/* + * Command for retrieving the next pending MKBP event from the EC device + * + * The device replies with UNAVAILABLE if there aren't any pending events. + */ +#define EC_CMD_GET_NEXT_EVENT 0x67 + +enum ec_mkbp_event { + /* Keyboard matrix changed. The event data is the new matrix state. */ + EC_MKBP_EVENT_KEY_MATRIX = 0, + + /* New host event. The event data is 4 bytes of host event flags. */ + EC_MKBP_EVENT_HOST_EVENT = 1, + + /* New Sensor FIFO data. The event data is fifo_info structure. 
*/ + EC_MKBP_EVENT_SENSOR_FIFO = 2, + + /* Number of MKBP events */ + EC_MKBP_EVENT_COUNT, +}; + +union ec_response_get_next_data { + uint8_t key_matrix[13]; + + /* Unaligned */ + uint32_t host_event; +} __packed; + +struct ec_response_get_next_event { + uint8_t event_type; + /* Followed by event data if any */ + union ec_response_get_next_data data; +} __packed; + /*****************************************************************************/ /* Temperature sensor commands */ -- cgit v1.2.3 From 638fea33aee858cc665297a76f0039e95a28ce0c Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 9 Jun 2016 18:47:24 -0700 Subject: Drivers: hv: vmbus: fix the race when querying & updating the percpu list There is a rare race when we remove an entry from the global list hv_context.percpu_list[cpu] in hv_process_channel_removal() -> percpu_channel_deq() -> list_del(): at this time, if vmbus_on_event() -> process_chn_event() -> pcpu_relid2channel() is trying to query the list, we can get the kernel fault. Similarly, we also have the issue in the code path: vmbus_process_offer() -> percpu_channel_enq(). We can resolve the issue by disabling the tasklet when updating the list. The patch also moves vmbus_release_relid() to a later place where the channel has been removed from the per-cpu and the global lists. Reported-by: Rolf Neugebauer Signed-off-by: Dexuan Cui Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index b10954a66939..50f493eedeb5 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1357,6 +1357,9 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *, struct icmsg_negotiate *, u8 *, int, int); +void hv_event_tasklet_disable(struct vmbus_channel *channel); +void hv_event_tasklet_enable(struct vmbus_channel *channel); + void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); /* -- cgit v1.2.3 From 3724287c0ec472815ebe5ae3790f77965c6aa557 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 1 Jul 2016 16:26:37 -0700 Subject: Drivers: hv: vmbus: Implement a mechanism to tag the channel for low latency On Hyper-V, performance critical channels use the monitor mechanism to signal the host when the guest posts messages for the host. This mechanism minimizes the hypervisor intercepts and also makes the host more efficient in that each time the host is woken up, it processes a batch of messages as opposed to just one. The goal here is to improve the throughput and this is at the expense of increased latency. Implement a mechanism to let the client driver decide if latency is important. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 50f493eedeb5..755e8f566a47 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -850,6 +850,31 @@ struct vmbus_channel { * ring lock to preserve the current behavior.
*/ bool acquire_ring_lock; + /* + * For performance critical channels (storage, networking + * etc.), Hyper-V has a mechanism to enhance the throughput + * at the expense of latency: + * When the host is to be signaled, we just set a bit in a shared page + * and this bit will be inspected by the hypervisor within a certain + * window and if the bit is set, the host will be signaled. The window + * of time is the monitor latency - currently around 100 usecs. This + * mechanism improves throughput by: + * + * A) Making the host more efficient - each time it wakes up, + * potentially it will process a larger number of packets. The + * monitor latency allows a batch to build up. + * B) By deferring the hypercall to signal, we will also minimize + * the interrupts. + * + * Clearly, these optimizations improve throughput at the expense of + * latency. Furthermore, since the channel is shared for both + * control and data messages, control messages currently suffer + * unnecessary latency adversely impacting performance and boot + * time. To fix this issue, permit tagging the channel as being + * in "low latency" mode. In this mode, we will bypass the monitor + * mechanism. + */ + bool low_latency; }; @@ -891,6 +916,16 @@ static inline void set_channel_pending_send_size(struct vmbus_channel *c, c->outbound.ring_buffer->pending_send_sz = size; } +static inline void set_low_latency_mode(struct vmbus_channel *c) +{ + c->low_latency = true; +} + +static inline void clear_low_latency_mode(struct vmbus_channel *c) +{ + c->low_latency = false; +} + void vmbus_onmessage(void *context); int vmbus_request_offers(void); -- cgit v1.2.3 From 68905d73df5d51ba1318a0ad3f3c5a65c4e646a2 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 25 Aug 2016 15:19:10 -0600 Subject: coresight: etm-perf: pass struct perf_event to source::enable/disable() With this commit [1] address range filter information is now found in the struct hw_perf_event::addr_filters.
As such pass the event itself to the coresight_source::enable/disable() functions so that both event attribute and filter can be accessible for configuration. [1] 'commit 375637bc5249 ("perf/core: Introduce address range filtering")' Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- include/linux/coresight.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 385d62e64abb..2a5982c37dfb 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -232,8 +232,9 @@ struct coresight_ops_source { int (*cpu_id)(struct coresight_device *csdev); int (*trace_id)(struct coresight_device *csdev); int (*enable)(struct coresight_device *csdev, - struct perf_event_attr *attr, u32 mode); - void (*disable)(struct coresight_device *csdev); + struct perf_event *event, u32 mode); + void (*disable)(struct coresight_device *csdev, + struct perf_event *event); }; struct coresight_ops { -- cgit v1.2.3 From 68d9671a845e11c4eec1976876c1af5687a55279 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Fri, 26 Aug 2016 09:34:59 +0200 Subject: mcb: Introduce type safety for to_mcb_* The to_mcb_{bus,device,driver}() macros lacked type safety, so convert them to inline functions to enforce compile time type checking. 
Signed-off-by: Johannes Thumshirn Signed-off-by: Greg Kroah-Hartman --- include/linux/mcb.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mcb.h b/include/linux/mcb.h index ead13d233a97..a6733d3fe72b 100644 --- a/include/linux/mcb.h +++ b/include/linux/mcb.h @@ -41,7 +41,11 @@ struct mcb_bus { char name[CHAMELEON_FILENAME_LEN + 1]; int (*get_irq)(struct mcb_device *dev); }; -#define to_mcb_bus(b) container_of((b), struct mcb_bus, dev) + +static inline struct mcb_bus *to_mcb_bus(struct device *dev) +{ + return container_of(dev, struct mcb_bus, dev); +} /** * struct mcb_device - MEN Chameleon Bus device @@ -77,7 +81,11 @@ struct mcb_device { struct resource irq; struct resource mem; }; -#define to_mcb_device(x) container_of((x), struct mcb_device, dev) + +static inline struct mcb_device *to_mcb_device(struct device *dev) +{ + return container_of(dev, struct mcb_device, dev); +} /** * struct mcb_driver - MEN Chameleon Bus device driver @@ -95,7 +103,11 @@ struct mcb_driver { void (*remove)(struct mcb_device *mdev); void (*shutdown)(struct mcb_device *mdev); }; -#define to_mcb_driver(x) container_of((x), struct mcb_driver, driver) + +static inline struct mcb_driver *to_mcb_driver(struct device_driver *drv) +{ + return container_of(drv, struct mcb_driver, driver); +} static inline void *mcb_get_drvdata(struct mcb_device *dev) { -- cgit v1.2.3 From b58189b3c5cf54689a0c154e4d9cff918a29ab0b Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Fri, 26 Aug 2016 09:35:00 +0200 Subject: mcb: remove sub-device handling code The MEN Chameleon specification states that a chameleon FPGA can include a bridge descriptor, which then opens up a new bus behind this bridge. MCB included subdevice handling code in the core, but no support for bus descriptors in the parser, due to a lack of hardware access. As this is technically dead code, but it gets executed on a device add, I've decided to remove it. 
Signed-off-by: Johannes Thumshirn Signed-off-by: Greg Kroah-Hartman --- include/linux/mcb.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mcb.h b/include/linux/mcb.h index a6733d3fe72b..ee5200d660b0 100644 --- a/include/linux/mcb.h +++ b/include/linux/mcb.h @@ -50,10 +50,8 @@ static inline struct mcb_bus *to_mcb_bus(struct device *dev) /** * struct mcb_device - MEN Chameleon Bus device * - * @bus_list: internal list handling for bus code * @dev: device in kernel representation * @bus: mcb bus the device is plugged to - * @subordinate: subordinate MCBus in case of bridge * @is_added: flag to check if device is added to bus * @driver: associated mcb_driver * @id: mcb device id @@ -66,10 +64,8 @@ static inline struct mcb_bus *to_mcb_bus(struct device *dev) * @memory: memory resource */ struct mcb_device { - struct list_head bus_list; struct device dev; struct mcb_bus *bus; - struct mcb_bus *subordinate; bool is_added; struct mcb_driver *driver; u16 id; -- cgit v1.2.3 From fa54b326803d91b04705a6adf0ff963593a9fe5c Mon Sep 17 00:00:00 2001 From: Aaron Sierra Date: Fri, 29 Apr 2016 16:41:02 -0500 Subject: vme: change LM callback argument to void pointer Make the location monitor callback function prototype more useful by changing the argument from an integer to a void pointer. All VME bridge drivers were simply passing the location monitor index (e.g. 0-3) as the argument to these callbacks. It is much more useful to pass back a pointer to data that the callback-registering driver cares about. There appear to be no in-kernel callers of vme_lm_attach (or vme_lme_request for that matter), so this change only affects the VME subsystem and bridge drivers. This has been tested with Tsi148 hardware, but the CA91Cx42 changes have only been compiled. 
Signed-off-by: Aaron Sierra Acked-by: Martyn Welch Signed-off-by: Greg Kroah-Hartman --- include/linux/vme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vme.h b/include/linux/vme.h index 71e4a6dec5ac..ea6095deba20 100644 --- a/include/linux/vme.h +++ b/include/linux/vme.h @@ -166,7 +166,7 @@ struct vme_resource *vme_lm_request(struct vme_dev *); int vme_lm_count(struct vme_resource *); int vme_lm_set(struct vme_resource *, unsigned long long, u32, u32); int vme_lm_get(struct vme_resource *, unsigned long long *, u32 *, u32 *); -int vme_lm_attach(struct vme_resource *, int, void (*callback)(int)); +int vme_lm_attach(struct vme_resource *, int, void (*callback)(void *), void *); int vme_lm_detach(struct vme_resource *, int); void vme_lm_free(struct vme_resource *); -- cgit v1.2.3 From ca75d601b5942e8cf5eed68f308be0526d0dac5d Mon Sep 17 00:00:00 2001 From: PrasannaKumar Muralidharan Date: Thu, 25 Aug 2016 22:30:49 +0530 Subject: miscdevice: Add helper macro for misc device boilerplate Many modules call misc_register and misc_deregister in its module init and exit methods without any additional code. This ends up being boilerplate. This patch adds helper macro module_misc_device(), that replaces module_init()/ module_exit() with template functions. This patch also converts drivers to use new macro. Change since v1: Add device.h include in miscdevice.h as module_driver macro was not available from other include files in some architectures. Signed-off-by: PrasannaKumar Muralidharan Signed-off-by: Greg Kroah-Hartman --- include/linux/miscdevice.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 543037465973..722698a43d79 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -3,6 +3,7 @@ #include #include #include +#include /* * These allocations are managed by device@lanana.org. 
If you use an @@ -70,6 +71,13 @@ struct miscdevice { extern int misc_register(struct miscdevice *misc); extern void misc_deregister(struct miscdevice *misc); +/* + * Helper macro for drivers that don't do anything special in module init / exit + * call. This helps in eleminating of boilerplate code. + */ +#define module_misc_device(__misc_device) \ + module_driver(__misc_device, misc_register, misc_deregister) + #define MODULE_ALIAS_MISCDEV(minor) \ MODULE_ALIAS("char-major-" __stringify(MISC_MAJOR) \ "-" __stringify(minor)) -- cgit v1.2.3 From dc21c7ad3a8aad79cb14128c321833a47dc921c2 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Wed, 31 Aug 2016 14:28:10 +0530 Subject: mfd: lp873x: Add lp873x PMIC support The LP873X chip is a power management IC for Portable Navigation Systems and Tablet Computing devices. It contains the following components: - Regulators. - Configurable General Purpose Output Signals (GPO). PMIC interacts with the main processor through i2c. PMIC has couple of LDOs (Linear Regulators), couple of BUCKs (Step-Down DC-DC Converter Cores) and GPOs (General Purpose Output Signals). Signed-off-by: Keerthy Signed-off-by: Lee Jones --- include/linux/mfd/lp873x.h | 269 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 269 insertions(+) create mode 100644 include/linux/mfd/lp873x.h (limited to 'include/linux') diff --git a/include/linux/mfd/lp873x.h b/include/linux/mfd/lp873x.h new file mode 100644 index 000000000000..83b1bd7588be --- /dev/null +++ b/include/linux/mfd/lp873x.h @@ -0,0 +1,269 @@ +/* + * Functions to access LP873X power management chip. + * + * Copyright (C) 2016 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. 
+ * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __LINUX_MFD_LP873X_H +#define __LINUX_MFD_LP873X_H + +#include +#include +#include + +/* LP873x chip id list */ +#define LP873X 0x00 + +/* All register addresses */ +#define LP873X_REG_DEV_REV 0X00 +#define LP873X_REG_OTP_REV 0X01 +#define LP873X_REG_BUCK0_CTRL_1 0X02 +#define LP873X_REG_BUCK0_CTRL_2 0X03 +#define LP873X_REG_BUCK1_CTRL_1 0X04 +#define LP873X_REG_BUCK1_CTRL_2 0X05 +#define LP873X_REG_BUCK0_VOUT 0X06 +#define LP873X_REG_BUCK1_VOUT 0X07 +#define LP873X_REG_LDO0_CTRL 0X08 +#define LP873X_REG_LDO1_CTRL 0X09 +#define LP873X_REG_LDO0_VOUT 0X0A +#define LP873X_REG_LDO1_VOUT 0X0B +#define LP873X_REG_BUCK0_DELAY 0X0C +#define LP873X_REG_BUCK1_DELAY 0X0D +#define LP873X_REG_LDO0_DELAY 0X0E +#define LP873X_REG_LDO1_DELAY 0X0F +#define LP873X_REG_GPO_DELAY 0X10 +#define LP873X_REG_GPO2_DELAY 0X11 +#define LP873X_REG_GPO_CTRL 0X12 +#define LP873X_REG_CONFIG 0X13 +#define LP873X_REG_PLL_CTRL 0X14 +#define LP873X_REG_PGOOD_CTRL1 0X15 +#define LP873X_REG_PGOOD_CTRL2 0X16 +#define LP873X_REG_PG_FAULT 0X17 +#define LP873X_REG_RESET 0X18 +#define LP873X_REG_INT_TOP_1 0X19 +#define LP873X_REG_INT_TOP_2 0X1A +#define LP873X_REG_INT_BUCK 0X1B +#define LP873X_REG_INT_LDO 0X1C +#define LP873X_REG_TOP_STAT 0X1D +#define LP873X_REG_BUCK_STAT 0X1E +#define LP873X_REG_LDO_STAT 0x1F +#define LP873X_REG_TOP_MASK_1 0x20 +#define LP873X_REG_TOP_MASK_2 0x21 +#define LP873X_REG_BUCK_MASK 0x22 +#define LP873X_REG_LDO_MASK 0x23 +#define LP873X_REG_SEL_I_LOAD 0x24 +#define LP873X_REG_I_LOAD_2 0x25 +#define LP873X_REG_I_LOAD_1 0x26 + +#define LP873X_REG_MAX LP873X_REG_I_LOAD_1 + +/* Register field definitions */ +#define LP873X_DEV_REV_DEV_ID 0xC0 +#define LP873X_DEV_REV_ALL_LAYER 0x30 +#define 
LP873X_DEV_REV_METAL_LAYER 0x0F + +#define LP873X_OTP_REV_OTP_ID 0xFF + +#define LP873X_BUCK0_CTRL_1_BUCK0_FPWM BIT(3) +#define LP873X_BUCK0_CTRL_1_BUCK0_RDIS_EN BIT(2) +#define LP873X_BUCK0_CTRL_1_BUCK0_EN_PIN_CTRL BIT(1) +#define LP873X_BUCK0_CTRL_1_BUCK0_EN BIT(0) + +#define LP873X_BUCK0_CTRL_2_BUCK0_ILIM 0x38 +#define LP873X_BUCK0_CTRL_2_BUCK0_SLEW_RATE 0x07 + +#define LP873X_BUCK1_CTRL_1_BUCK1_FPWM BIT(3) +#define LP873X_BUCK1_CTRL_1_BUCK1_RDIS_EN BIT(2) +#define LP873X_BUCK1_CTRL_1_BUCK1_EN_PIN_CTRL BIT(1) +#define LP873X_BUCK1_CTRL_1_BUCK1_EN BIT(0) + +#define LP873X_BUCK1_CTRL_2_BUCK1_ILIM 0x38 +#define LP873X_BUCK1_CTRL_2_BUCK1_SLEW_RATE 0x07 + +#define LP873X_BUCK0_VOUT_BUCK0_VSET 0xFF + +#define LP873X_BUCK1_VOUT_BUCK1_VSET 0xFF + +#define LP873X_LDO0_CTRL_LDO0_RDIS_EN BIT(2) +#define LP873X_LDO0_CTRL_LDO0_EN_PIN_CTRL BIT(1) +#define LP873X_LDO0_CTRL_LDO0_EN BIT(0) + +#define LP873X_LDO1_CTRL_LDO1_RDIS_EN BIT(2) +#define LP873X_LDO1_CTRL_LDO1_EN_PIN_CTRL BIT(1) +#define LP873X_LDO1_CTRL_LDO1_EN BIT(0) + +#define LP873X_LDO0_VOUT_LDO0_VSET 0x1F + +#define LP873X_LDO1_VOUT_LDO1_VSET 0x1F + +#define LP873X_BUCK0_DELAY_BUCK0_SD_DELAY 0xF0 +#define LP873X_BUCK0_DELAY_BUCK0_SU_DELAY 0x0F + +#define LP873X_BUCK1_DELAY_BUCK1_SD_DELAY 0xF0 +#define LP873X_BUCK1_DELAY_BUCK1_SU_DELAY 0x0F + +#define LP873X_LDO0_DELAY_LDO0_SD_DELAY 0xF0 +#define LP873X_LDO0_DELAY_LDO0_SU_DELAY 0x0F + +#define LP873X_LDO1_DELAY_LDO1_SD_DELAY 0xF0 +#define LP873X_LDO1_DELAY_LDO1_SU_DELAY 0x0F + +#define LP873X_GPO_DELAY_GPO_SD_DELAY 0xF0 +#define LP873X_GPO_DELAY_GPO_SU_DELAY 0x0F + +#define LP873X_GPO2_DELAY_GPO2_SD_DELAY 0xF0 +#define LP873X_GPO2_DELAY_GPO2_SU_DELAY 0x0F + +#define LP873X_GPO_CTRL_GPO2_OD BIT(6) +#define LP873X_GPO_CTRL_GPO2_EN_PIN_CTRL BIT(5) +#define LP873X_GPO_CTRL_GPO2_EN BIT(4) +#define LP873X_GPO_CTRL_GPO_OD BIT(2) +#define LP873X_GPO_CTRL_GPO_EN_PIN_CTRL BIT(1) +#define LP873X_GPO_CTRL_GPO_EN BIT(0) + +#define LP873X_CONFIG_SU_DELAY_SEL BIT(6) +#define 
LP873X_CONFIG_SD_DELAY_SEL BIT(5) +#define LP873X_CONFIG_CLKIN_PIN_SEL BIT(4) +#define LP873X_CONFIG_CLKIN_PD BIT(3) +#define LP873X_CONFIG_EN_PD BIT(2) +#define LP873X_CONFIG_TDIE_WARN_LEVEL BIT(1) +#define LP873X_EN_SPREAD_SPEC BIT(0) + +#define LP873X_PLL_CTRL_EN_PLL BIT(6) +#define LP873X_EXT_CLK_FREQ 0x1F + +#define LP873X_PGOOD_CTRL1_PGOOD_POL BIT(7) +#define LP873X_PGOOD_CTRL1_PGOOD_OD BIT(6) +#define LP873X_PGOOD_CTRL1_PGOOD_WINDOW_LDO BIT(5) +#define LP873X_PGOOD_CTRL1_PGOOD_WINDOWN_BUCK BIT(4) +#define LP873X_PGOOD_CTRL1_PGOOD_EN_PGOOD_LDO1 BIT(3) +#define LP873X_PGOOD_CTRL1_PGOOD_EN_PGOOD_LDO0 BIT(2) +#define LP873X_PGOOD_CTRL1_PGOOD_EN_PGOOD_BUCK1 BIT(1) +#define LP873X_PGOOD_CTRL1_PGOOD_EN_PGOOD_BUCK0 BIT(0) + +#define LP873X_PGOOD_CTRL2_EN_PGOOD_TWARN BIT(2) +#define LP873X_PGOOD_CTRL2_EN_PG_FAULT_GATE BIT(1) +#define LP873X_PGOOD_CTRL2_PGOOD_MODE BIT(0) + +#define LP873X_PG_FAULT_PG_FAULT_LDO1 BIT(3) +#define LP873X_PG_FAULT_PG_FAULT_LDO0 BIT(2) +#define LP873X_PG_FAULT_PG_FAULT_BUCK1 BIT(1) +#define LP873X_PG_FAULT_PG_FAULT_BUCK0 BIT(0) + +#define LP873X_RESET_SW_RESET BIT(0) + +#define LP873X_INT_TOP_1_PGOOD_INT BIT(7) +#define LP873X_INT_TOP_1_LDO_INT BIT(6) +#define LP873X_INT_TOP_1_BUCK_INT BIT(5) +#define LP873X_INT_TOP_1_SYNC_CLK_INT BIT(4) +#define LP873X_INT_TOP_1_TDIE_SD_INT BIT(3) +#define LP873X_INT_TOP_1_TDIE_WARN_INT BIT(2) +#define LP873X_INT_TOP_1_OVP_INT BIT(1) +#define LP873X_INT_TOP_1_I_MEAS_INT BIT(0) + +#define LP873X_INT_TOP_2_RESET_REG_INT BIT(0) + +#define LP873X_INT_BUCK_BUCK1_PG_INT BIT(6) +#define LP873X_INT_BUCK_BUCK1_SC_INT BIT(5) +#define LP873X_INT_BUCK_BUCK1_ILIM_INT BIT(4) +#define LP873X_INT_BUCK_BUCK0_PG_INT BIT(2) +#define LP873X_INT_BUCK_BUCK0_SC_INT BIT(1) +#define LP873X_INT_BUCK_BUCK0_ILIM_INT BIT(0) + +#define LP873X_INT_LDO_LDO1_PG_INT BIT(6) +#define LP873X_INT_LDO_LDO1_SC_INT BIT(5) +#define LP873X_INT_LDO_LDO1_ILIM_INT BIT(4) +#define LP873X_INT_LDO_LDO0_PG_INT BIT(2) +#define LP873X_INT_LDO_LDO0_SC_INT 
BIT(1) +#define LP873X_INT_LDO_LDO0_ILIM_INT BIT(0) + +#define LP873X_TOP_STAT_PGOOD_STAT BIT(7) +#define LP873X_TOP_STAT_SYNC_CLK_STAT BIT(4) +#define LP873X_TOP_STAT_TDIE_SD_STAT BIT(3) +#define LP873X_TOP_STAT_TDIE_WARN_STAT BIT(2) +#define LP873X_TOP_STAT_OVP_STAT BIT(1) + +#define LP873X_BUCK_STAT_BUCK1_STAT BIT(7) +#define LP873X_BUCK_STAT_BUCK1_PG_STAT BIT(6) +#define LP873X_BUCK_STAT_BUCK1_ILIM_STAT BIT(4) +#define LP873X_BUCK_STAT_BUCK0_STAT BIT(3) +#define LP873X_BUCK_STAT_BUCK0_PG_STAT BIT(2) +#define LP873X_BUCK_STAT_BUCK0_ILIM_STAT BIT(0) + +#define LP873X_LDO_STAT_LDO1_STAT BIT(7) +#define LP873X_LDO_STAT_LDO1_PG_STAT BIT(6) +#define LP873X_LDO_STAT_LDO1_ILIM_STAT BIT(4) +#define LP873X_LDO_STAT_LDO0_STAT BIT(3) +#define LP873X_LDO_STAT_LDO0_PG_STAT BIT(2) +#define LP873X_LDO_STAT_LDO0_ILIM_STAT BIT(0) + +#define LP873X_TOP_MASK_1_PGOOD_INT_MASK BIT(7) +#define LP873X_TOP_MASK_1_SYNC_CLK_MASK BIT(4) +#define LP873X_TOP_MASK_1_TDIE_WARN_MASK BIT(2) +#define LP873X_TOP_MASK_1_I_MEAS_MASK BIT(0) + +#define LP873X_TOP_MASK_2_RESET_REG_MASK BIT(0) + +#define LP873X_BUCK_MASK_BUCK1_PGF_MASK BIT(7) +#define LP873X_BUCK_MASK_BUCK1_PGR_MASK BIT(6) +#define LP873X_BUCK_MASK_BUCK1_ILIM_MASK BIT(4) +#define LP873X_BUCK_MASK_BUCK0_PGF_MASK BIT(3) +#define LP873X_BUCK_MASK_BUCK0_PGR_MASK BIT(2) +#define LP873X_BUCK_MASK_BUCK0_ILIM_MASK BIT(0) + +#define LP873X_LDO_MASK_LDO1_PGF_MASK BIT(7) +#define LP873X_LDO_MASK_LDO1_PGR_MASK BIT(6) +#define LP873X_LDO_MASK_LDO1_ILIM_MASK BIT(4) +#define LP873X_LDO_MASK_LDO0_PGF_MASK BIT(3) +#define LP873X_LDO_MASK_LDO0_PGR_MASK BIT(2) +#define LP873X_LDO_MASK_LDO0_ILIM_MASK BIT(0) + +#define LP873X_SEL_I_LOAD_CURRENT_BUCK_SELECT BIT(0) + +#define LP873X_I_LOAD_2_BUCK_LOAD_CURRENT BIT(0) + +#define LP873X_I_LOAD_1_BUCK_LOAD_CURRENT 0xFF + +#define LP873X_MAX_REG_ID LP873X_LDO_1 + +/* Number of step-down converters available */ +#define LP873X_NUM_BUCK 2 +/* Number of LDO voltage regulators available */ +#define LP873X_NUM_LDO 2 
+/* Number of total regulators available */ +#define LP873X_NUM_REGULATOR (LP873X_NUM_BUCK + LP873X_NUM_LDO) + +enum lp873x_regulator_id { + /* BUCK's */ + LP873X_BUCK_0, + LP873X_BUCK_1, + /* LDOs */ + LP873X_LDO_0, + LP873X_LDO_1, +}; + +/** + * struct lp873x - state holder for the lp873x driver + * @dev: struct device pointer for MFD device + * @rev: revision of the lp873x + * @lock: lock guarding the data structure + * @regmap: register map of the lp873x PMIC + * + * Device data may be used to access the LP873X chip + */ +struct lp873x { + struct device *dev; + u8 rev; + struct mutex lock; /* lock guarding the data structure */ + struct regmap *regmap; +}; +#endif /* __LINUX_MFD_LP873X_H */ -- cgit v1.2.3 From 537f7ccb396804c6d0057b93ba8eb104ba44f851 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 8 Aug 2016 14:37:37 -0500 Subject: mntns: Add a limit on the number of mount namespaces. v2: Fixed the very obvious lack of setting ucounts on struct mnt_ns reported by Andrei Vagin, and the kbuild test report. Reported-by: Andrei Vagin Acked-by: Kees Cook Signed-off-by: "Eric W. Biederman" --- include/linux/user_namespace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index c6bc980b06a9..30ffe10cda18 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -30,6 +30,7 @@ enum ucount_type { UCOUNT_UTS_NAMESPACES, UCOUNT_IPC_NAMESPACES, UCOUNT_NET_NAMESPACES, + UCOUNT_MNT_NAMESPACES, UCOUNT_CGROUP_NAMESPACES, UCOUNT_COUNTS, }; -- cgit v1.2.3 From 2eedcbfc0612c87e22c6325fde49ecf140e5873a Mon Sep 17 00:00:00 2001 From: Wadim Egorov Date: Mon, 29 Aug 2016 13:07:58 +0200 Subject: mfd: rk808: Add RK818 support The RK818 chip is a Power Management IC (PMIC) for multimedia and handheld devices. 
It contains the following components: - Regulators - RTC - Clocking - Battery support Both RK808 and RK818 chips are using a similar register map, so we can reuse the RTC and Clocking functionality. Signed-off-by: Wadim Egorov Tested-by: Andy Yan Signed-off-by: Lee Jones --- include/linux/mfd/rk808.h | 154 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 145 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h index 441b6ee72691..fc5db6fcb57d 100644 --- a/include/linux/mfd/rk808.h +++ b/include/linux/mfd/rk808.h @@ -1,11 +1,15 @@ /* - * rk808.h for Rockchip RK808 + * Register definitions for Rockchip's RK808/RK818 PMIC * * Copyright (c) 2014, Fuzhou Rockchip Electronics Co., Ltd * * Author: Chris Zhong * Author: Zhang Qing * + * Copyright (C) 2016 PHYTEC Messtechnik GmbH + * + * Author: Wadim Egorov + * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, * version 2, as published by the Free Software Foundation. @@ -16,8 +20,8 @@ * more details. 
*/ -#ifndef __LINUX_REGULATOR_rk808_H -#define __LINUX_REGULATOR_rk808_H +#ifndef __LINUX_REGULATOR_RK808_H +#define __LINUX_REGULATOR_RK808_H #include #include @@ -28,7 +32,7 @@ #define RK808_DCDC1 0 /* (0+RK808_START) */ #define RK808_LDO1 4 /* (4+RK808_START) */ -#define RK808_NUM_REGULATORS 14 +#define RK808_NUM_REGULATORS 14 enum rk808_reg { RK808_ID_DCDC1, @@ -65,6 +69,8 @@ enum rk808_reg { #define RK808_RTC_INT_REG 0x12 #define RK808_RTC_COMP_LSB_REG 0x13 #define RK808_RTC_COMP_MSB_REG 0x14 +#define RK808_ID_MSB 0x17 +#define RK808_ID_LSB 0x18 #define RK808_CLK32OUT_REG 0x20 #define RK808_VB_MON_REG 0x21 #define RK808_THERMAL_REG 0x22 @@ -115,7 +121,92 @@ enum rk808_reg { #define RK808_INT_STS_MSK_REG2 0x4f #define RK808_IO_POL_REG 0x50 -/* IRQ Definitions */ +/* RK818 */ +#define RK818_DCDC1 0 +#define RK818_LDO1 4 +#define RK818_NUM_REGULATORS 17 + +enum rk818_reg { + RK818_ID_DCDC1, + RK818_ID_DCDC2, + RK818_ID_DCDC3, + RK818_ID_DCDC4, + RK818_ID_BOOST, + RK818_ID_LDO1, + RK818_ID_LDO2, + RK818_ID_LDO3, + RK818_ID_LDO4, + RK818_ID_LDO5, + RK818_ID_LDO6, + RK818_ID_LDO7, + RK818_ID_LDO8, + RK818_ID_LDO9, + RK818_ID_SWITCH, + RK818_ID_HDMI_SWITCH, + RK818_ID_OTG_SWITCH, +}; + +#define RK818_DCDC_EN_REG 0x23 +#define RK818_LDO_EN_REG 0x24 +#define RK818_SLEEP_SET_OFF_REG1 0x25 +#define RK818_SLEEP_SET_OFF_REG2 0x26 +#define RK818_DCDC_UV_STS_REG 0x27 +#define RK818_DCDC_UV_ACT_REG 0x28 +#define RK818_LDO_UV_STS_REG 0x29 +#define RK818_LDO_UV_ACT_REG 0x2a +#define RK818_DCDC_PG_REG 0x2b +#define RK818_LDO_PG_REG 0x2c +#define RK818_VOUT_MON_TDB_REG 0x2d +#define RK818_BUCK1_CONFIG_REG 0x2e +#define RK818_BUCK1_ON_VSEL_REG 0x2f +#define RK818_BUCK1_SLP_VSEL_REG 0x30 +#define RK818_BUCK2_CONFIG_REG 0x32 +#define RK818_BUCK2_ON_VSEL_REG 0x33 +#define RK818_BUCK2_SLP_VSEL_REG 0x34 +#define RK818_BUCK3_CONFIG_REG 0x36 +#define RK818_BUCK4_CONFIG_REG 0x37 +#define RK818_BUCK4_ON_VSEL_REG 0x38 +#define RK818_BUCK4_SLP_VSEL_REG 0x39 +#define RK818_BOOST_CONFIG_REG 
0x3a +#define RK818_LDO1_ON_VSEL_REG 0x3b +#define RK818_LDO1_SLP_VSEL_REG 0x3c +#define RK818_LDO2_ON_VSEL_REG 0x3d +#define RK818_LDO2_SLP_VSEL_REG 0x3e +#define RK818_LDO3_ON_VSEL_REG 0x3f +#define RK818_LDO3_SLP_VSEL_REG 0x40 +#define RK818_LDO4_ON_VSEL_REG 0x41 +#define RK818_LDO4_SLP_VSEL_REG 0x42 +#define RK818_LDO5_ON_VSEL_REG 0x43 +#define RK818_LDO5_SLP_VSEL_REG 0x44 +#define RK818_LDO6_ON_VSEL_REG 0x45 +#define RK818_LDO6_SLP_VSEL_REG 0x46 +#define RK818_LDO7_ON_VSEL_REG 0x47 +#define RK818_LDO7_SLP_VSEL_REG 0x48 +#define RK818_LDO8_ON_VSEL_REG 0x49 +#define RK818_LDO8_SLP_VSEL_REG 0x4a +#define RK818_BOOST_LDO9_ON_VSEL_REG 0x54 +#define RK818_BOOST_LDO9_SLP_VSEL_REG 0x55 +#define RK818_DEVCTRL_REG 0x4b +#define RK818_INT_STS_REG1 0X4c +#define RK818_INT_STS_MSK_REG1 0x4d +#define RK818_INT_STS_REG2 0x4e +#define RK818_INT_STS_MSK_REG2 0x4f +#define RK818_IO_POL_REG 0x50 +#define RK818_H5V_EN_REG 0x52 +#define RK818_SLEEP_SET_OFF_REG3 0x53 +#define RK818_BOOST_LDO9_ON_VSEL_REG 0x54 +#define RK818_BOOST_LDO9_SLP_VSEL_REG 0x55 +#define RK818_BOOST_CTRL_REG 0x56 +#define RK818_DCDC_ILMAX 0x90 +#define RK818_USB_CTRL_REG 0xa1 + +#define RK818_H5V_EN BIT(0) +#define RK818_REF_RDY_CTRL BIT(1) +#define RK818_USB_ILIM_SEL_MASK 0xf +#define RK818_USB_ILMIN_2000MA 0x7 +#define RK818_USB_CHG_SD_VSEL_MASK 0x70 + +/* RK808 IRQ Definitions */ #define RK808_IRQ_VOUT_LO 0 #define RK808_IRQ_VB_LO 1 #define RK808_IRQ_PWRON 2 @@ -137,6 +228,43 @@ enum rk808_reg { #define RK808_IRQ_PLUG_IN_INT_MSK BIT(0) #define RK808_IRQ_PLUG_OUT_INT_MSK BIT(1) +/* RK818 IRQ Definitions */ +#define RK818_IRQ_VOUT_LO 0 +#define RK818_IRQ_VB_LO 1 +#define RK818_IRQ_PWRON 2 +#define RK818_IRQ_PWRON_LP 3 +#define RK818_IRQ_HOTDIE 4 +#define RK818_IRQ_RTC_ALARM 5 +#define RK818_IRQ_RTC_PERIOD 6 +#define RK818_IRQ_USB_OV 7 +#define RK818_IRQ_PLUG_IN 8 +#define RK818_IRQ_PLUG_OUT 9 +#define RK818_IRQ_CHG_OK 10 +#define RK818_IRQ_CHG_TE 11 +#define RK818_IRQ_CHG_TS1 12 +#define RK818_IRQ_TS2 13 
+#define RK818_IRQ_CHG_CVTLIM 14 +#define RK818_IRQ_DISCHG_ILIM 7 + +#define RK818_IRQ_VOUT_LO_MSK BIT(0) +#define RK818_IRQ_VB_LO_MSK BIT(1) +#define RK818_IRQ_PWRON_MSK BIT(2) +#define RK818_IRQ_PWRON_LP_MSK BIT(3) +#define RK818_IRQ_HOTDIE_MSK BIT(4) +#define RK818_IRQ_RTC_ALARM_MSK BIT(5) +#define RK818_IRQ_RTC_PERIOD_MSK BIT(6) +#define RK818_IRQ_USB_OV_MSK BIT(7) +#define RK818_IRQ_PLUG_IN_MSK BIT(0) +#define RK818_IRQ_PLUG_OUT_MSK BIT(1) +#define RK818_IRQ_CHG_OK_MSK BIT(2) +#define RK818_IRQ_CHG_TE_MSK BIT(3) +#define RK818_IRQ_CHG_TS1_MSK BIT(4) +#define RK818_IRQ_TS2_MSK BIT(5) +#define RK818_IRQ_CHG_CVTLIM_MSK BIT(6) +#define RK818_IRQ_DISCHG_ILIM_MSK BIT(7) + +#define RK818_NUM_IRQ 16 + #define RK808_VBAT_LOW_2V8 0x00 #define RK808_VBAT_LOW_2V9 0x01 #define RK808_VBAT_LOW_3V0 0x02 @@ -191,9 +319,17 @@ enum { BOOST_ILMIN_250MA, }; +enum { + RK808_ID = 0x0000, + RK818_ID = 0x8181, +}; + struct rk808 { - struct i2c_client *i2c; - struct regmap_irq_chip_data *irq_data; - struct regmap *regmap; + struct i2c_client *i2c; + struct regmap_irq_chip_data *irq_data; + struct regmap *regmap; + long variant; + const struct regmap_config *regmap_cfg; + const struct regmap_irq_chip *regmap_irq_chip; }; -#endif /* __LINUX_REGULATOR_rk808_H */ +#endif /* __LINUX_REGULATOR_RK808_H */ -- cgit v1.2.3 From 0e125a5facf857567f8bb6dbb1ceefac14b2fa64 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Fri, 8 Jul 2016 17:00:39 +0800 Subject: tty: amba-pl011: define flag register bits for ZTE device For some reason we do not really understand, ZTE hardware designers choose to define PL011 Flag Register bit positions differently from standard ones as below. Bit Standard ZTE ----------------------------------- CTS 0 1 DSR 1 3 BUSY 3 8 RI 8 0 Let's define these bits into vendor data and get ZTE PL011 supported properly. 
Signed-off-by: Shawn Guo Acked-by: Russell King Signed-off-by: Greg Kroah-Hartman --- include/linux/amba/serial.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index d76a19ba2cff..ad0965e21a5e 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -104,6 +104,15 @@ #define UART01x_FR_CTS 0x001 #define UART01x_FR_TMSK (UART01x_FR_TXFF + UART01x_FR_BUSY) +/* + * Some bits of Flag Register on ZTE device have different position from + * standard ones. + */ +#define ZX_UART01x_FR_BUSY 0x100 +#define ZX_UART01x_FR_DSR 0x008 +#define ZX_UART01x_FR_CTS 0x002 +#define ZX_UART011_FR_RI 0x001 + #define UART011_CR_CTSEN 0x8000 /* CTS hardware flow control */ #define UART011_CR_RTSEN 0x4000 /* RTS hardware flow control */ #define UART011_CR_OUT2 0x2000 /* OUT2 */ -- cgit v1.2.3 From 2426fbc77faef57c80a573303af6543531e2efa3 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Fri, 8 Jul 2016 17:00:41 +0800 Subject: tty: amba-pl011: probe ZTE device from AMBA bus with a pseudo-ID There are no Peripheral Identification Registers on ZTE PL011 device, so although the driver amba-pl011 is ready to work for ZTE device, the device cannot be probed by the driver at all. With arm,primecell-periphid DT bindings (bindings/arm/primecell.txt) in place, it should be the cleanest way to use a pseudo-ID to probe the device from AMBA bus. We create an unofficial vendor number AMBA_VENDOR_LINUX, which will practically never become an official vendor ID, and take Configuration, Revision number, and Part number as input to compose a pseudo-ID for ZTE device. Also, since we start using vendor_zte to probe ZTE device, the __maybe_unused for vendor_zte is removed.
Signed-off-by: Russell King Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- include/linux/amba/bus.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 3d8dcdd1aeae..d143c13bed26 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -53,8 +53,14 @@ enum amba_vendor { AMBA_VENDOR_ST = 0x80, AMBA_VENDOR_QCOM = 0x51, AMBA_VENDOR_LSI = 0xb6, + AMBA_VENDOR_LINUX = 0xfe, /* This value is not official */ }; +/* This is used to generate pseudo-ID for AMBA device */ +#define AMBA_LINUX_ID(conf, rev, part) \ + (((conf) & 0xff) << 24 | ((rev) & 0xf) << 20 | \ + AMBA_VENDOR_LINUX << 12 | ((part) & 0xfff)) + extern struct bus_type amba_bustype; #define to_amba_device(d) container_of(d, struct amba_device, dev) -- cgit v1.2.3 From 5bf5635ac1705b8d58fdef5ff0666ef0e72b4629 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Thu, 25 Aug 2016 15:47:56 +0200 Subject: tty/serial: atmel: add fractional baud rate support The USART device provides a fractional baud rate generator to get a more accurate baud rate. It can be used only when the USART is configured in 'normal mode' and this feature is not available on AT91RM9200 SoC. 
Signed-off-by: Ludovic Desroches Acked-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- include/linux/atmel_serial.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/atmel_serial.h b/include/linux/atmel_serial.h index 5a4d664af87a..f8e452aa48d7 100644 --- a/include/linux/atmel_serial.h +++ b/include/linux/atmel_serial.h @@ -118,6 +118,7 @@ #define ATMEL_US_BRGR 0x20 /* Baud Rate Generator Register */ #define ATMEL_US_CD GENMASK(15, 0) /* Clock Divider */ +#define ATMEL_US_FP_OFFSET 16 /* Fractional Part */ #define ATMEL_US_RTOR 0x24 /* Receiver Time-out Register for USART */ #define ATMEL_UA_RTOR 0x28 /* Receiver Time-out Register for UART */ -- cgit v1.2.3 From c072e113b8fbd6b2bf325e92379a0da6dea619b3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 17 Aug 2016 19:20:21 +0300 Subject: dmaengine: dw: set polarity of handshake interface Intel Quark UART uses DesignWare DMA IP. However, the DMA IP is connected in such a way that the handshake interface uses inverted polarity. We have to provide a possibility to set this in the DMA driver when configuring a channel. Introduce a new member of custom slave configuration called 'hs_polarity' and set active low polarity in case this value is 'true'.
Acked-by: Vinod Koul Signed-off-by: Andy Shevchenko Tested-by: Bryan O'Donoghue Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_data/dma-dw.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index d15d8ba8cc24..4636c9335305 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -23,6 +23,7 @@ * @dst_id: dst request line * @m_master: memory master for transfers on allocated channel * @p_master: peripheral master for transfers on allocated channel + * @hs_polarity:set active low polarity of handshake interface */ struct dw_dma_slave { struct device *dma_dev; @@ -30,6 +31,7 @@ struct dw_dma_slave { u8 dst_id; u8 m_master; u8 p_master; + bool hs_polarity; }; /** -- cgit v1.2.3 From 5fb23e35cacffb7b99ed5b97a002ddb8c9144bb6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 17 Aug 2016 19:20:22 +0300 Subject: dmaengine: dw: override LLP support if asked in platform data There are at least two known devices, e.g. DMA controller found on ARC AXS101 SDP board, that have LLP register and no multi block transfer support at the same time. Override autodetection by user provided data. Reported-by: Eugeniy Paltsev Reviewed-by: Eugeniy Paltsev Signed-off-by: Andy Shevchenko Tested-by: Bryan O'Donoghue Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_data/dma-dw.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index 4636c9335305..5f0e11e7354c 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -40,6 +40,7 @@ struct dw_dma_slave { * @is_private: The device channels should be marked as private and not for * by the general purpose DMA channel allocator. * @is_memcpy: The device channels do support memory-to-memory transfers. 
+ * @is_nollp: The device channels does not support multi block transfers. * @chan_allocation_order: Allocate channels starting from 0 or 7 * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0. * @block_size: Maximum block size supported by the controller @@ -51,6 +52,7 @@ struct dw_dma_platform_data { unsigned int nr_channels; bool is_private; bool is_memcpy; + bool is_nollp; #define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */ #define CHAN_ALLOCATION_DESCENDING 1 /* seven to zero */ unsigned char chan_allocation_order; -- cgit v1.2.3 From 19d82910c07173872bd2869f0077e290a7a466a6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 17 Aug 2016 19:20:23 +0300 Subject: dmaengine: dw: provide probe(), remove() stubs for users Some users consider DMA optional, thus when driver is not compiled we shouldn't prevent compilation of the users. Add stubs for dw_dma_probe() and dw_dma_remove(). Acked-by: Vinod Koul Signed-off-by: Andy Shevchenko Tested-by: Bryan O'Donoghue Signed-off-by: Greg Kroah-Hartman --- include/linux/dma/dw.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma/dw.h b/include/linux/dma/dw.h index f2e538aaddad..ccfd0c3777df 100644 --- a/include/linux/dma/dw.h +++ b/include/linux/dma/dw.h @@ -40,8 +40,13 @@ struct dw_dma_chip { }; /* Export to the platform drivers */ +#if IS_ENABLED(CONFIG_DW_DMAC_CORE) int dw_dma_probe(struct dw_dma_chip *chip); int dw_dma_remove(struct dw_dma_chip *chip); +#else +static inline int dw_dma_probe(struct dw_dma_chip *chip) { return -ENODEV; } +static inline int dw_dma_remove(struct dw_dma_chip *chip) { return 0; } +#endif /* CONFIG_DW_DMAC_CORE */ /* DMA API extensions */ struct dw_desc; -- cgit v1.2.3 From 469e857f374640f6164913835ce30d0736b40a60 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 12 Aug 2016 20:14:09 +0200 Subject: time: Avoid undefined behaviour in timespec64_add_safe() I ran into this: 
================================================================================ UBSAN: Undefined behaviour in kernel/time/time.c:783:2 signed integer overflow: 5273 + 9223372036854771711 cannot be represented in type 'long int' CPU: 0 PID: 17363 Comm: trinity-c0 Not tainted 4.8.0-rc1+ #88 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 0000000000000000 ffff88011457f8f0 ffffffff82344f50 0000000041b58ab3 ffffffff84f98080 ffffffff82344ea4 ffff88011457f918 ffff88011457f8c8 ffff88011457f8e0 7fffffffffffefff ffff88011457f6d8 dffffc0000000000 Call Trace: [] dump_stack+0xac/0xfc [] ? _atomic_dec_and_lock+0xc4/0xc4 [] ubsan_epilogue+0xd/0x8a [] handle_overflow+0x202/0x23d [] ? val_to_string.constprop.6+0x11e/0x11e [] ? debug_smp_processor_id+0x17/0x20 [] ? __sigqueue_free.part.13+0x51/0x70 [] ? rcu_is_watching+0x110/0x110 [] __ubsan_handle_add_overflow+0xe/0x10 [] timespec64_add_safe+0x298/0x340 [] ? timespec_add_safe+0x330/0x330 [] ? wait_noreap_copyout+0x1d0/0x1d0 [] poll_select_set_timeout+0xf8/0x170 [] ? poll_schedule_timeout+0x2b0/0x2b0 [] ? __might_sleep+0x5b/0x260 [] __sys_recvmmsg+0x107/0x790 [] ? SyS_recvmsg+0x20/0x20 [] ? hrtimer_start_range_ns+0x3b8/0x1380 [] ? _raw_spin_unlock_irqrestore+0x3b/0x60 [] ? do_setitimer+0x39a/0x8e0 [] ? __might_sleep+0x5b/0x260 [] ? __sys_recvmmsg+0x790/0x790 [] SyS_recvmmsg+0xd9/0x160 [] ? __sys_recvmmsg+0x790/0x790 [] ? __this_cpu_preempt_check+0x13/0x20 [] ? __context_tracking_exit.part.3+0x30/0x1b0 [] ? __sys_recvmmsg+0x790/0x790 [] do_syscall_64+0x1b3/0x4b0 [] entry_SYSCALL64_slow_path+0x25/0x25 ================================================================================ Line 783 is this: 783 set_normalized_timespec64(&res, lhs.tv_sec + rhs.tv_sec, 784 lhs.tv_nsec + rhs.tv_nsec); In other words, since lhs.tv_sec and rhs.tv_sec are both time64_t, this is a signed addition which will cause undefined behaviour on overflow. 
Note that this is not currently a huge concern since the kernel should be built with -fno-strict-overflow by default, but could be a problem in the future, a problem with older compilers, or other compilers than gcc. The easiest way to avoid the overflow is to cast one of the arguments to unsigned (so the addition will be done using unsigned arithmetic). Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Richard Cochran Cc: Prarit Bhargava Signed-off-by: Vegard Nossum Signed-off-by: John Stultz --- include/linux/time64.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/time64.h b/include/linux/time64.h index 7e5d2fa9ac46..980c71b3001a 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -5,6 +5,7 @@ #include typedef __s64 time64_t; +typedef __u64 timeu64_t; /* * This wants to go into uapi/linux/time.h once we agreed about the -- cgit v1.2.3 From 979515c5645830465739254abc1b1648ada41518 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sat, 13 Aug 2016 01:37:04 +0200 Subject: time: Avoid undefined behaviour in ktime_add_safe() I ran into this: ================================================================================ UBSAN: Undefined behaviour in kernel/time/hrtimer.c:310:16 signed integer overflow: 9223372036854775807 + 50000 cannot be represented in type 'long long int' CPU: 2 PID: 4798 Comm: trinity-c2 Not tainted 4.8.0-rc1+ #91 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 0000000000000000 ffff88010ce6fb88 ffffffff82344740 0000000041b58ab3 ffffffff84f97a20 ffffffff82344694 ffff88010ce6fbb0 ffff88010ce6fb60 000000000000c350 ffff88010ce6f968 dffffc0000000000 ffffffff857bc320 Call Trace: [] dump_stack+0xac/0xfc [] ? _atomic_dec_and_lock+0xc4/0xc4 [] ubsan_epilogue+0xd/0x8a [] handle_overflow+0x202/0x23d [] ? val_to_string.constprop.6+0x11e/0x11e [] ? timerqueue_add+0x151/0x410 [] ? hrtimer_start_range_ns+0x3b8/0x1380 [] ? 
memset+0x31/0x40 [] __ubsan_handle_add_overflow+0xe/0x10 [] hrtimer_nanosleep+0x5d9/0x790 [] ? hrtimer_init_sleeper+0x80/0x80 [] ? __might_sleep+0x5b/0x260 [] common_nsleep+0x20/0x30 [] SyS_clock_nanosleep+0x197/0x210 [] ? SyS_clock_getres+0x150/0x150 [] ? __this_cpu_preempt_check+0x13/0x20 [] ? __context_tracking_exit.part.3+0x30/0x1b0 [] ? SyS_clock_getres+0x150/0x150 [] do_syscall_64+0x1b3/0x4b0 [] entry_SYSCALL64_slow_path+0x25/0x25 ================================================================================ Add a new ktime_add_unsafe() helper which doesn't check for overflow, but doesn't throw a UBSAN warning when it does overflow either. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Richard Cochran Cc: Prarit Bhargava Signed-off-by: Vegard Nossum Signed-off-by: John Stultz --- include/linux/ktime.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 2b6a204bd8d4..3ffc69ebe967 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -63,6 +63,13 @@ static inline ktime_t ktime_set(const s64 secs, const unsigned long nsecs) #define ktime_add(lhs, rhs) \ ({ (ktime_t){ .tv64 = (lhs).tv64 + (rhs).tv64 }; }) +/* + * Same as ktime_add(), but avoids undefined behaviour on overflow; however, + * this means that you must check the result for overflow yourself. + */ +#define ktime_add_unsafe(lhs, rhs) \ + ({ (ktime_t){ .tv64 = (u64) (lhs).tv64 + (rhs).tv64 }; }) + /* * Add a ktime_t variable and a scalar nanosecond value. * res = kt + nsval: -- cgit v1.2.3 From d7a83d127a64fd91ef1ad39b7e2d78db36cf388b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 15 Aug 2016 18:55:11 +0100 Subject: arm64: hw_breakpoint: convert CPU hotplug notifier to new infrastructure The arm64 hw_breakpoint implementation uses a CPU hotplug notifier to reset the {break,watch}point registers when CPUs come online. 
This patch converts the code to the new hotplug mechanism, whilst moving the invocation earlier to remove the need to disable IRQs explicitly in the driver (which could cause havoc if we trip a watchpoint in an IRQ handler whilst restoring the debug register state). Cc: Sebastian Andrzej Siewior Reviewed-by: Lorenzo Pieralisi Signed-off-by: Will Deacon --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 242bf530edfc..3758fe6d5968 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -45,6 +45,7 @@ enum cpuhp_state { CPUHP_AP_PERF_METAG_STARTING, CPUHP_AP_MIPS_OP_LOONGSON3_STARTING, CPUHP_AP_ARM_VFP_STARTING, + CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING, CPUHP_AP_PERF_ARM_STARTING, CPUHP_AP_ARM_L2X0_STARTING, CPUHP_AP_ARM_ARCH_TIMER_STARTING, -- cgit v1.2.3 From e937dd5782688928d8c4050237b93b0a51faebee Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 16 Aug 2016 11:29:17 +0100 Subject: arm64: debug: convert OS lock CPU hotplug notifier to new infrastructure The arm64 debug monitor initialisation code uses a CPU hotplug notifier to clear the OS lock when CPUs come online. This patch converts the code to the new hotplug mechanism. 
Cc: Sebastian Andrzej Siewior Reviewed-by: Lorenzo Pieralisi Signed-off-by: Will Deacon --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 3758fe6d5968..8c999a202452 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -45,6 +45,7 @@ enum cpuhp_state { CPUHP_AP_PERF_METAG_STARTING, CPUHP_AP_MIPS_OP_LOONGSON3_STARTING, CPUHP_AP_ARM_VFP_STARTING, + CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING, CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING, CPUHP_AP_PERF_ARM_STARTING, CPUHP_AP_ARM_L2X0_STARTING, -- cgit v1.2.3 From 3b5dd3a49496220b35af83c96e3d2ff5716550ae Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Thu, 1 Sep 2016 15:50:52 +0800 Subject: power: supply: sbs-battery: Use gpio_desc and sleeping calls for battery detect Switch to using new gpio_desc interface and devm gpio get calls to automatically manage gpio resource. Use gpiod_get_value which handles active high / low calls. If gpio_detect is set then force loading of the driver as it is reasonable to assume that the battery may not be present. Update the is_present flag immediately in the IRQ. Remove legacy gpio specification from platform data. 
Signed-off-by: Phil Reid Signed-off-by: Sebastian Reichel --- include/linux/power/sbs-battery.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/power/sbs-battery.h b/include/linux/power/sbs-battery.h index 2b0a9d9ff57e..811f1a0c00cb 100644 --- a/include/linux/power/sbs-battery.h +++ b/include/linux/power/sbs-battery.h @@ -26,15 +26,11 @@ /** * struct sbs_platform_data - platform data for sbs devices - * @battery_detect: GPIO which is used to detect battery presence - * @battery_detect_present: gpio state when battery is present (0 / 1) * @i2c_retry_count: # of times to retry on i2c IO failure * @poll_retry_count: # of times to retry looking for new status after * external change notification */ struct sbs_platform_data { - int battery_detect; - int battery_detect_present; int i2c_retry_count; int poll_retry_count; }; -- cgit v1.2.3 From f5b55fa1f81d518925d68b50d2316850c525d1ad Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 31 Aug 2016 09:27:35 +0200 Subject: RAID/s390: provide raid6 recovery optimization The XC instruction can be used to improve the speed of the raid6 recovery. The loops now operate on blocks of 256 bytes. 
Signed-off-by: Martin Schwidefsky --- include/linux/raid/pq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index c032a6a408a6..395a4c674168 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -116,6 +116,7 @@ struct raid6_recov_calls { extern const struct raid6_recov_calls raid6_recov_intx1; extern const struct raid6_recov_calls raid6_recov_ssse3; extern const struct raid6_recov_calls raid6_recov_avx2; +extern const struct raid6_recov_calls raid6_recov_s390xc; extern const struct raid6_calls raid6_neonx1; extern const struct raid6_calls raid6_neonx2; -- cgit v1.2.3 From 8861dd303cba879bae9a9dcee74042fb642bf03b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 31 Aug 2016 11:55:29 +0900 Subject: ftrace: Access ret_stack->subtime only in the function profiler The subtime is used only for function profiler with function graph tracer enabled. Move the definition of subtime under CONFIG_FUNCTION_PROFILER to reduce the memory usage. Also move the initialization of subtime into the graph entry callback. 
Link: http://lkml.kernel.org/r/20160831025529.24018-1-namhyung@kernel.org Cc: Ingo Molnar Cc: Josh Poimboeuf Signed-off-by: Namhyung Kim Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 7d565afe35d2..1e2b316d6693 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -794,7 +794,9 @@ struct ftrace_ret_stack { unsigned long ret; unsigned long func; unsigned long long calltime; +#ifdef CONFIG_FUNCTION_PROFILER unsigned long long subtime; +#endif unsigned long fp; }; -- cgit v1.2.3 From 2c4ddb215521d5dfb30f72123ef966ac6bdd16d7 Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Sat, 27 Aug 2016 15:43:43 +0200 Subject: firmware: Amlogic: Add secure monitor driver Introduce a driver to provide calls into secure monitor mode. In the Amlogic SoCs these calls are used for multiple reasons: access to NVMEM, set USB boot, enable JTAG, etc... Acked-by: Mark Rutland Signed-off-by: Carlo Caione [khilman: add in SZ_4K cleanup] Signed-off-by: Kevin Hilman --- include/linux/firmware/meson/meson_sm.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/linux/firmware/meson/meson_sm.h (limited to 'include/linux') diff --git a/include/linux/firmware/meson/meson_sm.h b/include/linux/firmware/meson/meson_sm.h new file mode 100644 index 000000000000..8e953c6f394a --- /dev/null +++ b/include/linux/firmware/meson/meson_sm.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2016 Endless Mobile, Inc. + * Author: Carlo Caione + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef _MESON_SM_FW_H_ +#define _MESON_SM_FW_H_ + +enum { + SM_EFUSE_READ, + SM_EFUSE_WRITE, + SM_EFUSE_USER_MAX, +}; + +struct meson_sm_firmware; + +int meson_sm_call(unsigned int cmd_index, u32 *ret, u32 arg0, u32 arg1, + u32 arg2, u32 arg3, u32 arg4); +int meson_sm_call_write(void *buffer, unsigned int b_size, unsigned int cmd_index, + u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); +int meson_sm_call_read(void *buffer, unsigned int cmd_index, u32 arg0, u32 arg1, + u32 arg2, u32 arg3, u32 arg4); + +#endif /* _MESON_SM_FW_H_ */ -- cgit v1.2.3 From d297653dd6f07afbe7e6c702a4bcd7615680002e Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 30 Aug 2016 21:56:45 -0700 Subject: rtnetlink: fdb dump: optimize by saving last interface markers fdb dumps spanning multiple skb's currently restart from the first interface again for every skb. This results in unnecessary iterations on the already visited interfaces and their fdb entries. In large scale setups, we have seen this to slow down fdb dumps considerably. On a system with 30k macs we see fdb dumps spanning across more than 300 skbs. To fix the problem, this patch replaces the existing single fdb marker with three markers: netdev hash entries, netdevs and fdb index to continue where we left off instead of restarting from the first netdev. This is consistent with link dumps. In the process of fixing the performance issue, this patch also re-implements fix done by commit 472681d57a5d ("net: ndo_fdb_dump should report -EMSGSIZE to rtnl_fdb_dump") (with an internal fix from Wilson Kok) in the following ways: - change ndo_fdb_dump handlers to return error code instead of the last fdb index - use cb->args strictly for dump frag markers and not error codes. This is consistent with other dump functions. 
Below results were taken on a system with 1000 netdevs and 35085 fdb entries: before patch: $time bridge fdb show | wc -l 15065 real 1m11.791s user 0m0.070s sys 1m8.395s (existing code does not return all macs) after patch: $time bridge fdb show | wc -l 35085 real 0m2.017s user 0m0.113s sys 0m1.942s Signed-off-by: Roopa Prabhu Signed-off-by: Wilson Kok Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- include/linux/rtnetlink.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d122be9345c7..67bb978470dc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1031,7 +1031,7 @@ struct netdev_xdp { * Deletes the FDB entry from dev coresponding to addr. * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, - * int idx) + * int *idx) * Used to add FDB entries to dump requests. Implementers should add * entries to skb and update idx with the number of entries. 
* @@ -1263,7 +1263,7 @@ struct net_device_ops { struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx); + int *idx); int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 2daece8979f7..57e54847b0b9 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -105,7 +105,7 @@ extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx); + int *idx); extern int ndo_dflt_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, -- cgit v1.2.3 From b6cb5ac8331b6bcfe9ce38c7f7f58db6e1d6270a Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 31 Aug 2016 15:36:52 +0200 Subject: net: bridge: add per-port multicast flood flag Add a per-port flag to control the unknown multicast flood, similar to the unknown unicast flood flag and break a few long lines in the netlink flag exports. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index dcb89e3515db..c6587c01d951 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -45,6 +45,7 @@ struct br_ip_list { #define BR_PROXYARP BIT(8) #define BR_LEARNING_SYNC BIT(9) #define BR_PROXYARP_WIFI BIT(10) +#define BR_MCAST_FLOOD BIT(11) #define BR_DEFAULT_AGEING_TIME (300 * HZ) -- cgit v1.2.3 From 46e36683f433528bfb7e5754ca5c5c86c204c40a Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Fri, 2 Sep 2016 13:20:21 +0200 Subject: serial: earlycon: Extend earlycon command line option to support 64-bit addresses earlycon implementation used "unsigned long" internally, but there are systems (ARM with LPAE) where sizeof(unsigned long) == 4 and uart is mapped beyond 4GiB address range. 
Switch to resource_size_t internally and replace obsoleted simple_strtoul() with kstrtoull(). Signed-off-by: Alexander Sverdlin Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 2f44e2013654..cdba6f144f72 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -374,7 +374,7 @@ extern int of_setup_earlycon(const struct earlycon_id *match, struct uart_port *uart_get_console(struct uart_port *ports, int nr, struct console *c); -int uart_parse_earlycon(char *p, unsigned char *iotype, unsigned long *addr, +int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr, char **options); void uart_parse_options(char *options, int *baud, int *parity, int *bits, int *flow); -- cgit v1.2.3 From d2f5a7311bcaed681a41cb3419b8fe92a7b68bf5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 23 Aug 2016 16:09:40 +0300 Subject: dmaengine: hsu: refactor hsu_dma_do_irq() to return int Since we have nice macro IRQ_RETVAL() we would use it to convert a flag of handled interrupt from int to irqreturn_t. The rationale of doing this is: a) hence we implicitly mark hsu_dma_do_irq() as an auxiliary function that can't be used as interrupt handler directly, and b) to be in align with serial driver which is using serial8250_handle_irq() that returns plain int by design. 
Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- include/linux/dma/hsu.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma/hsu.h b/include/linux/dma/hsu.h index aaff68efba5d..197eec63e501 100644 --- a/include/linux/dma/hsu.h +++ b/include/linux/dma/hsu.h @@ -41,8 +41,7 @@ struct hsu_dma_chip { /* Export to the internal users */ int hsu_dma_get_status(struct hsu_dma_chip *chip, unsigned short nr, u32 *status); -irqreturn_t hsu_dma_do_irq(struct hsu_dma_chip *chip, unsigned short nr, - u32 status); +int hsu_dma_do_irq(struct hsu_dma_chip *chip, unsigned short nr, u32 status); /* Export to the platform drivers */ int hsu_dma_probe(struct hsu_dma_chip *chip); @@ -53,10 +52,10 @@ static inline int hsu_dma_get_status(struct hsu_dma_chip *chip, { return 0; } -static inline irqreturn_t hsu_dma_do_irq(struct hsu_dma_chip *chip, - unsigned short nr, u32 status) +static inline int hsu_dma_do_irq(struct hsu_dma_chip *chip, unsigned short nr, + u32 status) { - return IRQ_NONE; + return 0; } static inline int hsu_dma_probe(struct hsu_dma_chip *chip) { return -ENODEV; } static inline int hsu_dma_remove(struct hsu_dma_chip *chip) { return 0; } -- cgit v1.2.3 From bb08d431a914984dee278e0e0482a2e6d620a482 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 2 Sep 2016 05:58:22 -0700 Subject: Drivers: hv: ring_buffer: count on wrap around mappings in get_next_pkt_raw() With wrap around mappings in place we can always provide drivers with direct links to packets on the ring buffer, even when they wrap around. Do the required updates to get_next_pkt_raw()/put_pkt_raw() Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. 
Srinivasan Tested-by: Dexuan Cui Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 755e8f566a47..e6ef571e6100 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1469,31 +1469,23 @@ static inline struct vmpacket_descriptor * get_next_pkt_raw(struct vmbus_channel *channel) { struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 read_loc = ring_info->priv_read_index; + u32 priv_read_loc = ring_info->priv_read_index; void *ring_buffer = hv_get_ring_buffer(ring_info); - struct vmpacket_descriptor *cur_desc; - u32 packetlen; u32 dsize = ring_info->ring_datasize; - u32 delta = read_loc - ring_info->ring_buffer->read_index; + /* + * delta is the difference between what is available to read and + * what was already consumed in place. We commit read index after + * the whole batch is processed. + */ + u32 delta = priv_read_loc >= ring_info->ring_buffer->read_index ? + priv_read_loc - ring_info->ring_buffer->read_index : + (dsize - ring_info->ring_buffer->read_index) + priv_read_loc; u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta); if (bytes_avail_toread < sizeof(struct vmpacket_descriptor)) return NULL; - if ((read_loc + sizeof(*cur_desc)) > dsize) - return NULL; - - cur_desc = ring_buffer + read_loc; - packetlen = cur_desc->len8 << 3; - - /* - * If the packet under consideration is wrapping around, - * return failure. 
- */ - if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > (dsize - 1)) - return NULL; - - return cur_desc; + return ring_buffer + priv_read_loc; } /* @@ -1505,16 +1497,14 @@ static inline void put_pkt_raw(struct vmbus_channel *channel, struct vmpacket_descriptor *desc) { struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 read_loc = ring_info->priv_read_index; u32 packetlen = desc->len8 << 3; u32 dsize = ring_info->ring_datasize; - if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > dsize) - BUG(); /* * Include the packet trailer. */ ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER; + ring_info->priv_read_index %= dsize; } /* -- cgit v1.2.3 From 509879bdb30b8e12bd0b3cb0bc8429f01478df4b Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 2 Sep 2016 05:58:23 -0700 Subject: Drivers: hv: Introduce a policy for controlling channel affinity Introduce a mechanism to control how channels will be affinitized. We will support two policies: 1. HV_BALANCED: All performance critical channels will be dstributed evenly amongst all the available NUMA nodes. Once the Node is assigned, we will assign the CPU based on a simple round robin scheme. 2. HV_LOCALIZED: Only the primary channels are distributed across all NUMA nodes. Sub-channels will be in the same NUMA node as the primary channel. This is the current behaviour. The default policy will be the HV_BALANCED as it can minimize the remote memory access on NUMA machines with applications that span NUMA nodes. Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index e6ef571e6100..c877e7980585 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -674,6 +674,11 @@ enum hv_signal_policy { HV_SIGNAL_POLICY_EXPLICIT, }; +enum hv_numa_policy { + HV_BALANCED = 0, + HV_LOCALIZED, +}; + enum vmbus_device_type { HV_IDE = 0, HV_SCSI, @@ -876,6 +881,18 @@ struct vmbus_channel { */ bool low_latency; + /* + * NUMA distribution policy: + * We support teo policies: + * 1) Balanced: Here all performance critical channels are + * distributed evenly amongst all the NUMA nodes. + * This policy will be the default policy. + * 2) Localized: All channels of a given instance of a + * performance critical service will be assigned CPUs + * within a selected NUMA node. + */ + enum hv_numa_policy affinity_policy; + }; static inline void set_channel_lock_state(struct vmbus_channel *c, bool state) @@ -895,6 +912,12 @@ static inline void set_channel_signal_state(struct vmbus_channel *c, c->signal_policy = policy; } +static inline void set_channel_affinity_state(struct vmbus_channel *c, + enum hv_numa_policy policy) +{ + c->affinity_policy = policy; +} + static inline void set_channel_read_state(struct vmbus_channel *c, bool state) { c->batched_reading = state; -- cgit v1.2.3 From 7b2c86250122de316cbab8754050622ead04af39 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 4 Aug 2016 12:49:53 -0400 Subject: tracing: Add NMI tracing in hwlat detector As NMIs can also cause latency when interrupts are disabled, the hwlat detectory has no way to know if the latency it detects is from an NMI or an SMI or some other hardware glitch. 
As ftrace_nmi_enter/exit() funtions are no longer used (except for sh, which isn't supported anymore), I converted those to "arch_ftrace_nmi_enter/exit" and use ftrace_nmi_enter/exit() to check if hwlat detector is tracing or not, and if so, it calls into the hwlat utility. Since the hwlat detector only has a single kthread that is spinning with interrupts disabled, it marks what CPU it is on, and if the NMI callback happens on that CPU, it records the time spent in that NMI. This is added to the output that is generated by the hwlat detector as: #3 inner/outer(us): 9/9 ts:1470836488.206734548 #4 inner/outer(us): 0/8 ts:1470836497.140808588 #5 inner/outer(us): 0/6 ts:1470836499.140825168 nmi-total:5 nmi-count:1 #6 inner/outer(us): 9/9 ts:1470836501.140841748 All time is still tracked in microseconds. The NMI information is only shown when an NMI occurred during the sample. Signed-off-by: Steven Rostedt --- include/linux/ftrace_irq.h | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h index dca7bf8cffe2..4ec2c9b205f2 100644 --- a/include/linux/ftrace_irq.h +++ b/include/linux/ftrace_irq.h @@ -3,11 +3,34 @@ #ifdef CONFIG_FTRACE_NMI_ENTER -extern void ftrace_nmi_enter(void); -extern void ftrace_nmi_exit(void); +extern void arch_ftrace_nmi_enter(void); +extern void arch_ftrace_nmi_exit(void); #else -static inline void ftrace_nmi_enter(void) { } -static inline void ftrace_nmi_exit(void) { } +static inline void arch_ftrace_nmi_enter(void) { } +static inline void arch_ftrace_nmi_exit(void) { } #endif +#ifdef CONFIG_HWLAT_TRACER +extern bool trace_hwlat_callback_enabled; +extern void trace_hwlat_callback(bool enter); +#endif + +static inline void ftrace_nmi_enter(void) +{ +#ifdef CONFIG_HWLAT_TRACER + if (trace_hwlat_callback_enabled) + trace_hwlat_callback(true); +#endif + arch_ftrace_nmi_enter(); +} + +static inline void 
ftrace_nmi_exit(void) +{ + arch_ftrace_nmi_exit(); +#ifdef CONFIG_HWLAT_TRACER + if (trace_hwlat_callback_enabled) + trace_hwlat_callback(false); +#endif +} + #endif /* _LINUX_FTRACE_IRQ_H */ -- cgit v1.2.3 From 0515e5999a466dfe6e1924f460da599bb6821487 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 1 Sep 2016 18:37:22 -0700 Subject: bpf: introduce BPF_PROG_TYPE_PERF_EVENT program type Introduce BPF_PROG_TYPE_PERF_EVENT programs that can be attached to HW and SW perf events (PERF_TYPE_HARDWARE and PERF_TYPE_SOFTWARE correspondingly in uapi/linux/perf_event.h) The program visible context meta structure is struct bpf_perf_event_data { struct pt_regs regs; __u64 sample_period; }; which is accessible directly from the program: int bpf_prog(struct bpf_perf_event_data *ctx) { ... ctx->sample_period ... ... ctx->regs.ip ... } The bpf verifier rewrites the accesses into kernel internal struct bpf_perf_event_data_kern which allows changing struct perf_sample_data without affecting bpf programs. New fields can be added to the end of struct bpf_perf_event_data in the future. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/perf_event.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2b6b43cc0dd5..97bfe62f30d7 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -788,6 +788,11 @@ struct perf_output_handle { int page; }; +struct bpf_perf_event_data_kern { + struct pt_regs *regs; + struct perf_sample_data *data; +}; + #ifdef CONFIG_CGROUP_PERF /* -- cgit v1.2.3 From aa6a5f3cb2b2edc5b9aab0b4fdfdfa9c3b5096a8 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 1 Sep 2016 18:37:24 -0700 Subject: perf, bpf: add perf events core support for BPF_PROG_TYPE_PERF_EVENT programs Allow attaching BPF_PROG_TYPE_PERF_EVENT programs to sw and hw perf events via overflow_handler mechanism. 
When program is attached the overflow_handlers become stacked. The program acts as a filter. Returning zero from the program means that the normal perf_event_output handler will not be called and sampling event won't be stored in the ring buffer. The overflow_handler_context==NULL is an additional safety check to make sure programs are not attached to hw breakpoints and watchdog in case other checks (that prevent that now anyway) get accidentally relaxed in the future. The program refcnt is incremented in case perf_events are inherited when target task is forked. Similar to kprobe and tracepoint programs there is no ioctl to detach the program or swap already attached program. The user space is expected to close(perf_event_fd) like it does right now for kprobe+bpf. That restriction simplifies the code quite a bit. The invocation of overflow_handler in __perf_event_overflow() is now done via READ_ONCE, since that pointer can be replaced when the program is attached while perf_event itself could have been active already. There is no need to do similar treatment for event->prog, since it's assigned only once before it's accessed. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 4 ++++ include/linux/perf_event.h | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 11134238417d..9a904f63f8c1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -297,6 +297,10 @@ static inline struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) static inline void bpf_prog_put(struct bpf_prog *prog) { } +static inline struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) +{ + return ERR_PTR(-EOPNOTSUPP); +} #endif /* CONFIG_BPF_SYSCALL */ /* verifier prototypes for helper functions called from eBPF programs */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 97bfe62f30d7..ccb73a58113d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -679,6 +679,10 @@ struct perf_event { u64 (*clock)(void); perf_overflow_handler_t overflow_handler; void *overflow_handler_context; +#ifdef CONFIG_BPF_SYSCALL + perf_overflow_handler_t orig_overflow_handler; + struct bpf_prog *prog; +#endif #ifdef CONFIG_EVENT_TRACING struct trace_event_call *tp_event; -- cgit v1.2.3 From cf392d10b69e6e6c57ceea48b347a2ab1a4b75b2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 Aug 2016 19:49:39 +0200 Subject: cpu/hotplug: Add multi instance support This patch adds the ability for a given state to have multiple instances. Until now all states have a single instance and the startup / teardown callback use global variables. A few drivers need to perform the same callbacks on multiple "instances". Currently we have three drivers in tree which all have a global list which they iterate over. With multi instance support they don't need their private list and the functionality has been moved into core code. 
Plus we hold the hotplug lock in core so no cpus comes/goes while instances are registered and we do rollback in error case :) Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Cc: Mark Rutland Cc: Peter Zijlstra Cc: Will Deacon Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/1471024183-12666-3-git-send-email-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 110 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 107 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 242bf530edfc..dcfe619171b4 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -99,7 +99,7 @@ enum cpuhp_state { int __cpuhp_setup_state(enum cpuhp_state state, const char *name, bool invoke, int (*startup)(unsigned int cpu), - int (*teardown)(unsigned int cpu)); + int (*teardown)(unsigned int cpu), bool multi_instance); /** * cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks @@ -116,7 +116,7 @@ static inline int cpuhp_setup_state(enum cpuhp_state state, int (*startup)(unsigned int cpu), int (*teardown)(unsigned int cpu)) { - return __cpuhp_setup_state(state, name, true, startup, teardown); + return __cpuhp_setup_state(state, name, true, startup, teardown, false); } /** @@ -135,7 +135,66 @@ static inline int cpuhp_setup_state_nocalls(enum cpuhp_state state, int (*startup)(unsigned int cpu), int (*teardown)(unsigned int cpu)) { - return __cpuhp_setup_state(state, name, false, startup, teardown); + return __cpuhp_setup_state(state, name, false, startup, teardown, + false); +} + +/** + * cpuhp_setup_state_multi - Add callbacks for multi state + * @state: The state for which the calls are installed + * @name: Name of the callback. 
+ * @startup: startup callback function + * @teardown: teardown callback function + * + * Sets the internal multi_instance flag and prepares a state to work as a multi + * instance callback. No callbacks are invoked at this point. The callbacks are + * invoked once an instance for this state are registered via + * @cpuhp_state_add_instance or @cpuhp_state_add_instance_nocalls. + */ +static inline int cpuhp_setup_state_multi(enum cpuhp_state state, + const char *name, + int (*startup)(unsigned int cpu, + struct hlist_node *node), + int (*teardown)(unsigned int cpu, + struct hlist_node *node)) +{ + return __cpuhp_setup_state(state, name, false, + (void *) startup, + (void *) teardown, true); +} + +int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node, + bool invoke); + +/** + * cpuhp_state_add_instance - Add an instance for a state and invoke startup + * callback. + * @state: The state for which the instance is installed + * @node: The node for this individual state. + * + * Installs the instance for the @state and invokes the startup callback on + * the present cpus which have already reached the @state. The @state must have + * been earlier marked as multi-instance by @cpuhp_setup_state_multi. + */ +static inline int cpuhp_state_add_instance(enum cpuhp_state state, + struct hlist_node *node) +{ + return __cpuhp_state_add_instance(state, node, true); +} + +/** + * cpuhp_state_add_instance_nocalls - Add an instance for a state without + * invoking the startup callback. + * @state: The state for which the instance is installed + * @node: The node for this individual state. + * + * Installs the instance for the @state The @state must have been earlier + * marked as multi-instance by @cpuhp_setup_state_multi. 
+ */ +static inline int cpuhp_state_add_instance_nocalls(enum cpuhp_state state, + struct hlist_node *node) +{ + return __cpuhp_state_add_instance(state, node, false); } void __cpuhp_remove_state(enum cpuhp_state state, bool invoke); @@ -162,6 +221,51 @@ static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state) __cpuhp_remove_state(state, false); } +/** + * cpuhp_remove_multi_state - Remove hotplug multi state callback + * @state: The state for which the calls are removed + * + * Removes the callback functions from a multi state. This is the reverse of + * cpuhp_setup_state_multi(). All instances should have been removed before + * invoking this function. + */ +static inline void cpuhp_remove_multi_state(enum cpuhp_state state) +{ + __cpuhp_remove_state(state, false); +} + +int __cpuhp_state_remove_instance(enum cpuhp_state state, + struct hlist_node *node, bool invoke); + +/** + * cpuhp_state_remove_instance - Remove hotplug instance from state and invoke + * the teardown callback + * @state: The state from which the instance is removed + * @node: The node for this individual state. + * + * Removes the instance and invokes the teardown callback on the present cpus + * which have already reached the @state. + */ +static inline int cpuhp_state_remove_instance(enum cpuhp_state state, + struct hlist_node *node) +{ + return __cpuhp_state_remove_instance(state, node, true); +} + +/** + * cpuhp_state_remove_instance_nocalls - Remove hotplug instance from state + * without invoking the teardown callback + * @state: The state from which the instance is removed + * @node: The node for this individual state. + * + * Removes the instance without invoking the teardown callback.
+ */ +static inline int cpuhp_state_remove_instance_nocalls(enum cpuhp_state state, + struct hlist_node *node) +{ + return __cpuhp_state_remove_instance(state, node, false); +} + #ifdef CONFIG_SMP void cpuhp_online_idle(enum cpuhp_state state); #else -- cgit v1.2.3 From 6e103c0cfeb9ab8d40822a015da9769595096411 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 17 Aug 2016 19:14:20 +0200 Subject: arm/perf: Use multi instance instead of custom list Signed-off-by: Sebastian Andrzej Siewior Cc: Peter Zijlstra Cc: Mark Rutland Cc: Will Deacon Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160817171420.sdwk2qivxunzryz4@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/perf/arm_pmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index e18843809eec..4ad1b408c0bb 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -109,7 +109,7 @@ struct arm_pmu { DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); struct platform_device *plat_device; struct pmu_hw_events __percpu *hw_events; - struct list_head entry; + struct hlist_node node; struct notifier_block cpu_pm_nb; }; -- cgit v1.2.3 From 8017c279196ab29174bafc104ac4ebbd42c7ca7f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 12 Aug 2016 19:49:43 +0200 Subject: net/virtio-net: Convert to hotplug state machine Install the callbacks via the state machine. The driver supports multiple instances and therefore the new cpuhp_state_add_instance_nocalls() infrastructure is used. The driver currently uses get_online_cpus() to avoid missing a CPU hotplug event while invoking virtnet_set_affinity(). This could be avoided by using cpuhp_state_add_instance() variant which holds the hotplug lock and invokes callback during registration. This is more or less a 1:1 conversion of the current code.
Signed-off-by: Sebastian Andrzej Siewior Cc: Mark Rutland Cc: "Michael S. Tsirkin" Cc: Peter Zijlstra Cc: netdev@vger.kernel.org Cc: Will Deacon Cc: virtualization@lists.linux-foundation.org Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/1471024183-12666-7-git-send-email-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index dcfe619171b4..b95f7adfbf8b 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -14,6 +14,7 @@ enum cpuhp_state { CPUHP_PERF_SUPERH, CPUHP_X86_HPET_DEAD, CPUHP_X86_APB_DEAD, + CPUHP_VIRT_NET_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, -- cgit v1.2.3 From f88eecfe2f22b2790e7527c0aaec14ea175919de Mon Sep 17 00:00:00 2001 From: Sebastian Frias Date: Tue, 16 Aug 2016 16:05:08 +0200 Subject: genirq/generic_chip: Verify irqs_per_chip <= 32 Most (if not all) code here implicitly assumes that the maximum number of IRQs per chip will be 32, and thus uses 'u32' or 'unsigned long' for many tasks (for example "struct irq_data" declares its 'mask' field as 'u32', and "struct irq_chip_generic" declares its 'installed' field as 'unsigned long') However, there is no check to verify that irqs_per_chip is <= 32. Hence, calling irq_alloc_domain_generic_chips() with a bigger value will result in unexpected results. Provide a wrapper with a MAYBE_BUILD_BUG_ON(nrirqs >= 32) to catch such cases. 
[ tglx: Reduced changelog to the essential information ] Signed-off-by: Sebastian Frias Cc: Marc Zyngier Cc: Mason Cc: Jason Cooper Link: http://lkml.kernel.org/r/57B31D94.5040701@laposte.net Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index b52424eaa0ed..603986741f2c 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -916,12 +916,20 @@ void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, unsigned int clr, unsigned int set); struct irq_chip_generic *irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq); -int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, - int num_ct, const char *name, - irq_flow_handler_t handler, - unsigned int clr, unsigned int set, - enum irq_gc_flags flags); +int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, + int num_ct, const char *name, + irq_flow_handler_t handler, + unsigned int clr, unsigned int set, + enum irq_gc_flags flags); + +#define irq_alloc_domain_generic_chips(d, irqs_per_chip, num_ct, name, \ + handler, clr, set, flags) \ +({ \ + MAYBE_BUILD_BUG_ON(irqs_per_chip > 32); \ + __irq_alloc_domain_generic_chips(d, irqs_per_chip, num_ct, name,\ + handler, clr, set, flags); \ +}) static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d) { -- cgit v1.2.3 From 3f37ec79dd21fbdbbab8143a48a87272b22fef22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 25 Jul 2016 20:33:56 +0200 Subject: bcma: support BCM53573 series of wireless SoCs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BCM53573 seems to be the first series of Northstar family with wireless on the chip. The base models are BCM53573-s (A0, A1) and there is also BCM47189B0 which seems to be some small modification. 
The only problem with these chipsets seems to be watchdog. It's totally unavailable on 53573A0 / 53573A1 and preferable PMU watchdog is broken on 53573B0 / 53573B1. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 3db25df396cb..8eeedb2db924 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -205,6 +205,9 @@ struct bcma_host_ops { #define BCMA_PKG_ID_BCM4709 0 #define BCMA_CHIP_ID_BCM47094 53030 #define BCMA_CHIP_ID_BCM53018 53018 +#define BCMA_CHIP_ID_BCM53573 53573 +#define BCMA_PKG_ID_BCM53573 0 +#define BCMA_PKG_ID_BCM47189 1 /* Board types (on PCI usually equals to the subsystem dev id) */ /* BCM4313 */ -- cgit v1.2.3 From c8cdf70890d89c07c9e890b103106d58999f0ce4 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Fri, 2 Sep 2016 23:36:15 -0700 Subject: iio: trigger: allow immutable triggers to be assigned There are times when an assigned trigger to a device shouldn't ever change after initialization. An example of this being used is when a provider device has a trigger that is assigned to an ADC, which uses it to populate data into a callback buffer.
Signed-off-by: Matt Ranostay Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 2 ++ include/linux/iio/trigger.h | 9 +++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 854e2dad1e0d..786952cd509f 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -483,6 +483,7 @@ struct iio_buffer_setup_ops { * @scan_timestamp: [INTERN] set if any buffers have requested timestamp * @scan_index_timestamp:[INTERN] cache of the index to the timestamp * @trig: [INTERN] current device trigger (buffer modes) + * @trig_readonly [INTERN] mark the current trigger immutable * @pollfunc: [DRIVER] function run on trigger being received * @pollfunc_event: [DRIVER] function run on events trigger being received * @channels: [DRIVER] channel specification structure table @@ -523,6 +524,7 @@ struct iio_dev { bool scan_timestamp; unsigned scan_index_timestamp; struct iio_trigger *trig; + bool trig_readonly; struct iio_poll_func *pollfunc; struct iio_poll_func *pollfunc_event; diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index 1c9e028e0d4a..a122bdd4076c 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -131,6 +131,15 @@ int iio_trigger_register(struct iio_trigger *trig_info); **/ void iio_trigger_unregister(struct iio_trigger *trig_info); +/** + * iio_trigger_set_immutable() - set an immutable trigger on destination + * + * @indio_dev - IIO device structure containing the device + * @trig - trigger to assign to device + * + **/ +int iio_trigger_set_immutable(struct iio_dev *indio_dev, struct iio_trigger *trig); + /** * iio_trigger_poll() - called on a trigger occurring * @trig: trigger which occurred -- cgit v1.2.3 From 9083325f1197a6956db17809d74dbe3578dc1005 Mon Sep 17 00:00:00 2001 From: Gregor Boirie Date: Fri, 2 Sep 2016 20:47:54 +0200 Subject: iio:trigger: add resource managed (un)register Add resource managed 
devm_iio_trigger_register() and devm_iio_trigger_unregister() to automatically clean up registered triggers allocated by IIO drivers, thus leading to simplified IIO drivers code. Signed-off-by: Gregor Boirie Signed-off-by: Jonathan Cameron --- include/linux/iio/trigger.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index a122bdd4076c..f0890a5abf13 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -125,12 +125,18 @@ static inline void *iio_trigger_get_drvdata(struct iio_trigger *trig) **/ int iio_trigger_register(struct iio_trigger *trig_info); +int devm_iio_trigger_register(struct device *dev, + struct iio_trigger *trig_info); + /** * iio_trigger_unregister() - unregister a trigger from the core * @trig_info: trigger to be unregistered **/ void iio_trigger_unregister(struct iio_trigger *trig_info); +void devm_iio_trigger_unregister(struct device *dev, + struct iio_trigger *trig_info); + /** * iio_trigger_set_immutable() - set an immutable trigger on destination * -- cgit v1.2.3 From 70e483487db787b152da756d4be0fef917378142 Mon Sep 17 00:00:00 2001 From: Gregor Boirie Date: Fri, 2 Sep 2016 20:47:55 +0200 Subject: iio: add resource managed triggered buffer init helpers Add resource managed devm_iio_triggered_buffer_setup() and devm_iio_triggered_buffer_cleanup() to automatically clean up triggered buffers setup by IIO drivers, thus leading to simplified IIO drivers code.
Signed-off-by: Gregor Boirie Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 1 + include/linux/iio/triggered_buffer.h | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 786952cd509f..b4a0679e4a49 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -644,6 +644,7 @@ static inline struct iio_dev *iio_priv_to_dev(void *priv) } void iio_device_free(struct iio_dev *indio_dev); +int devm_iio_device_match(struct device *dev, void *res, void *data); struct iio_dev *devm_iio_device_alloc(struct device *dev, int sizeof_priv); void devm_iio_device_free(struct device *dev, struct iio_dev *indio_dev); struct iio_trigger *devm_iio_trigger_alloc(struct device *dev, diff --git a/include/linux/iio/triggered_buffer.h b/include/linux/iio/triggered_buffer.h index f72f70d5a97b..30145616773d 100644 --- a/include/linux/iio/triggered_buffer.h +++ b/include/linux/iio/triggered_buffer.h @@ -12,4 +12,12 @@ int iio_triggered_buffer_setup(struct iio_dev *indio_dev, const struct iio_buffer_setup_ops *setup_ops); void iio_triggered_buffer_cleanup(struct iio_dev *indio_dev); +int devm_iio_triggered_buffer_setup(struct device *dev, + struct iio_dev *indio_dev, + irqreturn_t (*h)(int irq, void *p), + irqreturn_t (*thread)(int irq, void *p), + const struct iio_buffer_setup_ops *ops); +void devm_iio_triggered_buffer_cleanup(struct device *dev, + struct iio_dev *indio_dev); + #endif -- cgit v1.2.3 From b8fb03785d4de097507d0cf45873525e0ac4d2b2 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 1 Sep 2016 11:39:33 -0700 Subject: locking/static_keys: Provide DECLARE and well as DEFINE macros We will need to provide declarations of static keys in header files. Provide DECLARE_STATIC_KEY_{TRUE,FALSE} macros. 
Signed-off-by: Tony Luck Acked-by: Borislav Petkov Cc: Peter Zijlstra Cc: Dan Williams Cc: Linus Torvalds Link: http://lkml.kernel.org/r/816881cf85bd3cf13385d212882618f38a3b5d33.1472754711.git.tony.luck@intel.com Signed-off-by: Thomas Gleixner --- include/linux/jump_label.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 661af564fae8..595fb46213fc 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -267,9 +267,15 @@ struct static_key_false { #define DEFINE_STATIC_KEY_TRUE(name) \ struct static_key_true name = STATIC_KEY_TRUE_INIT +#define DECLARE_STATIC_KEY_TRUE(name) \ + extern struct static_key_true name + #define DEFINE_STATIC_KEY_FALSE(name) \ struct static_key_false name = STATIC_KEY_FALSE_INIT +#define DECLARE_STATIC_KEY_FALSE(name) \ + extern struct static_key_false name + extern bool ____wrong_branch_error(void); #define static_key_enabled(x) \ -- cgit v1.2.3 From bd6fcefc66f6d038406e38edf96a95d9842f819d Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 23 Aug 2016 13:52:37 -0500 Subject: iommu/amd: Adding GALOG interrupt handler This patch adds AMD IOMMU guest virtual APIC log (GALOG) handler. When IOMMU hardware receives an interrupt targeting a blocking vcpu, it creates an entry in the GALOG, and generates an interrupt to notify the AMD IOMMU driver. At this point, the driver processes the log entry, and notify the SVM driver via the registered iommu_ga_log_notifier function. 
Signed-off-by: Suravee Suthikulpanit Signed-off-by: Joerg Roedel --- include/linux/amd-iommu.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 2b08e79f5100..465d096a5f4b 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -168,11 +168,25 @@ typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, int pasid); extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, amd_iommu_invalidate_ctx cb); - -#else +#else /* CONFIG_AMD_IOMMU */ static inline int amd_iommu_detect(void) { return -ENODEV; } -#endif +#endif /* CONFIG_AMD_IOMMU */ + +#if defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) + +/* IOMMU AVIC Function */ +extern int amd_iommu_register_ga_log_notifier(int (*notifier)(u32)); + +#else /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */ + +static inline int +amd_iommu_register_ga_log_notifier(int (*notifier)(u32)) +{ + return 0; +} + +#endif /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */ #endif /* _ASM_X86_AMD_IOMMU_H */ -- cgit v1.2.3 From 8dbea3fd7becd4af8ca882c3132be4b1a857e301 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 23 Aug 2016 13:52:38 -0500 Subject: iommu/amd: Introduce amd_iommu_update_ga() Introduces a new IOMMU API, amd_iommu_update_ga(), which allows KVM (SVM) to update existing posted interrupt IOMMU IRTE when load/unload vcpu. 
Signed-off-by: Suravee Suthikulpanit Signed-off-by: Joerg Roedel --- include/linux/amd-iommu.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 465d096a5f4b..d8d48aca0eb7 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -179,6 +179,9 @@ static inline int amd_iommu_detect(void) { return -ENODEV; } /* IOMMU AVIC Function */ extern int amd_iommu_register_ga_log_notifier(int (*notifier)(u32)); +extern int +amd_iommu_update_ga(int cpu, bool is_run, void *data); + #else /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */ static inline int @@ -187,6 +190,12 @@ amd_iommu_register_ga_log_notifier(int (*notifier)(u32)) return 0; } +static inline int +amd_iommu_update_ga(int cpu, bool is_run, void *data) +{ + return 0; +} + #endif /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */ #endif /* _ASM_X86_AMD_IOMMU_H */ -- cgit v1.2.3 From b9fc6b56f478b487dc8fc400da73d89ac9137201 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 23 Aug 2016 13:52:39 -0500 Subject: iommu/amd: Implements irq_set_vcpu_affinity() hook to setup vapic mode for pass-through devices This patch implements irq_set_vcpu_affinity() function to set up interrupt remapping table entry with vapic mode for pass-through devices. In case requirements for vapic mode are not met, it falls back to set up the IRTE in legacy mode. 
Signed-off-by: Suravee Suthikulpanit Signed-off-by: Joerg Roedel --- include/linux/amd-iommu.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index d8d48aca0eb7..09751d349963 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -22,6 +22,20 @@ #include +/* + * This is mainly used to communicate information back-and-forth + * between SVM and IOMMU for setting up and tearing down posted + * interrupt + */ +struct amd_iommu_pi_data { + u32 ga_tag; + u32 prev_ga_tag; + u64 base; + bool is_guest_mode; + struct vcpu_data *vcpu_data; + void *ir_data; +}; + #ifdef CONFIG_AMD_IOMMU struct task_struct; -- cgit v1.2.3 From c65a8b51123a14f6960e4238bfa4673d54ee183a Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Sat, 3 Sep 2016 00:09:53 +0200 Subject: power: supply: bq24735-charger: Request status GPIO with initial input setup This requests the status GPIO with initial input setup. It is required to read the GPIO status at probe time and thus correctly avoid sending I2C messages when AC is not plugged. When requesting the GPIO without initial input setup, it always reads 0 which causes probe to fail as it assumes the charger is connected, sends I2C messages and fails. While at it, this switches the driver over to gpiod API. 
Signed-off-by: Paul Kocialkowski Signed-off-by: Sebastian Reichel --- include/linux/power/bq24735-charger.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/power/bq24735-charger.h b/include/linux/power/bq24735-charger.h index 6b750c1a45fa..b04be59f914c 100644 --- a/include/linux/power/bq24735-charger.h +++ b/include/linux/power/bq24735-charger.h @@ -28,10 +28,6 @@ struct bq24735_platform { const char *name; - int status_gpio; - int status_gpio_active_low; - bool status_gpio_valid; - bool ext_control; char **supplied_to; -- cgit v1.2.3 From 47ae4b05d0fa2f2a998ebaf34d2dcbffca56a9db Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 29 Aug 2016 08:48:43 +0200 Subject: virt, sched: Add generic vCPU pinning support Add generic virtualization support for pinning the current vCPU to a specified physical CPU. As this operation isn't performance critical (a very limited set of operations like BIOS calls and SMIs is expected to need this) just add a hypervisor specific indirection. 
Signed-off-by: Juergen Gross Signed-off-by: Peter Zijlstra (Intel) Cc: Douglas_Warzecha@dell.com Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akataria@vmware.com Cc: boris.ostrovsky@oracle.com Cc: chrisw@sous-sol.org Cc: david.vrabel@citrix.com Cc: hpa@zytor.com Cc: jdelvare@suse.com Cc: jeremy@goop.org Cc: linux@roeck-us.net Cc: pali.rohar@gmail.com Cc: rusty@rustcorp.com.au Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1472453327-19050-3-git-send-email-jgross@suse.com Signed-off-by: Ingo Molnar --- include/linux/hypervisor.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 include/linux/hypervisor.h (limited to 'include/linux') diff --git a/include/linux/hypervisor.h b/include/linux/hypervisor.h new file mode 100644 index 000000000000..3fa5ef2b3759 --- /dev/null +++ b/include/linux/hypervisor.h @@ -0,0 +1,17 @@ +#ifndef __LINUX_HYPEVISOR_H +#define __LINUX_HYPEVISOR_H + +/* + * Generic Hypervisor support + * Juergen Gross + */ + +#ifdef CONFIG_HYPERVISOR_GUEST +#include +#else +static inline void hypervisor_pin_vcpu(int cpu) +{ +} +#endif + +#endif /* __LINUX_HYPEVISOR_H */ -- cgit v1.2.3 From df8ce9d78a4e7fbe7ddfd8ccee3ecaaa0013e883 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 29 Aug 2016 08:48:44 +0200 Subject: smp: Add function to execute a function synchronously on a CPU On some hardware models (e.g. Dell Studio 1555 laptop) some hardware related functions (e.g. SMIs) are to be executed on physical CPU 0 only. Instead of open coding such a functionality multiple times in the kernel add a service function for this purpose. This will enable the possibility to take special measures in virtualized environments like Xen, too. 
Signed-off-by: Juergen Gross Signed-off-by: Peter Zijlstra (Intel) Cc: Douglas_Warzecha@dell.com Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akataria@vmware.com Cc: boris.ostrovsky@oracle.com Cc: chrisw@sous-sol.org Cc: david.vrabel@citrix.com Cc: hpa@zytor.com Cc: jdelvare@suse.com Cc: jeremy@goop.org Cc: linux@roeck-us.net Cc: pali.rohar@gmail.com Cc: rusty@rustcorp.com.au Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1472453327-19050-4-git-send-email-jgross@suse.com Signed-off-by: Ingo Molnar --- include/linux/smp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index eccae4690f41..8e0cb7a0f836 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -196,6 +196,9 @@ extern void arch_enable_nonboot_cpus_end(void); void smp_setup_processor_id(void); +int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, + bool phys); + /* SMP core functions */ int smpcfd_prepare_cpu(unsigned int cpu); int smpcfd_dead_cpu(unsigned int cpu); -- cgit v1.2.3 From 0733424c9ba9f42242409d1ece780777272f7ea1 Mon Sep 17 00:00:00 2001 From: David Hsu Date: Tue, 9 Aug 2016 14:57:46 -0700 Subject: pwm: Unexport children before chip removal Exported pwm channels aren't removed before the pwmchip and are leaked. This results in invalid sysfs files. This fix removes all exported pwm channels before chip removal. 
Signed-off-by: David Hsu Fixes: 76abbdde2d95 ("pwm: Add sysfs interface") Signed-off-by: Thierry Reding --- include/linux/pwm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index f1bbae014889..2c6c5114c089 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -641,6 +641,7 @@ static inline void pwm_remove_table(struct pwm_lookup *table, size_t num) #ifdef CONFIG_PWM_SYSFS void pwmchip_sysfs_export(struct pwm_chip *chip); void pwmchip_sysfs_unexport(struct pwm_chip *chip); +void pwmchip_sysfs_unexport_children(struct pwm_chip *chip); #else static inline void pwmchip_sysfs_export(struct pwm_chip *chip) { @@ -649,6 +650,10 @@ static inline void pwmchip_sysfs_export(struct pwm_chip *chip) static inline void pwmchip_sysfs_unexport(struct pwm_chip *chip) { } + +static inline void pwmchip_sysfs_unexport_children(struct pwm_chip *chip) +{ +} #endif /* CONFIG_PWM_SYSFS */ #endif /* __LINUX_PWM_H */ -- cgit v1.2.3 From c8a9a6daccad495c48d5435d3487956ce01bc6a1 Mon Sep 17 00:00:00 2001 From: Lin Huang Date: Thu, 4 Aug 2016 19:32:33 +0900 Subject: PM / devfreq: event: remove duplicate devfreq_event_get_drvdata() There are two definitions of the devfreq_event_get_drvdata() function in devfreq-event.h when CONFIG_PM_DEVFREQ_EVENT is disabled, which leads to a build failure. So remove the duplicate devfreq_event_get_drvdata() function.
Fixes: f262f28c1470 ("PM / devfreq: event: Add devfreq_event class") Cc: stable@vger.kernel.org Signed-off-by: Lin Huang Signed-off-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- include/linux/devfreq-event.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devfreq-event.h b/include/linux/devfreq-event.h index 0a83a1e648b0..4db00b02ca3f 100644 --- a/include/linux/devfreq-event.h +++ b/include/linux/devfreq-event.h @@ -148,11 +148,6 @@ static inline int devfreq_event_reset_event(struct devfreq_event_dev *edev) return -EINVAL; } -static inline void *devfreq_event_get_drvdata(struct devfreq_event_dev *edev) -{ - return ERR_PTR(-EINVAL); -} - static inline struct devfreq_event_dev *devfreq_event_get_edev_by_phandle( struct device *dev, int index) { -- cgit v1.2.3 From 6691402313ddda232e6a401af8841b5fe676a62f Mon Sep 17 00:00:00 2001 From: Tal Shorer Date: Tue, 16 Aug 2016 19:04:47 +0300 Subject: usb: ulpi: add new api functions, {read|write}_dev() Add these two new api callbacks to struct ulpi_ops. These are different than read, write in that they pass the parent device directly instead of via the ops argument. They are intended to replace the old api functions. If the new api callbacks are missing, revert to calling the old ones as before. 
Acked-by: Heikki Krogerus Signed-off-by: Tal Shorer Signed-off-by: Felipe Balbi --- include/linux/ulpi/interface.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ulpi/interface.h b/include/linux/ulpi/interface.h index 4de8ab491038..d8189d08eddb 100644 --- a/include/linux/ulpi/interface.h +++ b/include/linux/ulpi/interface.h @@ -15,6 +15,8 @@ struct ulpi_ops { struct device *dev; int (*read)(struct ulpi_ops *ops, u8 addr); int (*write)(struct ulpi_ops *ops, u8 addr, u8 val); + int (*read_dev)(struct device *dev, u8 addr); + int (*write_dev)(struct device *dev, u8 addr, u8 val); }; struct ulpi *ulpi_register_interface(struct device *, struct ulpi_ops *); -- cgit v1.2.3 From 5c42f38795645834a7c23998bd74d35a37bff078 Mon Sep 17 00:00:00 2001 From: Tal Shorer Date: Tue, 16 Aug 2016 19:04:49 +0300 Subject: usb: ulpi: remove calls to old api callbacks Now that all users use the new api callbacks, remove the old api callbacks and force new interface drivers to use the new api. 
Acked-by: Heikki Krogerus Signed-off-by: Tal Shorer Signed-off-by: Felipe Balbi --- include/linux/ulpi/interface.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ulpi/interface.h b/include/linux/ulpi/interface.h index d8189d08eddb..71f3c99771aa 100644 --- a/include/linux/ulpi/interface.h +++ b/include/linux/ulpi/interface.h @@ -13,8 +13,6 @@ struct ulpi; */ struct ulpi_ops { struct device *dev; - int (*read)(struct ulpi_ops *ops, u8 addr); - int (*write)(struct ulpi_ops *ops, u8 addr, u8 val); int (*read_dev)(struct device *dev, u8 addr); int (*write_dev)(struct device *dev, u8 addr, u8 val); }; -- cgit v1.2.3 From e6f74849784ccf275226d5d3ddfb96c71fa90383 Mon Sep 17 00:00:00 2001 From: Tal Shorer Date: Tue, 16 Aug 2016 19:04:50 +0300 Subject: usb: ulpi: rename operations {read|write}_dev to simply {read|write} With the removal of the old {read|write} operations, we can now safely rename the new api operations {read|write}_dev to use the shorter and clearer names {read|write}, respectively. 
Acked-by: Heikki Krogerus Signed-off-by: Tal Shorer Signed-off-by: Felipe Balbi --- include/linux/ulpi/interface.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ulpi/interface.h b/include/linux/ulpi/interface.h index 71f3c99771aa..ac3cd8058d9c 100644 --- a/include/linux/ulpi/interface.h +++ b/include/linux/ulpi/interface.h @@ -13,8 +13,8 @@ struct ulpi; */ struct ulpi_ops { struct device *dev; - int (*read_dev)(struct device *dev, u8 addr); - int (*write_dev)(struct device *dev, u8 addr, u8 val); + int (*read)(struct device *dev, u8 addr); + int (*write)(struct device *dev, u8 addr, u8 val); }; struct ulpi *ulpi_register_interface(struct device *, struct ulpi_ops *); -- cgit v1.2.3 From 042b0f31b2a87799a9c832f71474c5be3517e139 Mon Sep 17 00:00:00 2001 From: Tal Shorer Date: Tue, 16 Aug 2016 19:04:51 +0300 Subject: usb: ulpi: remove "dev" field from struct ulpi_ops Operations now use ulpi->dev.parent directly instead of via the ulpi_ops struct, making this field unused. Remove it. 
Acked-by: Heikki Krogerus Signed-off-by: Tal Shorer Signed-off-by: Felipe Balbi --- include/linux/ulpi/interface.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ulpi/interface.h b/include/linux/ulpi/interface.h index ac3cd8058d9c..cdedac87ed48 100644 --- a/include/linux/ulpi/interface.h +++ b/include/linux/ulpi/interface.h @@ -4,15 +4,14 @@ #include struct ulpi; +struct device; /** * struct ulpi_ops - ULPI register access - * @dev: the interface provider * @read: read operation for ULPI register access * @write: write operation for ULPI register access */ struct ulpi_ops { - struct device *dev; int (*read)(struct device *dev, u8 addr); int (*write)(struct device *dev, u8 addr, u8 val); }; -- cgit v1.2.3 From b9454f90c9432e1a70389c26c34e972090efcec6 Mon Sep 17 00:00:00 2001 From: Tal Shorer Date: Tue, 16 Aug 2016 19:04:52 +0300 Subject: usb: ulpi: make ops struct constant None of the core ulpi functions perform any changes to the operations struct, and logically as a struct that contains function pointers there's no reason it shouldn't be constant. 
Acked-by: Heikki Krogerus Signed-off-by: Tal Shorer Signed-off-by: Felipe Balbi --- include/linux/ulpi/driver.h | 2 +- include/linux/ulpi/interface.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ulpi/driver.h b/include/linux/ulpi/driver.h index 388f6e08b9d4..a44408f6d532 100644 --- a/include/linux/ulpi/driver.h +++ b/include/linux/ulpi/driver.h @@ -15,7 +15,7 @@ struct ulpi_ops; */ struct ulpi { struct ulpi_device_id id; - struct ulpi_ops *ops; + const struct ulpi_ops *ops; struct device dev; }; diff --git a/include/linux/ulpi/interface.h b/include/linux/ulpi/interface.h index cdedac87ed48..a2011a919eb6 100644 --- a/include/linux/ulpi/interface.h +++ b/include/linux/ulpi/interface.h @@ -16,7 +16,7 @@ struct ulpi_ops { int (*write)(struct device *dev, u8 addr, u8 val); }; -struct ulpi *ulpi_register_interface(struct device *, struct ulpi_ops *); +struct ulpi *ulpi_register_interface(struct device *, const struct ulpi_ops *); void ulpi_unregister_interface(struct ulpi *); #endif /* __LINUX_ULPI_INTERFACE_H */ -- cgit v1.2.3 From b828f960215f02e5d2c88bbd27565c694254a15a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 2 Sep 2016 10:35:18 +0100 Subject: ARM: 8611/1: l2x0: add PMU support The L2C-220 (AKA L220) and L2C-310 (AKA PL310) cache controllers feature a Performance Monitoring Unit (PMU), which can be useful for tuning and/or debugging. This hardware is always present and the relevant registers are accessible to non-secure accesses. Thus, no special firmware interface is necessary. This patch adds support for the PMU, plugging into the usual perf infrastructure. The overflow interrupt is not always available (e.g. on RealView PBX A9 it is not wired up at all), and the hardware counters saturate, so the driver does not make use of this. Instead, the driver periodically polls and reset counters as required to avoid losing events due to saturation. 
Signed-off-by: Mark Rutland Acked-by: Pawel Moll Tested-by: Kim Phillips Cc: Russell King Cc: Will Deacon Signed-off-by: Russell King --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 242bf530edfc..7e1ba14a3d78 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -86,6 +86,7 @@ enum cpuhp_state { CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, CPUHP_AP_PERF_ARM_CCN_ONLINE, + CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RCUTREE_ONLINE, CPUHP_AP_NOTIFY_ONLINE, -- cgit v1.2.3 From ee1e714b94521b0bb27b04dfd1728ec51b19d4f0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 18 Aug 2016 14:57:16 +0200 Subject: cpu/hotplug: Remove CPU_STARTING and CPU_DYING notifier All users are converted to state machine, remove CPU_STARTING and the corresponding CPU_DYING. Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Cc: Peter Zijlstra Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160818125731.27256-2-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpu.h | 12 ------------ include/linux/cpuhotplug.h | 1 - 2 files changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 797d9c8e9a1b..6bf1992fe638 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -61,17 +61,8 @@ struct notifier_block; #define CPU_DOWN_PREPARE 0x0005 /* CPU (unsigned)v going down */ #define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */ #define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ -#define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task, - * not handling interrupts, soon dead. - * Called on the dying cpu, interrupts - * are already disabled. 
Must not - * sleep, must not fail */ #define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug * lock is dropped */ -#define CPU_STARTING 0x000A /* CPU (unsigned)v soon running. - * Called on the new cpu, just before - * enabling interrupts. Must not sleep, - * must not fail */ #define CPU_BROKEN 0x000B /* CPU (unsigned)v did not die properly, * perhaps due to preemption. */ @@ -86,9 +77,6 @@ struct notifier_block; #define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN) #define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN) #define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) -#define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN) -#define CPU_STARTING_FROZEN (CPU_STARTING | CPU_TASKS_FROZEN) - #ifdef CONFIG_SMP extern bool cpuhp_tasks_frozen; diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index b95f7adfbf8b..9e6d10786e29 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -69,7 +69,6 @@ enum cpuhp_state { CPUHP_AP_ARM64_ISNDEP_STARTING, CPUHP_AP_SMPCFD_DYING, CPUHP_AP_X86_TBOOT_DYING, - CPUHP_AP_NOTIFY_STARTING, CPUHP_AP_ONLINE, CPUHP_TEARDOWN_CPU, CPUHP_AP_ONLINE_IDLE, -- cgit v1.2.3 From 017c59c042d01fc84cae7a8ea475861e702c77ab Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Fri, 2 Sep 2016 21:47:38 +0200 Subject: relay: Use per CPU constructs for the relay channel buffer pointers relay essentially needs to maintain a per CPU array of channel buffer pointers but it manually creates that array. Instead its better to use the per CPU constructs, provided by the kernel, to allocate & access the array of pointer to channel buffers. 
Signed-off-by: Akash Goel Reviewed-by: Chris Wilson Link: http://lkml.kernel.org/r/1470909140-25919-1-git-send-email-akash.goel@intel.com Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/relay.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/relay.h b/include/linux/relay.h index d7c8359693c6..eb295e373b90 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -19,6 +19,7 @@ #include #include #include +#include /* * Tracks changes to rchan/rchan_buf structs @@ -63,7 +64,7 @@ struct rchan struct kref kref; /* channel refcount */ void *private_data; /* for user-defined data */ size_t last_toobig; /* tried to log event > subbuf size */ - struct rchan_buf *buf[NR_CPUS]; /* per-cpu channel buffers */ + struct rchan_buf ** __percpu buf; /* per-cpu channel buffers */ int is_global; /* One global buffer ? */ struct list_head list; /* for channel list */ struct dentry *parent; /* parent dentry passed to open */ @@ -204,7 +205,7 @@ static inline void relay_write(struct rchan *chan, struct rchan_buf *buf; local_irq_save(flags); - buf = chan->buf[smp_processor_id()]; + buf = *this_cpu_ptr(chan->buf); if (unlikely(buf->offset + length > chan->subbuf_size)) length = relay_switch_subbuf(buf, length); memcpy(buf->data + buf->offset, data, length); @@ -230,12 +231,12 @@ static inline void __relay_write(struct rchan *chan, { struct rchan_buf *buf; - buf = chan->buf[get_cpu()]; + buf = *get_cpu_ptr(chan->buf); if (unlikely(buf->offset + length > buf->chan->subbuf_size)) length = relay_switch_subbuf(buf, length); memcpy(buf->data + buf->offset, data, length); buf->offset += length; - put_cpu(); + put_cpu_ptr(chan->buf); } /** @@ -251,17 +252,19 @@ static inline void __relay_write(struct rchan *chan, */ static inline void *relay_reserve(struct rchan *chan, size_t length) { - void *reserved; - struct rchan_buf *buf = chan->buf[smp_processor_id()]; + void *reserved = 
NULL; + struct rchan_buf *buf = *get_cpu_ptr(chan->buf); if (unlikely(buf->offset + length > buf->chan->subbuf_size)) { length = relay_switch_subbuf(buf, length); if (!length) - return NULL; + goto end; } reserved = buf->data + buf->offset; buf->offset += length; +end: + put_cpu_ptr(chan->buf); return reserved; } -- cgit v1.2.3 From e6d4989a9ad1ccc343f29578a461612ed80fc6c5 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Thu, 18 Aug 2016 14:57:17 +0200 Subject: relayfs: Convert to hotplug state machine Install the callbacks via the state machine. They are installed at run time but relay_prepare_cpu() does not need to be invoked by the boot CPU because relay_open() was not yet invoked and there are no pools that need to be created. Signed-off-by: Richard Weinberger Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Sebastian Andrzej Siewior Cc: Peter Zijlstra Cc: rt@linutronix.de Cc: Andrew Morton Link: http://lkml.kernel.org/r/20160818125731.27256-3-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + include/linux/relay.h | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 9e6d10786e29..4c79f40fcebc 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -21,6 +21,7 @@ enum cpuhp_state { CPUHP_PROFILE_PREPARE, CPUHP_X2APIC_PREPARE, CPUHP_SMPCFD_PREPARE, + CPUHP_RELAY_PREPARE, CPUHP_RCUTREE_PREP, CPUHP_NOTIFY_PREPARE, CPUHP_TIMERS_DEAD, diff --git a/include/linux/relay.h b/include/linux/relay.h index eb295e373b90..ecbb34a382b8 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -288,5 +288,11 @@ static inline void subbuf_start_reserve(struct rchan_buf *buf, */ extern const struct file_operations relay_file_operations; +#ifdef CONFIG_RELAY +int relay_prepare_cpu(unsigned int cpu); +#else +#define relay_prepare_cpu NULL +#endif + #endif /* _LINUX_RELAY_H */ -- 
cgit v1.2.3 From 6731d4f12315aed5f7eefc52dac30428e382d7d0 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 23 Aug 2016 14:53:19 +0200 Subject: slab: Convert to hotplug state machine Install the callbacks via the state machine. Signed-off-by: Richard Weinberger Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Sebastian Andrzej Siewior Cc: Peter Zijlstra Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: rt@linutronix.de Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrew Morton Cc: Christoph Lameter Link: http://lkml.kernel.org/r/20160823125319.abeapfjapf2kfezp@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + include/linux/slab.h | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 4c79f40fcebc..c2cf14953abc 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -22,6 +22,7 @@ enum cpuhp_state { CPUHP_X2APIC_PREPARE, CPUHP_SMPCFD_PREPARE, CPUHP_RELAY_PREPARE, + CPUHP_SLAB_PREPARE, CPUHP_RCUTREE_PREP, CPUHP_NOTIFY_PREPARE, CPUHP_TIMERS_DEAD, diff --git a/include/linux/slab.h b/include/linux/slab.h index 4293808d8cfb..084b12bad198 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -650,4 +650,12 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node) unsigned int kmem_cache_size(struct kmem_cache *s); void __init kmem_cache_init_late(void); +#if defined(CONFIG_SMP) && defined(CONFIG_SLAB) +int slab_prepare_cpu(unsigned int cpu); +int slab_dead_cpu(unsigned int cpu); +#else +#define slab_prepare_cpu NULL +#define slab_dead_cpu NULL +#endif + #endif /* _LINUX_SLAB_H */ -- cgit v1.2.3 From a96a87bf949d249039cdf532bb5f5d06622cc5e2 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 18 Aug 2016 14:57:19 +0200 Subject: slub: Convert to hotplug state machine Install the callbacks via the state machine. 
Signed-off-by: Sebastian Andrzej Siewior Cc: Andrew Morton Cc: Peter Zijlstra Cc: Pekka Enberg Cc: linux-mm@kvack.org Cc: rt@linutronix.de Cc: David Rientjes Cc: Christoph Lameter Cc: Joonsoo Kim Link: http://lkml.kernel.org/r/20160818125731.27256-5-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index c2cf14953abc..82ee32107dff 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -15,6 +15,7 @@ enum cpuhp_state { CPUHP_X86_HPET_DEAD, CPUHP_X86_APB_DEAD, CPUHP_VIRT_NET_DEAD, + CPUHP_SLUB_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, -- cgit v1.2.3 From 1d7ac6aec947d222042b6d22b3cec109db4fd19e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 18 Aug 2016 14:57:20 +0200 Subject: mm/writeback: Convert to hotplug state machine Install the callbacks via the state machine and let the core invoke the callbacks on the already online CPUs. 
Signed-off-by: Sebastian Andrzej Siewior Cc: Peter Zijlstra Cc: Jens Axboe Cc: linux-mm@kvack.org Cc: rt@linutronix.de Cc: Tejun Heo Link: http://lkml.kernel.org/r/20160818125731.27256-6-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 82ee32107dff..854e59a426d4 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -16,6 +16,7 @@ enum cpuhp_state { CPUHP_X86_APB_DEAD, CPUHP_VIRT_NET_DEAD, CPUHP_SLUB_DEAD, + CPUHP_MM_WRITEBACK_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, -- cgit v1.2.3 From c4544dbc7a9bce3da6fa2361cd68cadb34e9221f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 18 Aug 2016 14:57:21 +0200 Subject: kernel/softirq: Convert to hotplug state machine Install the callbacks via the state machine. Signed-off-by: Sebastian Andrzej Siewior Cc: Peter Zijlstra Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160818125731.27256-7-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 854e59a426d4..a421407a317f 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -17,6 +17,7 @@ enum cpuhp_state { CPUHP_VIRT_NET_DEAD, CPUHP_SLUB_DEAD, CPUHP_MM_WRITEBACK_DEAD, + CPUHP_SOFTIRQ_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, -- cgit v1.2.3 From 84a3f4db039e7c4bfe8ae9bebdebdf2a4e09bf86 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 18 Aug 2016 14:57:23 +0200 Subject: net/mvneta: Convert to hotplug state machine Install the callbacks via the state machine and let the core invoke the callbacks on the already online CPUs. 
Signed-off-by: Sebastian Andrzej Siewior Cc: Thomas Petazzoni Cc: Peter Zijlstra Cc: netdev@vger.kernel.org Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160818125731.27256-9-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index a421407a317f..332b39c21d2e 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -18,6 +18,7 @@ enum cpuhp_state { CPUHP_SLUB_DEAD, CPUHP_MM_WRITEBACK_DEAD, CPUHP_SOFTIRQ_DEAD, + CPUHP_NET_MVNETA_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, -- cgit v1.2.3 From 29c6d1bbd7a2cd88a197ea7cef171f616e198526 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 18 Aug 2016 14:57:24 +0200 Subject: md/raid5: Convert to hotplug state machine Install the callbacks via the state machine and let the core invoke the callbacks on the already online CPUs. Signed-off-by: Sebastian Andrzej Siewior Cc: Peter Zijlstra Cc: Neil Brown Cc: linux-raid@vger.kernel.org Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160818125731.27256-10-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 332b39c21d2e..4066c74bb73c 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -27,6 +27,7 @@ enum cpuhp_state { CPUHP_SMPCFD_PREPARE, CPUHP_RELAY_PREPARE, CPUHP_SLAB_PREPARE, + CPUHP_MD_RAID5_PREPARE, CPUHP_RCUTREE_PREP, CPUHP_NOTIFY_PREPARE, CPUHP_TIMERS_DEAD, -- cgit v1.2.3 From 529351fd3c50215a462e5e604d7ceaaf27a8a0e5 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 18 Aug 2016 14:57:25 +0200 Subject: cpuidle/pseries: Convert to hotplug state machine Install the callbacks via the state machine. 
Signed-off-by: Sebastian Andrzej Siewior Cc: linux-pm@vger.kernel.org Cc: Peter Zijlstra Cc: Daniel Lezcano Cc: "Rafael J. Wysocki" Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160818125731.27256-11-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 4066c74bb73c..0fb22b95649f 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -19,6 +19,7 @@ enum cpuhp_state { CPUHP_MM_WRITEBACK_DEAD, CPUHP_SOFTIRQ_DEAD, CPUHP_NET_MVNETA_DEAD, + CPUHP_CPUIDLE_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, -- cgit v1.2.3 From dfc616d8b3df3013c579e023e67f29ada60bdd50 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 24 Aug 2016 11:14:44 +0200 Subject: cpuidle/coupled: Convert to hotplug state machine Install the callbacks via the state machine. Signed-off-by: Sebastian Andrzej Siewior Cc: linux-pm@vger.kernel.org Cc: Peter Zijlstra Cc: Daniel Lezcano Cc: "Rafael J. Wysocki" Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160824091444.brdr5zpbxjvh6n3f@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 0fb22b95649f..e8608774b5da 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -30,6 +30,7 @@ enum cpuhp_state { CPUHP_SLAB_PREPARE, CPUHP_MD_RAID5_PREPARE, CPUHP_RCUTREE_PREP, + CPUHP_CPUIDLE_COUPLED_PREPARE, CPUHP_NOTIFY_PREPARE, CPUHP_TIMERS_DEAD, CPUHP_BRINGUP_CPU, -- cgit v1.2.3 From 68e694dcef246f0c8f6738b3aa628f8aa7186796 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 18 Aug 2016 14:57:30 +0200 Subject: powerpc/powermac: Convert to hotplug state machine Install the callbacks via the state machine. 
I assume here that the powermac has two CPUs and so only one can go up or down at a time. The variable smp_core99_host_open is here to ensure that we do not try to open or close the i2c host twice if something goes wrong and we invoke the prepare or online callback twice due to rollback. Signed-off-by: Sebastian Andrzej Siewior Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: rt@linutronix.de Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20160818125731.27256-16-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index e8608774b5da..33fba43ad292 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -31,6 +31,7 @@ enum cpuhp_state { CPUHP_MD_RAID5_PREPARE, CPUHP_RCUTREE_PREP, CPUHP_CPUIDLE_COUPLED_PREPARE, + CPUHP_POWERPC_PMAC_PREPARE, CPUHP_NOTIFY_PREPARE, CPUHP_TIMERS_DEAD, CPUHP_BRINGUP_CPU, -- cgit v1.2.3 From da3ed6519b19a9def0fcb966c6274946ad18d9a6 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 18 Aug 2016 14:57:31 +0200 Subject: powerpc/mmu nohash: Convert to hotplug state machine Install the callbacks via the state machine. 
Signed-off-by: Sebastian Andrzej Siewior Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: rt@linutronix.de Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20160818125731.27256-17-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 33fba43ad292..afd59e2ca4b3 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -32,6 +32,7 @@ enum cpuhp_state { CPUHP_RCUTREE_PREP, CPUHP_CPUIDLE_COUPLED_PREPARE, CPUHP_POWERPC_PMAC_PREPARE, + CPUHP_POWERPC_MMU_CTX_PREPARE, CPUHP_NOTIFY_PREPARE, CPUHP_TIMERS_DEAD, CPUHP_BRINGUP_CPU, -- cgit v1.2.3 From cd5830512044d301d397d1499738ee00a8836247 Mon Sep 17 00:00:00 2001 From: Loic PALLARDY Date: Tue, 6 Sep 2016 09:39:42 +0200 Subject: remoteproc: Modify FW_RSC_ADDR_ANY definition Replace 0xFFFFFFFFFFFFFFFF by -1 to fit any type. 
Signed-off-by: Loic Pallardy Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index d488f9e1e08c..80e1cba78a04 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -118,7 +118,7 @@ enum fw_resource_type { RSC_LAST = 4, }; -#define FW_RSC_ADDR_ANY (0xFFFFFFFFFFFFFFFF) +#define FW_RSC_ADDR_ANY (-1) /** * struct fw_rsc_carveout - physically contiguous memory request -- cgit v1.2.3 From 21b6657ef4458f90d64b696105e3898257dea221 Mon Sep 17 00:00:00 2001 From: Loic PALLARDY Date: Tue, 6 Sep 2016 09:39:43 +0200 Subject: remoteproc: core: transform struct fw_rsc_vdev_vring reserved field in pa In current implementation, struct fw_rsc_vdev_vring which describes vring resource in firmware resource table owns only device address, because it assumes that host is responsible for vring allocation and only device address is needed by coprocessor. But if vrings need to be fixed in system memory map for any reason (security, SoC characteristics...), the physical address is needed so that the host can exactly identify the memory chunk. For that let's transform reserved field of struct fw_rsc_vdev_vring to pa (physical address). Signed-off-by: Loic Pallardy Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 80e1cba78a04..c321eab5054e 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -241,7 +241,7 @@ struct fw_rsc_trace { * @notifyid is a unique rproc-wide notify index for this vring. This notify * index is used when kicking a remote processor, to let it know that this * vring is triggered. 
- * @reserved: reserved (must be zero) + * @pa: physical address * * This descriptor is not a resource entry by itself; it is part of the * vdev resource type (see below). @@ -255,7 +255,7 @@ struct fw_rsc_vdev_vring { u32 align; u32 num; u32 notifyid; - u32 reserved; + u32 pa; } __packed; /** -- cgit v1.2.3 From 032c3d86b4acc4c21e435c85c454eac670c15851 Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Thu, 25 Aug 2016 17:26:10 -0600 Subject: PCI/AER: Add bus flag to skip source ID matching Allow root port buses to choose to skip source id matching when finding the faulting device. Certain root port devices may return an incorrect source ID and recommend to scan child device registers for AER notifications. Signed-off-by: Jon Derrick Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 2599a980340f..57bc838e0666 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -187,8 +187,9 @@ enum pci_irq_reroute_variant { typedef unsigned short __bitwise pci_bus_flags_t; enum pci_bus_flags { - PCI_BUS_FLAGS_NO_MSI = (__force pci_bus_flags_t) 1, - PCI_BUS_FLAGS_NO_MMRBC = (__force pci_bus_flags_t) 2, + PCI_BUS_FLAGS_NO_MSI = (__force pci_bus_flags_t) 1, + PCI_BUS_FLAGS_NO_MMRBC = (__force pci_bus_flags_t) 2, + PCI_BUS_FLAGS_NO_AERSID = (__force pci_bus_flags_t) 4, }; /* These values come from the PCI Express Spec */ -- cgit v1.2.3 From 65a532f3d50a266bcc5e9c7efc636006565e8e7e Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Tue, 6 Sep 2016 12:23:21 -0700 Subject: Revert "Drivers: hv: ring_buffer: count on wrap around mappings in get_next_pkt_raw()" To deal with the merge conflict between net-next and char-misc trees, revert commit bb08d431a914 from char-misc tree. This commit can be rebased and applied once net-next picks up char-misc changes. 
Here is the commit log of the reverted patch: "With wrap around mappings in place we can always provide drivers with direct links to packets on the ring buffer, even when they wrap around. Do the required updates to get_next_pkt_raw()/put_pkt_raw()" Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index c877e7980585..613074ef1ce9 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1492,23 +1492,31 @@ static inline struct vmpacket_descriptor * get_next_pkt_raw(struct vmbus_channel *channel) { struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 priv_read_loc = ring_info->priv_read_index; + u32 read_loc = ring_info->priv_read_index; void *ring_buffer = hv_get_ring_buffer(ring_info); + struct vmpacket_descriptor *cur_desc; + u32 packetlen; u32 dsize = ring_info->ring_datasize; - /* - * delta is the difference between what is available to read and - * what was already consumed in place. We commit read index after - * the whole batch is processed. - */ - u32 delta = priv_read_loc >= ring_info->ring_buffer->read_index ? - priv_read_loc - ring_info->ring_buffer->read_index : - (dsize - ring_info->ring_buffer->read_index) + priv_read_loc; + u32 delta = read_loc - ring_info->ring_buffer->read_index; u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta); if (bytes_avail_toread < sizeof(struct vmpacket_descriptor)) return NULL; - return ring_buffer + priv_read_loc; + if ((read_loc + sizeof(*cur_desc)) > dsize) + return NULL; + + cur_desc = ring_buffer + read_loc; + packetlen = cur_desc->len8 << 3; + + /* + * If the packet under consideration is wrapping around, + * return failure. 
+ */ + if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > (dsize - 1)) + return NULL; + + return cur_desc; } /* @@ -1520,14 +1528,16 @@ static inline void put_pkt_raw(struct vmbus_channel *channel, struct vmpacket_descriptor *desc) { struct hv_ring_buffer_info *ring_info = &channel->inbound; + u32 read_loc = ring_info->priv_read_index; u32 packetlen = desc->len8 << 3; u32 dsize = ring_info->ring_datasize; + if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > dsize) + BUG(); /* * Include the packet trailer. */ ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER; - ring_info->priv_read_index %= dsize; } /* -- cgit v1.2.3 From ecc6569f3503b39f45bc6b86197b5e0a8533fb72 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Thu, 25 Aug 2016 23:08:11 +0800 Subject: netfilter: gre: Use consistent GRE_* macros instead of ones defined by netfilter. There are already some GRE_* macros in kernel, so it is unnecessary to define these macros. And remove some useless macros Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_proto_gre.h | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index df78dc2b5524..0189747f2691 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -1,29 +1,11 @@ #ifndef _CONNTRACK_PROTO_GRE_H #define _CONNTRACK_PROTO_GRE_H #include +#include +#include /* GRE PROTOCOL HEADER */ -/* GRE Version field */ -#define GRE_VERSION_1701 0x0 -#define GRE_VERSION_PPTP 0x1 - -/* GRE Protocol field */ -#define GRE_PROTOCOL_PPTP 0x880B - -/* GRE Flags */ -#define GRE_FLAG_C 0x80 -#define GRE_FLAG_R 0x40 -#define GRE_FLAG_K 0x20 -#define GRE_FLAG_S 0x10 -#define GRE_FLAG_A 0x80 - -#define GRE_IS_C(f) ((f)&GRE_FLAG_C) -#define GRE_IS_R(f) ((f)&GRE_FLAG_R) -#define GRE_IS_K(f) ((f)&GRE_FLAG_K) -#define 
GRE_IS_S(f) ((f)&GRE_FLAG_S) -#define GRE_IS_A(f) ((f)&GRE_FLAG_A) - /* GRE is a mess: Four different standards */ struct gre_hdr { #if defined(__LITTLE_ENDIAN_BITFIELD) -- cgit v1.2.3 From c579a9e7d58f66030a144c7a33cc9bdf827a4b6d Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Thu, 25 Aug 2016 23:08:47 +0800 Subject: netfilter: gre: Use consistent GRE and PTTP header structure instead of the ones defined by netfilter There are two existing structures which define the GRE and PPTP headers. So use these two structures instead of the ones defined by netfilter to stay consistent with other code. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_proto_gre.h | 42 ------------------------ 1 file changed, 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index 0189747f2691..dee0acd0dd31 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -4,48 +4,6 @@ #include #include -/* GRE PROTOCOL HEADER */ - -/* GRE is a mess: Four different standards */ -struct gre_hdr { -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u16 rec:3, - srr:1, - seq:1, - key:1, - routing:1, - csum:1, - version:3, - reserved:4, - ack:1; -#elif defined(__BIG_ENDIAN_BITFIELD) - __u16 csum:1, - routing:1, - key:1, - seq:1, - srr:1, - rec:3, - ack:1, - reserved:4, - version:3; -#else -#error "Adjust your defines" -#endif - __be16 protocol; -}; - -/* modified GRE header for PPTP */ -struct gre_hdr_pptp { - __u8 flags; /* bitfield */ - __u8 version; /* should be GRE_VERSION_PPTP */ - __be16 protocol; /* should be GRE_PROTOCOL_PPTP */ - __be16 payload_len; /* size of ppp payload, not inc. gre header */ - __be16 call_id; /* peer's call_id for this session */ - __be32 seq; /* sequence number. 
Present if S==1 */ - __be32 ack; /* seq number of highest packet received by */ - /* sender in this session */ -}; - struct nf_ct_gre { unsigned int stream_timeout; unsigned int timeout; -- cgit v1.2.3 From ef0da55a84a345f323ceddda3b6c78b25de90435 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 5 Sep 2016 18:25:47 +0100 Subject: jump_labels: Allow array initialisers The static key API is currently designed around single variable definitions. There are cases where an array of static keys is desirable, so extend the API to allow this rather than using the internal static key implementation directly. Cc: Jason Baron Cc: Jonathan Corbet Acked-by: Peter Zijlstra (Intel) Suggested-by: Dave P Martin Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon --- include/linux/jump_label.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 661af564fae8..a534c7f15a61 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -21,6 +21,8 @@ * * DEFINE_STATIC_KEY_TRUE(key); * DEFINE_STATIC_KEY_FALSE(key); + * DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count); + * DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count); * static_branch_likely() * static_branch_unlikely() * @@ -270,6 +272,16 @@ struct static_key_false { #define DEFINE_STATIC_KEY_FALSE(name) \ struct static_key_false name = STATIC_KEY_FALSE_INIT +#define DEFINE_STATIC_KEY_ARRAY_TRUE(name, count) \ + struct static_key_true name[count] = { \ + [0 ... (count) - 1] = STATIC_KEY_TRUE_INIT, \ + } + +#define DEFINE_STATIC_KEY_ARRAY_FALSE(name, count) \ + struct static_key_false name[count] = { \ + [0 ... 
(count) - 1] = STATIC_KEY_FALSE_INIT, \ + } + extern bool ____wrong_branch_error(void); #define static_key_enabled(x) \ -- cgit v1.2.3 From 0f98829a99850836cf7c2cc9fbf1d7ce0f795780 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 7 Sep 2016 05:39:34 -0700 Subject: Drivers: hv: vmbus: suppress some "hv_vmbus: Unknown GUID" warnings Some VMBus devices are not needed by Linux guest[1][2], and, VMBus channels of Hyper-V Sockets don't really mean usual synthetic devices, so let's suppress the warnings for them. [1] https://support.microsoft.com/en-us/kb/2925727 [2] https://msdn.microsoft.com/en-us/library/jj980180(v=winembedded.81).aspx Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 613074ef1ce9..430619a92d3b 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1314,6 +1314,27 @@ u64 hv_do_hypercall(u64 control, void *input, void *output); .guid = UUID_LE(0x44c4f61d, 0x4444, 0x4400, 0x9d, 0x52, \ 0x80, 0x2e, 0x27, 0xed, 0xe1, 0x9f) +/* + * Linux doesn't support the 3 devices: the first two are for + * Automatic Virtual Machine Activation, and the third is for + * Remote Desktop Virtualization. 
+ * {f8e65716-3cb3-4a06-9a60-1889c5cccab5} + * {3375baf4-9e15-4b30-b765-67acb10d607b} + * {276aacf4-ac15-426c-98dd-7521ad3f01fe} + */ + +#define HV_AVMA1_GUID \ + .guid = UUID_LE(0xf8e65716, 0x3cb3, 0x4a06, 0x9a, 0x60, \ + 0x18, 0x89, 0xc5, 0xcc, 0xca, 0xb5) + +#define HV_AVMA2_GUID \ + .guid = UUID_LE(0x3375baf4, 0x9e15, 0x4b30, 0xb7, 0x65, \ + 0x67, 0xac, 0xb1, 0x0d, 0x60, 0x7b) + +#define HV_RDV_GUID \ + .guid = UUID_LE(0x276aacf4, 0xac15, 0x426c, 0x98, 0xdd, \ + 0x75, 0x21, 0xad, 0x3f, 0x01, 0xfe) + /* * Common header for Hyper-V ICs */ -- cgit v1.2.3 From c965db44462919f613973aa618271f6c3f5a1e64 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Wed, 7 Sep 2016 16:36:24 +0300 Subject: qed: Add support for debug data collection This patch adds the support for dumping and formatting the HW/FW debug data. Signed-off-by: Tomer Tayar Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 70b30e4d3cc4..19027635df0d 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -143,6 +143,9 @@ #define GTT_BYTE_SIZE_BITS (GTT_DWORD_SIZE_BITS + 2) #define GTT_DWORD_SIZE BIT(GTT_DWORD_SIZE_BITS) +/* Tools Version */ +#define TOOLS_VERSION 10 + /*****************/ /* CDU CONSTANTS */ /*****************/ -- cgit v1.2.3 From e0971c832af4cd906ab931c9f6e9e1791a62fc98 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Wed, 7 Sep 2016 16:36:25 +0300 Subject: qed*: Add support for the ethtool get_regs operation Signed-off-by: Tomer Tayar Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_if.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index d8dc5c2243d5..e4546abcea08 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -455,6 +455,10 @@ struct qed_common_ops { void (*simd_handler_clean)(struct qed_dev *cdev, int index); + int (*dbg_all_data) (struct qed_dev *cdev, void *buffer); + + int (*dbg_all_data_size) (struct qed_dev *cdev); + /** * @brief can_link_change - can the instance change the link or not * -- cgit v1.2.3 From 8e1d260738ca89bc7c87444f95f04a026d12b496 Mon Sep 17 00:00:00 2001 From: Alex Ng Date: Thu, 8 Sep 2016 05:24:14 -0700 Subject: Drivers: hv: utils: Support TimeSync version 4.0 protocol samples. This enables support for more accurate TimeSync v4 samples when hosted under Windows Server 2016 and newer hosts. The new time samples include a "vmreferencetime" field that represents the guest's TSC value when the host generated its time sample. This value lets the guest calculate the latency in receiving the time sample. The latency is added to the sample host time prior to updating the clock. Signed-off-by: Alex Ng Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 430619a92d3b..7d7cbff33bda 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1423,6 +1423,15 @@ struct ictimesync_data { u8 flags; } __packed; +struct ictimesync_ref_data { + u64 parenttime; + u64 vmreferencetime; + u8 flags; + char leapflags; + char stratum; + u8 reserved[3]; +} __packed; + struct hyperv_service_callback { u8 msg_type; char *log_msg; -- cgit v1.2.3 From c950fd6f201aea649932898206a850f0a7f25603 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 28 Jul 2016 00:08:25 +0900 Subject: pstore: Split pstore fragile flags This patch adds new PSTORE_FLAGS for each pstore type so that they can be enabled separately. This is a preparation for ongoing virtio-pstore work to support those types flexibly. The PSTORE_FLAGS_FRAGILE is changed to PSTORE_FLAGS_DMESG to preserve the original behavior. Cc: Anton Vorontsov Cc: Colin Cross Cc: Kees Cook Cc: Tony Luck Cc: "Rafael J. 
Wysocki" Cc: Len Brown Cc: Matt Fleming Cc: linux-acpi@vger.kernel.org Cc: linux-efi@vger.kernel.org Signed-off-by: Namhyung Kim [kees: retained "FRAGILE" for now to make merges easier] Signed-off-by: Kees Cook --- include/linux/pstore.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 899e95e84400..f5806b2a6f7a 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -74,7 +74,13 @@ struct pstore_info { void *data; }; -#define PSTORE_FLAGS_FRAGILE 1 +#define PSTORE_FLAGS_DMESG (1 << 0) +#define PSTORE_FLAGS_FRAGILE PSTORE_FLAGS_DMESG +#define PSTORE_FLAGS_CONSOLE (1 << 1) +#define PSTORE_FLAGS_FTRACE (1 << 2) +#define PSTORE_FLAGS_PMSG (1 << 3) + +#define PSTORE_FLAGS_ALL ((1 << 4) - 1) extern int pstore_register(struct pstore_info *); extern void pstore_unregister(struct pstore_info *); -- cgit v1.2.3 From 79d955af711a6e20207783590a2cfddbd649568b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 28 Jul 2016 00:08:26 +0900 Subject: pstore/ram: Set pstore flags dynamically The ramoops can be configured to enable each pstore type by setting their size. In that case, it'd be better not to register disabled types in the first place. 
Cc: Anton Vorontsov Cc: Colin Cross Cc: Kees Cook Cc: Tony Luck Signed-off-by: Namhyung Kim Signed-off-by: Kees Cook --- include/linux/pstore.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index f5806b2a6f7a..50d475228d97 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -80,8 +80,6 @@ struct pstore_info { #define PSTORE_FLAGS_FTRACE (1 << 2) #define PSTORE_FLAGS_PMSG (1 << 3) -#define PSTORE_FLAGS_ALL ((1 << 4) - 1) - extern int pstore_register(struct pstore_info *); extern void pstore_unregister(struct pstore_info *); extern bool pstore_cannot_block_path(enum kmsg_dump_reason reason); -- cgit v1.2.3 From 5bf6d1b92715f224ef6e1c3abca5dd63eeb4915d Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Thu, 1 Sep 2016 08:13:46 -0700 Subject: pstore/pmsg: drop bounce buffer Removing a bounce buffer copy operation in the pmsg driver path is always better. We also gain in overall performance by not requesting a vmalloc on every write as this can cause precious RT tasks, such as user facing media operation, to stall while memory is being reclaimed. Added a write_buf_user to the pstore functions, a backup platform write_buf_user that uses the small buffer that is part of the instance, and implemented a ramoops write_buf_user that only supports PSTORE_TYPE_PMSG. 
Signed-off-by: Mark Salyzyn Signed-off-by: Kees Cook --- include/linux/pstore.h | 11 ++++++++--- include/linux/pstore_ram.h | 7 +++++-- 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 50d475228d97..92013cc9cc8c 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -22,12 +22,13 @@ #ifndef _LINUX_PSTORE_H #define _LINUX_PSTORE_H -#include +#include +#include #include #include -#include #include -#include +#include +#include /* types */ enum pstore_type_id { @@ -68,6 +69,10 @@ struct pstore_info { enum kmsg_dump_reason reason, u64 *id, unsigned int part, const char *buf, bool compressed, size_t size, struct pstore_info *psi); + int (*write_buf_user)(enum pstore_type_id type, + enum kmsg_dump_reason reason, u64 *id, + unsigned int part, const char __user *buf, + bool compressed, size_t size, struct pstore_info *psi); int (*erase)(enum pstore_type_id type, u64 id, int count, struct timespec time, struct pstore_info *psi); diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 4660aaa3195e..c668c861c96c 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -17,11 +17,12 @@ #ifndef __LINUX_PSTORE_RAM_H__ #define __LINUX_PSTORE_RAM_H__ +#include #include +#include #include #include #include -#include struct persistent_ram_buffer; struct rs_control; @@ -59,7 +60,9 @@ void persistent_ram_free(struct persistent_ram_zone *prz); void persistent_ram_zap(struct persistent_ram_zone *prz); int persistent_ram_write(struct persistent_ram_zone *prz, const void *s, - unsigned int count); + unsigned int count); +int persistent_ram_write_user(struct persistent_ram_zone *prz, + const void __user *s, unsigned int count); void persistent_ram_save_old(struct persistent_ram_zone *prz); size_t persistent_ram_old_size(struct persistent_ram_zone *prz); -- cgit v1.2.3 From d545caca827b65aab557a9e9dcdcf1e5a3823c2d Mon Sep 17 00:00:00 2001 From: 
Lorenzo Colitti Date: Thu, 8 Sep 2016 00:42:25 +0900 Subject: net: inet: diag: expose the socket mark to privileged processes. This adds the capability for a process that has CAP_NET_ADMIN on a socket to see the socket mark in socket dumps. Commit a52e95abf772 ("net: diag: allow socket bytecode filters to match socket marks") recently gave privileged processes the ability to filter socket dumps based on mark. This patch is complementary: it ensures that the mark is also passed to userspace in the socket's netlink attributes. It is useful for tools like ss which display information about sockets. Tested: https://android-review.googlesource.com/270210 Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index feb04ea20f11..65da430e260f 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -37,7 +37,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, u32 pid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh); + const struct nlmsghdr *unlh, bool net_admin); void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, @@ -56,7 +56,7 @@ void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk); int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_msg *r, int ext, - struct user_namespace *user_ns); + struct user_namespace *user_ns, bool net_admin); extern int inet_diag_register(const struct inet_diag_handler *handler); extern void inet_diag_unregister(const struct inet_diag_handler *handler); -- cgit v1.2.3 From fe19c4f971a55cea3be442d8032a5f6021702791 Mon Sep 17 00:00:00 2001 From: Eric Garver Date: Wed, 7 Sep 2016 12:56:58 
-0400 Subject: vlan: Check for vlan ethernet types for 8021.q or 802.1ad This is to simplify using double tagged vlans. This function allows all valid vlan ethertypes to be checked in a single function call. Also replace some instances that check for both ETH_P_8021Q and ETH_P_8021AD. Patch based on one originally by Thomas F Herbert. Signed-off-by: Thomas F Herbert Signed-off-by: Eric Garver Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 49d4aef1f789..3319d97d789d 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -272,6 +272,23 @@ static inline int vlan_get_encap_level(struct net_device *dev) } #endif +/** + * eth_type_vlan - check for valid vlan ether type. + * @ethertype: ether type to check + * + * Returns true if the ether type is a vlan ether type. 
+ */ +static inline bool eth_type_vlan(__be16 ethertype) +{ + switch (ethertype) { + case htons(ETH_P_8021Q): + case htons(ETH_P_8021AD): + return true; + default: + return false; + } +} + static inline bool vlan_hw_offload_capable(netdev_features_t features, __be16 proto) { @@ -425,8 +442,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb->data; - if (veth->h_vlan_proto != htons(ETH_P_8021Q) && - veth->h_vlan_proto != htons(ETH_P_8021AD)) + if (!eth_type_vlan(veth->h_vlan_proto)) return -EINVAL; *vlan_tci = ntohs(veth->h_vlan_TCI); @@ -488,7 +504,7 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, * present at mac_len - VLAN_HLEN (if mac_len > 0), or at * ETH_HLEN otherwise */ - if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { + if (eth_type_vlan(type)) { if (vlan_depth) { if (WARN_ON(vlan_depth < VLAN_HLEN)) return 0; @@ -506,8 +522,7 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, vh = (struct vlan_hdr *)(skb->data + vlan_depth); type = vh->h_vlan_encapsulated_proto; vlan_depth += VLAN_HLEN; - } while (type == htons(ETH_P_8021Q) || - type == htons(ETH_P_8021AD)); + } while (eth_type_vlan(type)); } if (depth) @@ -572,8 +587,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, static inline bool skb_vlan_tagged(const struct sk_buff *skb) { if (!skb_vlan_tag_present(skb) && - likely(skb->protocol != htons(ETH_P_8021Q) && - skb->protocol != htons(ETH_P_8021AD))) + likely(!eth_type_vlan(skb->protocol))) return false; return true; @@ -593,15 +607,14 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) if (!skb_vlan_tag_present(skb)) { struct vlan_ethhdr *veh; - if (likely(protocol != htons(ETH_P_8021Q) && - protocol != htons(ETH_P_8021AD))) + if (likely(!eth_type_vlan(protocol))) return false; veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } - if 
(protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) + if (!eth_type_vlan(protocol)) return false; return true; -- cgit v1.2.3 From 9f5afeae51526b3ad7b7cb21ee8b145ce6ea7a7a Mon Sep 17 00:00:00 2001 From: Yaogong Wang Date: Wed, 7 Sep 2016 14:49:28 -0700 Subject: tcp: use an RB tree for ooo receive queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Over the years, TCP BDP has increased by several orders of magnitude, and some people are considering reaching the 2 Gbytes limit. Even with current window scale limit of 14, ~1 Gbytes maps to ~740,000 MSS. In presence of packet losses (or reorders), TCP stores incoming packets into an out of order queue, and the number of skbs sitting there waiting for the missing packets to be received can be in the 10^5 range. Most packets are appended to the tail of this queue, and when packets can finally be transferred to receive queue, we scan the queue from its head. However, in presence of heavy losses, we might have to find an arbitrary point in this queue, involving a linear scan for every incoming packet, throwing away cpu caches. This patch converts it to a RB tree, to get bounded latencies. Yaogong wrote a preliminary patch about 2 years ago. Eric did the rebase, added ooo_last_skb cache, polishing and tests. Tested with network dropping between 1 and 10 % packets, with good success (about 30 % increase of throughput in stress tests) Next step would be to also use an RB tree for the write queue at sender side ;) Signed-off-by: Yaogong Wang Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Ilpo Järvinen Acked-By: Ilpo Järvinen Signed-off-by: David S.
Miller --- include/linux/skbuff.h | 2 ++ include/linux/tcp.h | 7 +++---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cfb7219be665..4c5662f05bda 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2402,6 +2402,8 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } +void skb_rbtree_purge(struct rb_root *root); + void *netdev_alloc_frag(unsigned int fragsz); struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 7be9b1242354..c723a465125d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -281,10 +281,9 @@ struct tcp_sock { struct sk_buff* lost_skb_hint; struct sk_buff *retransmit_skb_hint; - /* OOO segments go in this list. Note that socket lock must be held, - * as we do not use sk_buff_head lock. - */ - struct sk_buff_head out_of_order_queue; + /* OOO segments go in this rbtree. Socket lock must be held. */ + struct rb_root out_of_order_queue; + struct sk_buff *ooo_last_skb; /* cache rb_last(out_of_order_queue) */ /* SACKs data, these 2 need to be together (see tcp_options_write) */ struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ -- cgit v1.2.3 From d560168b5d0fb4a70c74b386564072a819d9bf71 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 26 Aug 2015 19:38:11 -0700 Subject: hwmon: (core) New hwmon registration API Up to now, each hwmon driver has to implement its own sysfs attributes. This requires a lot of template code, and distracts from the driver's core function to read and write chip registers. To be able to reduce driver complexity, move sensor attribute handling and thermal zone registration into hwmon core. By using the new API, driver code and data size is typically reduced by 20-70%, depending on driver complexity and the number of sysfs attributes supported. 
With this patch, the new API only supports thermal sensors. Support for other sensor types will be added with subsequent patches. Acked-by: Punit Agrawal Reviewed-by: Jonathan Cameron Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 148 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 09354f6c1d63..52e56d71d742 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -14,9 +14,147 @@ #ifndef _HWMON_H_ #define _HWMON_H_ +#include + struct device; struct attribute_group; +enum hwmon_sensor_types { + hwmon_chip, + hwmon_temp, + hwmon_in, + hwmon_curr, + hwmon_power, + hwmon_energy, +}; + +enum hwmon_chip_attributes { + hwmon_chip_temp_reset_history, + hwmon_chip_register_tz, + hwmon_chip_update_interval, + hwmon_chip_alarms, +}; + +#define HWMON_C_TEMP_RESET_HISTORY BIT(hwmon_chip_temp_reset_history) +#define HWMON_C_IN_RESET_HISTORY BIT(hwmon_chip_in_reset_history) +#define HWMON_C_REGISTER_TZ BIT(hwmon_chip_register_tz) +#define HWMON_C_UPDATE_INTERVAL BIT(hwmon_chip_update_interval) +#define HWMON_C_ALARMS BIT(hwmon_chip_alarms) + +enum hwmon_temp_attributes { + hwmon_temp_input = 0, + hwmon_temp_type, + hwmon_temp_lcrit, + hwmon_temp_lcrit_hyst, + hwmon_temp_min, + hwmon_temp_min_hyst, + hwmon_temp_max, + hwmon_temp_max_hyst, + hwmon_temp_crit, + hwmon_temp_crit_hyst, + hwmon_temp_emergency, + hwmon_temp_emergency_hyst, + hwmon_temp_alarm, + hwmon_temp_lcrit_alarm, + hwmon_temp_min_alarm, + hwmon_temp_max_alarm, + hwmon_temp_crit_alarm, + hwmon_temp_emergency_alarm, + hwmon_temp_fault, + hwmon_temp_offset, + hwmon_temp_label, + hwmon_temp_lowest, + hwmon_temp_highest, + hwmon_temp_reset_history, +}; + +#define HWMON_T_INPUT BIT(hwmon_temp_input) +#define HWMON_T_TYPE BIT(hwmon_temp_type) +#define HWMON_T_LCRIT BIT(hwmon_temp_lcrit) +#define HWMON_T_LCRIT_HYST BIT(hwmon_temp_lcrit_hyst) +#define HWMON_T_MIN 
BIT(hwmon_temp_min) +#define HWMON_T_MIN_HYST BIT(hwmon_temp_min_hyst) +#define HWMON_T_MAX BIT(hwmon_temp_max) +#define HWMON_T_MAX_HYST BIT(hwmon_temp_max_hyst) +#define HWMON_T_CRIT BIT(hwmon_temp_crit) +#define HWMON_T_CRIT_HYST BIT(hwmon_temp_crit_hyst) +#define HWMON_T_EMERGENCY BIT(hwmon_temp_emergency) +#define HWMON_T_EMERGENCY_HYST BIT(hwmon_temp_emergency_hyst) +#define HWMON_T_MIN_ALARM BIT(hwmon_temp_min_alarm) +#define HWMON_T_MAX_ALARM BIT(hwmon_temp_max_alarm) +#define HWMON_T_CRIT_ALARM BIT(hwmon_temp_crit_alarm) +#define HWMON_T_EMERGENCY_ALARM BIT(hwmon_temp_emergency_alarm) +#define HWMON_T_FAULT BIT(hwmon_temp_fault) +#define HWMON_T_OFFSET BIT(hwmon_temp_offset) +#define HWMON_T_LABEL BIT(hwmon_temp_label) +#define HWMON_T_LOWEST BIT(hwmon_temp_lowest) +#define HWMON_T_HIGHEST BIT(hwmon_temp_highest) +#define HWMON_T_RESET_HISTORY BIT(hwmon_temp_reset_history) + +/** + * struct hwmon_ops - hwmon device operations + * @is_visible: Callback to return attribute visibility. Mandatory. + * Parameters are: + * @const void *drvdata: + * Pointer to driver-private data structure passed + * as argument to hwmon_device_register_with_info(). + * @type: Sensor type + * @attr: Sensor attribute + * @channel: + * Channel number + * The function returns the file permissions. + * If the return value is 0, no attribute will be created. + * @read: Read callback. Optional. If not provided, attributes + * will not be readable. + * Parameters are: + * @dev: Pointer to hardware monitoring device + * @type: Sensor type + * @attr: Sensor attribute + * @channel: + * Channel number + * @val: Pointer to returned value + * The function returns 0 on success or a negative error number. + * @write: Write callback. Optional. If not provided, attributes + * will not be writable. 
+ * Parameters are: + * @dev: Pointer to hardware monitoring device + * @type: Sensor type + * @attr: Sensor attribute + * @channel: + * Channel number + * @val: Value to write + * The function returns 0 on success or a negative error number. + */ +struct hwmon_ops { + umode_t (*is_visible)(const void *drvdata, enum hwmon_sensor_types type, + u32 attr, int channel); + int (*read)(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val); + int (*write)(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long val); +}; + +/** + * Channel information + * @type: Channel type. + * @config: Pointer to NULL-terminated list of channel parameters. + * Use for per-channel attributes. + */ +struct hwmon_channel_info { + enum hwmon_sensor_types type; + const u32 *config; +}; + +/** + * Chip configuration + * @ops: Pointer to hwmon operations. + * @info: Null-terminated list of channel information. + */ +struct hwmon_chip_info { + const struct hwmon_ops *ops; + const struct hwmon_channel_info **info; +}; + struct device *hwmon_device_register(struct device *dev); struct device * hwmon_device_register_with_groups(struct device *dev, const char *name, @@ -26,6 +164,16 @@ struct device * devm_hwmon_device_register_with_groups(struct device *dev, const char *name, void *drvdata, const struct attribute_group **groups); +struct device * +hwmon_device_register_with_info(struct device *dev, + const char *name, void *drvdata, + const struct hwmon_chip_info *info, + const struct attribute_group **groups); +struct device * +devm_hwmon_device_register_with_info(struct device *dev, + const char *name, void *drvdata, + const struct hwmon_chip_info *info, + const struct attribute_group **groups); void hwmon_device_unregister(struct device *dev); void devm_hwmon_device_unregister(struct device *dev); -- cgit v1.2.3 From 00d616cf872bb552a6853df288efcdb9f937a489 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 20 Jun 2016 11:01:57 -0700 
Subject: hwmon: (core) Add voltage attribute support to new API Acked-by: Punit Agrawal Reviewed-by: Jonathan Cameron Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 52e56d71d742..a01b8e3fc6f3 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -30,6 +30,7 @@ enum hwmon_sensor_types { enum hwmon_chip_attributes { hwmon_chip_temp_reset_history, + hwmon_chip_in_reset_history, hwmon_chip_register_tz, hwmon_chip_update_interval, hwmon_chip_alarms, @@ -91,6 +92,40 @@ enum hwmon_temp_attributes { #define HWMON_T_HIGHEST BIT(hwmon_temp_highest) #define HWMON_T_RESET_HISTORY BIT(hwmon_temp_reset_history) +enum hwmon_in_attributes { + hwmon_in_input, + hwmon_in_min, + hwmon_in_max, + hwmon_in_lcrit, + hwmon_in_crit, + hwmon_in_average, + hwmon_in_lowest, + hwmon_in_highest, + hwmon_in_reset_history, + hwmon_in_label, + hwmon_in_alarm, + hwmon_in_min_alarm, + hwmon_in_max_alarm, + hwmon_in_lcrit_alarm, + hwmon_in_crit_alarm, +}; + +#define HWMON_I_INPUT BIT(hwmon_in_input) +#define HWMON_I_MIN BIT(hwmon_in_min) +#define HWMON_I_MAX BIT(hwmon_in_max) +#define HWMON_I_LCRIT BIT(hwmon_in_lcrit) +#define HWMON_I_CRIT BIT(hwmon_in_crit) +#define HWMON_I_AVERAGE BIT(hwmon_in_average) +#define HWMON_I_LOWEST BIT(hwmon_in_lowest) +#define HWMON_I_HIGHEST BIT(hwmon_in_highest) +#define HWMON_I_RESET_HISTORY BIT(hwmon_in_reset_history) +#define HWMON_I_LABEL BIT(hwmon_in_label) +#define HWMON_I_ALARM BIT(hwmon_in_alarm) +#define HWMON_I_MIN_ALARM BIT(hwmon_in_min_alarm) +#define HWMON_I_MAX_ALARM BIT(hwmon_in_max_alarm) +#define HWMON_I_LCRIT_ALARM BIT(hwmon_in_lcrit_alarm) +#define HWMON_I_CRIT_ALARM BIT(hwmon_in_crit_alarm) + /** * struct hwmon_ops - hwmon device operations * @is_visible: Callback to return attribute visibility. Mandatory. 
-- cgit v1.2.3 From 9b26947ce5b6a6d5f260d9564195e8971cc9713d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 20 Jun 2016 11:10:33 -0700 Subject: hwmon: (core) Add current attribute support to new API Acked-by: Punit Agrawal Reviewed-by: Jonathan Cameron Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index a01b8e3fc6f3..8781c2253b1d 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -31,6 +31,7 @@ enum hwmon_sensor_types { enum hwmon_chip_attributes { hwmon_chip_temp_reset_history, hwmon_chip_in_reset_history, + hwmon_chip_curr_reset_history, hwmon_chip_register_tz, hwmon_chip_update_interval, hwmon_chip_alarms, @@ -38,6 +39,7 @@ enum hwmon_chip_attributes { #define HWMON_C_TEMP_RESET_HISTORY BIT(hwmon_chip_temp_reset_history) #define HWMON_C_IN_RESET_HISTORY BIT(hwmon_chip_in_reset_history) +#define HWMON_C_CURR_RESET_HISTORY BIT(hwmon_chip_curr_reset_history) #define HWMON_C_REGISTER_TZ BIT(hwmon_chip_register_tz) #define HWMON_C_UPDATE_INTERVAL BIT(hwmon_chip_update_interval) #define HWMON_C_ALARMS BIT(hwmon_chip_alarms) @@ -126,6 +128,40 @@ enum hwmon_in_attributes { #define HWMON_I_LCRIT_ALARM BIT(hwmon_in_lcrit_alarm) #define HWMON_I_CRIT_ALARM BIT(hwmon_in_crit_alarm) +enum hwmon_curr_attributes { + hwmon_curr_input, + hwmon_curr_min, + hwmon_curr_max, + hwmon_curr_lcrit, + hwmon_curr_crit, + hwmon_curr_average, + hwmon_curr_lowest, + hwmon_curr_highest, + hwmon_curr_reset_history, + hwmon_curr_label, + hwmon_curr_alarm, + hwmon_curr_min_alarm, + hwmon_curr_max_alarm, + hwmon_curr_lcrit_alarm, + hwmon_curr_crit_alarm, +}; + +#define HWMON_C_INPUT BIT(hwmon_curr_input) +#define HWMON_C_MIN BIT(hwmon_curr_min) +#define HWMON_C_MAX BIT(hwmon_curr_max) +#define HWMON_C_LCRIT BIT(hwmon_curr_lcrit) +#define HWMON_C_CRIT BIT(hwmon_curr_crit) +#define HWMON_C_AVERAGE 
BIT(hwmon_curr_average) +#define HWMON_C_LOWEST BIT(hwmon_curr_lowest) +#define HWMON_C_HIGHEST BIT(hwmon_curr_highest) +#define HWMON_C_RESET_HISTORY BIT(hwmon_curr_reset_history) +#define HWMON_C_LABEL BIT(hwmon_curr_label) +#define HWMON_C_ALARM BIT(hwmon_curr_alarm) +#define HWMON_C_MIN_ALARM BIT(hwmon_curr_min_alarm) +#define HWMON_C_MAX_ALARM BIT(hwmon_curr_max_alarm) +#define HWMON_C_LCRIT_ALARM BIT(hwmon_curr_lcrit_alarm) +#define HWMON_C_CRIT_ALARM BIT(hwmon_curr_crit_alarm) + /** * struct hwmon_ops - hwmon device operations * @is_visible: Callback to return attribute visibility. Mandatory. -- cgit v1.2.3 From b308f5c744522de020da4706718de9076adeada7 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 20 Jun 2016 11:27:36 -0700 Subject: hwmon: (core) Add power attribute support to new API Acked-by: Punit Agrawal Reviewed-by: Jonathan Cameron Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 8781c2253b1d..d7e432ef7c2a 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -32,6 +32,7 @@ enum hwmon_chip_attributes { hwmon_chip_temp_reset_history, hwmon_chip_in_reset_history, hwmon_chip_curr_reset_history, + hwmon_chip_power_reset_history, hwmon_chip_register_tz, hwmon_chip_update_interval, hwmon_chip_alarms, @@ -40,6 +41,7 @@ enum hwmon_chip_attributes { #define HWMON_C_TEMP_RESET_HISTORY BIT(hwmon_chip_temp_reset_history) #define HWMON_C_IN_RESET_HISTORY BIT(hwmon_chip_in_reset_history) #define HWMON_C_CURR_RESET_HISTORY BIT(hwmon_chip_curr_reset_history) +#define HWMON_C_POWER_RESET_HISTORY BIT(hwmon_chip_power_reset_history) #define HWMON_C_REGISTER_TZ BIT(hwmon_chip_register_tz) #define HWMON_C_UPDATE_INTERVAL BIT(hwmon_chip_update_interval) #define HWMON_C_ALARMS BIT(hwmon_chip_alarms) @@ -162,6 +164,58 @@ enum hwmon_curr_attributes { #define 
HWMON_C_LCRIT_ALARM BIT(hwmon_curr_lcrit_alarm) #define HWMON_C_CRIT_ALARM BIT(hwmon_curr_crit_alarm) +enum hwmon_power_attributes { + hwmon_power_average, + hwmon_power_average_interval, + hwmon_power_average_interval_max, + hwmon_power_average_interval_min, + hwmon_power_average_highest, + hwmon_power_average_lowest, + hwmon_power_average_max, + hwmon_power_average_min, + hwmon_power_input, + hwmon_power_input_highest, + hwmon_power_input_lowest, + hwmon_power_reset_history, + hwmon_power_accuracy, + hwmon_power_cap, + hwmon_power_cap_hyst, + hwmon_power_cap_max, + hwmon_power_cap_min, + hwmon_power_max, + hwmon_power_crit, + hwmon_power_label, + hwmon_power_alarm, + hwmon_power_cap_alarm, + hwmon_power_max_alarm, + hwmon_power_crit_alarm, +}; + +#define HWMON_P_AVERAGE BIT(hwmon_power_average) +#define HWMON_P_AVERAGE_INTERVAL BIT(hwmon_power_average_interval) +#define HWMON_P_AVERAGE_INTERVAL_MAX BIT(hwmon_power_average_interval_max) +#define HWMON_P_AVERAGE_INTERVAL_MIN BIT(hwmon_power_average_interval_min) +#define HWMON_P_AVERAGE_HIGHEST BIT(hwmon_power_average_highest) +#define HWMON_P_AVERAGE_LOWEST BIT(hwmon_power_average_lowest) +#define HWMON_P_AVERAGE_MAX BIT(hwmon_power_average_max) +#define HWMON_P_AVERAGE_MIN BIT(hwmon_power_average_min) +#define HWMON_P_INPUT BIT(hwmon_power_input) +#define HWMON_P_INPUT_HIGHEST BIT(hwmon_power_input_highest) +#define HWMON_P_INPUT_LOWEST BIT(hwmon_power_input_lowest) +#define HWMON_P_RESET_HISTORY BIT(hwmon_power_reset_history) +#define HWMON_P_ACCURACY BIT(hwmon_power_accuracy) +#define HWMON_P_CAP BIT(hwmon_power_cap) +#define HWMON_P_CAP_HYST BIT(hwmon_power_cap_hyst) +#define HWMON_P_CAP_MAX BIT(hwmon_power_cap_max) +#define HWMON_P_CAP_MIN BIT(hwmon_power_cap_min) +#define HWMON_P_MAX BIT(hwmon_power_max) +#define HWMON_P_CRIT BIT(hwmon_power_crit) +#define HWMON_P_LABEL BIT(hwmon_power_label) +#define HWMON_P_ALARM BIT(hwmon_power_alarm) +#define HWMON_P_CAP_ALARM BIT(hwmon_power_cap_alarm) +#define 
HWMON_P_MAX_ALARM BIT(hwmon_power_max_alarm) +#define HWMON_P_CRIT_ALARM BIT(hwmon_power_crit_alarm) + /** * struct hwmon_ops - hwmon device operations * @is_visible: Callback to return attribute visibility. Mandatory. -- cgit v1.2.3 From 6bfcca44a6e7b0a6d92eab39c4cb830516b9568c Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 20 Jun 2016 11:38:37 -0700 Subject: hwmon: (core) Add energy and humidity attribute support to new API Acked-by: Punit Agrawal Reviewed-by: Jonathan Cameron Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index d7e432ef7c2a..57d92f1d779b 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -26,6 +26,7 @@ enum hwmon_sensor_types { hwmon_curr, hwmon_power, hwmon_energy, + hwmon_humidity, }; enum hwmon_chip_attributes { @@ -216,6 +217,34 @@ enum hwmon_power_attributes { #define HWMON_P_MAX_ALARM BIT(hwmon_power_max_alarm) #define HWMON_P_CRIT_ALARM BIT(hwmon_power_crit_alarm) +enum hwmon_energy_attributes { + hwmon_energy_input, + hwmon_energy_label, +}; + +#define HWMON_E_INPUT BIT(hwmon_energy_input) +#define HWMON_E_LABEL BIT(hwmon_energy_label) + +enum hwmon_humidity_attributes { + hwmon_humidity_input, + hwmon_humidity_label, + hwmon_humidity_min, + hwmon_humidity_min_hyst, + hwmon_humidity_max, + hwmon_humidity_max_hyst, + hwmon_humidity_alarm, + hwmon_humidity_fault, +}; + +#define HWMON_H_INPUT BIT(hwmon_humidity_input) +#define HWMON_H_LABEL BIT(hwmon_humidity_label) +#define HWMON_H_MIN BIT(hwmon_humidity_min) +#define HWMON_H_MIN_HYST BIT(hwmon_humidity_min_hyst) +#define HWMON_H_MAX BIT(hwmon_humidity_max) +#define HWMON_H_MAX_HYST BIT(hwmon_humidity_max_hyst) +#define HWMON_H_ALARM BIT(hwmon_humidity_alarm) +#define HWMON_H_FAULT BIT(hwmon_humidity_fault) + /** * struct hwmon_ops - hwmon device operations * @is_visible: Callback to return attribute 
visibility. Mandatory. -- cgit v1.2.3 From 8faee73f92cd4dd4928e6860001315a0cc834c99 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 25 Jun 2016 19:52:13 -0700 Subject: hwmon: (core) Add fan attribute support to new API Acked-by: Punit Agrawal Reviewed-by: Jonathan Cameron Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 57d92f1d779b..74e89d45ff67 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -27,6 +27,7 @@ enum hwmon_sensor_types { hwmon_power, hwmon_energy, hwmon_humidity, + hwmon_fan, }; enum hwmon_chip_attributes { @@ -245,6 +246,32 @@ enum hwmon_humidity_attributes { #define HWMON_H_ALARM BIT(hwmon_humidity_alarm) #define HWMON_H_FAULT BIT(hwmon_humidity_fault) +enum hwmon_fan_attributes { + hwmon_fan_input, + hwmon_fan_label, + hwmon_fan_min, + hwmon_fan_max, + hwmon_fan_div, + hwmon_fan_pulses, + hwmon_fan_target, + hwmon_fan_alarm, + hwmon_fan_min_alarm, + hwmon_fan_max_alarm, + hwmon_fan_fault, +}; + +#define HWMON_F_INPUT BIT(hwmon_fan_input) +#define HWMON_F_LABEL BIT(hwmon_fan_label) +#define HWMON_F_MIN BIT(hwmon_fan_min) +#define HWMON_F_MAX BIT(hwmon_fan_max) +#define HWMON_F_DIV BIT(hwmon_fan_div) +#define HWMON_F_PULSES BIT(hwmon_fan_pulses) +#define HWMON_F_TARGET BIT(hwmon_fan_target) +#define HWMON_F_ALARM BIT(hwmon_fan_alarm) +#define HWMON_F_MIN_ALARM BIT(hwmon_fan_min_alarm) +#define HWMON_F_MAX_ALARM BIT(hwmon_fan_max_alarm) +#define HWMON_F_FAULT BIT(hwmon_fan_fault) + /** * struct hwmon_ops - hwmon device operations * @is_visible: Callback to return attribute visibility. Mandatory. 
-- cgit v1.2.3 From f9f7bb3a0efafb662a4c639bc62df1df2b7321f9 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 26 Jun 2016 12:20:46 -0700 Subject: hwmon: (core) Add basic pwm attribute support to new API Add basic pwm attribute support (no auto attributes) to new API. Reviewed-by: Jonathan Cameron Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 74e89d45ff67..9d2f8bde7d12 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -28,6 +28,7 @@ enum hwmon_sensor_types { hwmon_energy, hwmon_humidity, hwmon_fan, + hwmon_pwm, }; enum hwmon_chip_attributes { @@ -272,6 +273,18 @@ enum hwmon_fan_attributes { #define HWMON_F_MAX_ALARM BIT(hwmon_fan_max_alarm) #define HWMON_F_FAULT BIT(hwmon_fan_fault) +enum hwmon_pwm_attributes { + hwmon_pwm_input, + hwmon_pwm_enable, + hwmon_pwm_mode, + hwmon_pwm_freq, +}; + +#define HWMON_PWM_INPUT BIT(hwmon_pwm_input) +#define HWMON_PWM_ENABLE BIT(hwmon_pwm_enable) +#define HWMON_PWM_MODE BIT(hwmon_pwm_mode) +#define HWMON_PWM_FREQ BIT(hwmon_pwm_freq) + /** * struct hwmon_ops - hwmon device operations * @is_visible: Callback to return attribute visibility. Mandatory. -- cgit v1.2.3 From 2a48d7322dc88f1bc6c8bd9e087fc6341ba659fd Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 1 Sep 2016 15:27:55 -0700 Subject: rpmsg: rpmsg_send() operations takes rpmsg_endpoint The rpmsg_send() operations has been taking a rpmsg_device, but this forces users of secondary rpmsg_endpoints to use the rpmsg_sendto() interface - by extracting source and destination from the given data structures. If we instead pass the rpmsg_endpoint to these functions a service can use rpmsg_sendto() to respond to messages, even on secondary endpoints. In addition this would allow us to support operations on multiple channels in future backends that does not support off-channel operations. 
Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 70 +++++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 2b97c711a5e3..a901a331a190 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -193,13 +193,14 @@ rpmsg_send_offchannel_raw(struct rpmsg_channel *, u32, u32, void *, int, bool); /** * rpmsg_send() - send a message across to the remote processor - * @rpdev: the rpmsg channel + * @ept: the rpmsg endpoint * @data: payload of message * @len: length of payload * - * This function sends @data of length @len on the @rpdev channel. - * The message will be sent to the remote processor which the @rpdev - * channel belongs to, using @rpdev's source and destination addresses. + * This function sends @data of length @len on the @ept endpoint. + * The message will be sent to the remote processor which the @ept + * endpoint belongs to, using @ept's address and its associated rpmsg + * device destination addresses. * In case there are no TX buffers available, the function will block until * one becomes available, or a timeout of 15 seconds elapses. When the latter * happens, -ERESTARTSYS is returned. @@ -208,23 +209,24 @@ rpmsg_send_offchannel_raw(struct rpmsg_channel *, u32, u32, void *, int, bool); * * Returns 0 on success and an appropriate error value on failure. 
*/ -static inline int rpmsg_send(struct rpmsg_channel *rpdev, void *data, int len) +static inline int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len) { - u32 src = rpdev->src, dst = rpdev->dst; + struct rpmsg_channel *rpdev = ept->rpdev; + u32 src = ept->addr, dst = rpdev->dst; return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, true); } /** * rpmsg_sendto() - send a message across to the remote processor, specify dst - * @rpdev: the rpmsg channel + * @ept: the rpmsg endpoint * @data: payload of message * @len: length of payload * @dst: destination address * * This function sends @data of length @len to the remote @dst address. - * The message will be sent to the remote processor which the @rpdev - * channel belongs to, using @rpdev's source address. + * The message will be sent to the remote processor which the @ept + * endpoint belongs to, using @ept's address as source. * In case there are no TX buffers available, the function will block until * one becomes available, or a timeout of 15 seconds elapses. When the latter * happens, -ERESTARTSYS is returned. @@ -234,16 +236,17 @@ static inline int rpmsg_send(struct rpmsg_channel *rpdev, void *data, int len) * Returns 0 on success and an appropriate error value on failure. 
*/ static inline -int rpmsg_sendto(struct rpmsg_channel *rpdev, void *data, int len, u32 dst) +int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst) { - u32 src = rpdev->src; + struct rpmsg_channel *rpdev = ept->rpdev; + u32 src = ept->addr; return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, true); } /** * rpmsg_send_offchannel() - send a message using explicit src/dst addresses - * @rpdev: the rpmsg channel + * @ept: the rpmsg endpoint * @src: source address * @dst: destination address * @data: payload of message @@ -251,8 +254,8 @@ int rpmsg_sendto(struct rpmsg_channel *rpdev, void *data, int len, u32 dst) * * This function sends @data of length @len to the remote @dst address, * and uses @src as the source address. - * The message will be sent to the remote processor which the @rpdev - * channel belongs to. + * The message will be sent to the remote processor which the @ept + * endpoint belongs to. * In case there are no TX buffers available, the function will block until * one becomes available, or a timeout of 15 seconds elapses. When the latter * happens, -ERESTARTSYS is returned. @@ -262,21 +265,24 @@ int rpmsg_sendto(struct rpmsg_channel *rpdev, void *data, int len, u32 dst) * Returns 0 on success and an appropriate error value on failure. */ static inline -int rpmsg_send_offchannel(struct rpmsg_channel *rpdev, u32 src, u32 dst, +int rpmsg_send_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, void *data, int len) { + struct rpmsg_channel *rpdev = ept->rpdev; + return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, true); } /** * rpmsg_send() - send a message across to the remote processor - * @rpdev: the rpmsg channel + * @ept: the rpmsg endpoint * @data: payload of message * @len: length of payload * - * This function sends @data of length @len on the @rpdev channel. - * The message will be sent to the remote processor which the @rpdev - * channel belongs to, using @rpdev's source and destination addresses. 
+ * This function sends @data of length @len on the @ept endpoint. + * The message will be sent to the remote processor which the @ept + * endpoint belongs to, using @ept's address as source and its associated + * rpdev's address as destination. * In case there are no TX buffers available, the function will immediately * return -ENOMEM without waiting until one becomes available. * @@ -285,23 +291,24 @@ int rpmsg_send_offchannel(struct rpmsg_channel *rpdev, u32 src, u32 dst, * Returns 0 on success and an appropriate error value on failure. */ static inline -int rpmsg_trysend(struct rpmsg_channel *rpdev, void *data, int len) +int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len) { - u32 src = rpdev->src, dst = rpdev->dst; + struct rpmsg_channel *rpdev = ept->rpdev; + u32 src = ept->addr, dst = rpdev->dst; return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, false); } /** * rpmsg_sendto() - send a message across to the remote processor, specify dst - * @rpdev: the rpmsg channel + * @ept: the rpmsg endpoint * @data: payload of message * @len: length of payload * @dst: destination address * * This function sends @data of length @len to the remote @dst address. - * The message will be sent to the remote processor which the @rpdev - * channel belongs to, using @rpdev's source address. + * The message will be sent to the remote processor which the @ept + * endpoint belongs to, using @ept's address as source. * In case there are no TX buffers available, the function will immediately * return -ENOMEM without waiting until one becomes available. * @@ -310,16 +317,17 @@ int rpmsg_trysend(struct rpmsg_channel *rpdev, void *data, int len) * Returns 0 on success and an appropriate error value on failure. 
*/ static inline -int rpmsg_trysendto(struct rpmsg_channel *rpdev, void *data, int len, u32 dst) +int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst) { - u32 src = rpdev->src; + struct rpmsg_channel *rpdev = ept->rpdev; + u32 src = ept->addr; return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, false); } /** * rpmsg_send_offchannel() - send a message using explicit src/dst addresses - * @rpdev: the rpmsg channel + * @ept: the rpmsg endpoint * @src: source address * @dst: destination address * @data: payload of message @@ -327,8 +335,8 @@ int rpmsg_trysendto(struct rpmsg_channel *rpdev, void *data, int len, u32 dst) * * This function sends @data of length @len to the remote @dst address, * and uses @src as the source address. - * The message will be sent to the remote processor which the @rpdev - * channel belongs to. + * The message will be sent to the remote processor which the @ept + * endpoint belongs to. * In case there are no TX buffers available, the function will immediately * return -ENOMEM without waiting until one becomes available. * @@ -337,9 +345,11 @@ int rpmsg_trysendto(struct rpmsg_channel *rpdev, void *data, int len, u32 dst) * Returns 0 on success and an appropriate error value on failure. */ static inline -int rpmsg_trysend_offchannel(struct rpmsg_channel *rpdev, u32 src, u32 dst, +int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, void *data, int len) { + struct rpmsg_channel *rpdev = ept->rpdev; + return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, false); } -- cgit v1.2.3 From 2b263d2408663a36c14a0aa1f765b2c84b92ea18 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 1 Sep 2016 15:27:56 -0700 Subject: rpmsg: Make rpmsg_create_ept() take channel_info struct As we introduce support for additional rpmsg backends, some of these only support point-to-point "links" represented by a name. 
By making rpmsg_create_ept() take a channel_info struct we allow for these backends to either be passed a source address, a destination address or a name identifier. Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index a901a331a190..f278407fcf48 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -97,6 +97,18 @@ enum rpmsg_ns_flags { struct virtproc_info; +/** + * struct rpmsg_channel_info - channel info representation + * @name: name of service + * @src: local address + * @dst: destination address + */ +struct rpmsg_channel_info { + char name[RPMSG_NAME_SIZE]; + u32 src; + u32 dst; +}; + /** * rpmsg_channel - devices that belong to the rpmsg bus are called channels * @vrp: the remote processor this channel belongs to @@ -171,7 +183,8 @@ int __register_rpmsg_driver(struct rpmsg_driver *drv, struct module *owner); void unregister_rpmsg_driver(struct rpmsg_driver *drv); void rpmsg_destroy_ept(struct rpmsg_endpoint *); struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_channel *, - rpmsg_rx_cb_t cb, void *priv, u32 addr); + rpmsg_rx_cb_t cb, void *priv, + struct rpmsg_channel_info chinfo); int rpmsg_send_offchannel_raw(struct rpmsg_channel *, u32, u32, void *, int, bool); -- cgit v1.2.3 From 92e1de51bf2cb8d49adc8925abe56ce84911a232 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 1 Sep 2016 15:27:57 -0700 Subject: rpmsg: Clean up rpmsg device vs channel naming The rpmsg device representing struct is called rpmsg_channel and the variable name used throughout is rpdev, with the communication happening on endpoints it's clearer to just call this a "device" in a public API. 
Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index f278407fcf48..35a0f39fd09b 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -110,7 +110,7 @@ struct rpmsg_channel_info { }; /** - * rpmsg_channel - devices that belong to the rpmsg bus are called channels + * rpmsg_device - device that belong to the rpmsg bus * @vrp: the remote processor this channel belongs to * @dev: the device struct * @id: device id (used to match between rpmsg drivers and devices) @@ -119,7 +119,7 @@ struct rpmsg_channel_info { * @ept: the rpmsg endpoint of this channel * @announce: if set, rpmsg will announce the creation/removal of this channel */ -struct rpmsg_channel { +struct rpmsg_device { struct virtproc_info *vrp; struct device dev; struct rpmsg_device_id id; @@ -129,7 +129,7 @@ struct rpmsg_channel { bool announce; }; -typedef void (*rpmsg_rx_cb_t)(struct rpmsg_channel *, void *, int, void *, u32); +typedef void (*rpmsg_rx_cb_t)(struct rpmsg_device *, void *, int, void *, u32); /** * struct rpmsg_endpoint - binds a local rpmsg address to its user @@ -155,7 +155,7 @@ typedef void (*rpmsg_rx_cb_t)(struct rpmsg_channel *, void *, int, void *, u32); * create additional endpoints by themselves (see rpmsg_create_ept()). 
*/ struct rpmsg_endpoint { - struct rpmsg_channel *rpdev; + struct rpmsg_device *rpdev; struct kref refcount; rpmsg_rx_cb_t cb; struct mutex cb_lock; @@ -174,19 +174,21 @@ struct rpmsg_endpoint { struct rpmsg_driver { struct device_driver drv; const struct rpmsg_device_id *id_table; - int (*probe)(struct rpmsg_channel *dev); - void (*remove)(struct rpmsg_channel *dev); - void (*callback)(struct rpmsg_channel *, void *, int, void *, u32); + int (*probe)(struct rpmsg_device *dev); + void (*remove)(struct rpmsg_device *dev); + void (*callback)(struct rpmsg_device *, void *, int, void *, u32); }; +int register_rpmsg_device(struct rpmsg_device *dev); +void unregister_rpmsg_device(struct rpmsg_device *dev); int __register_rpmsg_driver(struct rpmsg_driver *drv, struct module *owner); void unregister_rpmsg_driver(struct rpmsg_driver *drv); void rpmsg_destroy_ept(struct rpmsg_endpoint *); -struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_channel *, +struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *, rpmsg_rx_cb_t cb, void *priv, struct rpmsg_channel_info chinfo); int -rpmsg_send_offchannel_raw(struct rpmsg_channel *, u32, u32, void *, int, bool); +rpmsg_send_offchannel_raw(struct rpmsg_device *, u32, u32, void *, int, bool); /* use a macro to avoid include chaining to get THIS_MODULE */ #define register_rpmsg_driver(drv) \ @@ -224,7 +226,7 @@ rpmsg_send_offchannel_raw(struct rpmsg_channel *, u32, u32, void *, int, bool); */ static inline int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len) { - struct rpmsg_channel *rpdev = ept->rpdev; + struct rpmsg_device *rpdev = ept->rpdev; u32 src = ept->addr, dst = rpdev->dst; return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, true); @@ -251,7 +253,7 @@ static inline int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len) static inline int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst) { - struct rpmsg_channel *rpdev = ept->rpdev; + struct rpmsg_device *rpdev = 
ept->rpdev; u32 src = ept->addr; return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, true); @@ -281,7 +283,7 @@ static inline int rpmsg_send_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, void *data, int len) { - struct rpmsg_channel *rpdev = ept->rpdev; + struct rpmsg_device *rpdev = ept->rpdev; return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, true); } @@ -306,7 +308,7 @@ int rpmsg_send_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, static inline int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len) { - struct rpmsg_channel *rpdev = ept->rpdev; + struct rpmsg_device *rpdev = ept->rpdev; u32 src = ept->addr, dst = rpdev->dst; return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, false); @@ -332,7 +334,7 @@ int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len) static inline int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst) { - struct rpmsg_channel *rpdev = ept->rpdev; + struct rpmsg_device *rpdev = ept->rpdev; u32 src = ept->addr; return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, false); @@ -361,7 +363,7 @@ static inline int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, void *data, int len) { - struct rpmsg_channel *rpdev = ept->rpdev; + struct rpmsg_device *rpdev = ept->rpdev; return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, false); } -- cgit v1.2.3 From 36b72c7dca718717108120cdff7b56258a8862b4 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 1 Sep 2016 15:27:58 -0700 Subject: rpmsg: Introduce indirection table for rpmsg_device operations To allow for multiple backend implementations add an indirection table for rpmsg_device related operations and move the virtio implementation behind this table. 
Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 35a0f39fd09b..9fdcfc7c7837 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -96,6 +96,8 @@ enum rpmsg_ns_flags { #define RPMSG_ADDR_ANY 0xFFFFFFFF struct virtproc_info; +struct rpmsg_endpoint; +struct rpmsg_device_ops; /** * struct rpmsg_channel_info - channel info representation @@ -127,10 +129,31 @@ struct rpmsg_device { u32 dst; struct rpmsg_endpoint *ept; bool announce; + + const struct rpmsg_device_ops *ops; }; typedef void (*rpmsg_rx_cb_t)(struct rpmsg_device *, void *, int, void *, u32); +/** + * struct rpmsg_device_ops - indirection table for the rpmsg_device operations + * @create_ept: create backend-specific endpoint, requried + * @announce_create: announce presence of new channel, optional + * @announce_destroy: announce destruction of channel, optional + * + * Indirection table for the operations that a rpmsg backend should implement. + * @announce_create and @announce_destroy are optional as the backend might + * advertise new channels implicitly by creating the endpoints. 
+ */ +struct rpmsg_device_ops { + struct rpmsg_endpoint *(*create_ept)(struct rpmsg_device *rpdev, + rpmsg_rx_cb_t cb, void *priv, + struct rpmsg_channel_info chinfo); + + int (*announce_create)(struct rpmsg_device *ept); + int (*announce_destroy)(struct rpmsg_device *ept); +}; + /** * struct rpmsg_endpoint - binds a local rpmsg address to its user * @rpdev: rpmsg channel device -- cgit v1.2.3 From 8a228ecfe086b84e237a8d78be079e286e1ea67b Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 1 Sep 2016 15:28:00 -0700 Subject: rpmsg: Indirection table for rpmsg_endpoint operations Add indirection table for rpmsg_endpoint related operations and move virtio implementation behind this, this finishes of the decoupling of the virtio implementation from the public API. Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 62 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 9fdcfc7c7837..d54458effd54 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -96,8 +96,10 @@ enum rpmsg_ns_flags { #define RPMSG_ADDR_ANY 0xFFFFFFFF struct virtproc_info; +struct rpmsg_device; struct rpmsg_endpoint; struct rpmsg_device_ops; +struct rpmsg_endpoint_ops; /** * struct rpmsg_channel_info - channel info representation @@ -184,6 +186,36 @@ struct rpmsg_endpoint { struct mutex cb_lock; u32 addr; void *priv; + + const struct rpmsg_endpoint_ops *ops; +}; + +/** + * struct rpmsg_endpoint_ops - indirection table for rpmsg_endpoint operations + * @destroy_ept: destroy the given endpoint, required + * @send: see @rpmsg_send(), required + * @sendto: see @rpmsg_sendto(), optional + * @send_offchannel: see @rpmsg_send_offchannel(), optional + * @trysend: see @rpmsg_trysend(), required + * @trysendto: see @rpmsg_trysendto(), optional + * @trysend_offchannel: see @rpmsg_trysend_offchannel(), optional + * + * Indirection table for the 
operations that a rpmsg backend should implement. + * In addition to @destroy_ept, the backend must at least implement @send and + * @trysend, while the variants sending data off-channel are optional. + */ +struct rpmsg_endpoint_ops { + void (*destroy_ept)(struct rpmsg_endpoint *ept); + + int (*send)(struct rpmsg_endpoint *ept, void *data, int len); + int (*sendto)(struct rpmsg_endpoint *ept, void *data, int len, u32 dst); + int (*send_offchannel)(struct rpmsg_endpoint *ept, u32 src, u32 dst, + void *data, int len); + + int (*trysend)(struct rpmsg_endpoint *ept, void *data, int len); + int (*trysendto)(struct rpmsg_endpoint *ept, void *data, int len, u32 dst); + int (*trysend_offchannel)(struct rpmsg_endpoint *ept, u32 src, u32 dst, + void *data, int len); }; /** @@ -210,8 +242,6 @@ void rpmsg_destroy_ept(struct rpmsg_endpoint *); struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *, rpmsg_rx_cb_t cb, void *priv, struct rpmsg_channel_info chinfo); -int -rpmsg_send_offchannel_raw(struct rpmsg_device *, u32, u32, void *, int, bool); /* use a macro to avoid include chaining to get THIS_MODULE */ #define register_rpmsg_driver(drv) \ @@ -249,10 +279,7 @@ rpmsg_send_offchannel_raw(struct rpmsg_device *, u32, u32, void *, int, bool); */ static inline int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len) { - struct rpmsg_device *rpdev = ept->rpdev; - u32 src = ept->addr, dst = rpdev->dst; - - return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, true); + return ept->ops->send(ept, data, len); } /** @@ -276,10 +303,7 @@ static inline int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len) static inline int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst) { - struct rpmsg_device *rpdev = ept->rpdev; - u32 src = ept->addr; - - return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, true); + return ept->ops->sendto(ept, data, len, dst); } /** @@ -306,9 +330,7 @@ static inline int rpmsg_send_offchannel(struct 
rpmsg_endpoint *ept, u32 src, u32 dst, void *data, int len) { - struct rpmsg_device *rpdev = ept->rpdev; - - return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, true); + return ept->ops->send_offchannel(ept, src, dst, data, len); } /** @@ -331,10 +353,7 @@ int rpmsg_send_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, static inline int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len) { - struct rpmsg_device *rpdev = ept->rpdev; - u32 src = ept->addr, dst = rpdev->dst; - - return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, false); + return ept->ops->trysend(ept, data, len); } /** @@ -357,10 +376,7 @@ int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len) static inline int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst) { - struct rpmsg_device *rpdev = ept->rpdev; - u32 src = ept->addr; - - return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, false); + return ept->ops->trysendto(ept, data, len, dst); } /** @@ -386,9 +402,7 @@ static inline int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, void *data, int len) { - struct rpmsg_device *rpdev = ept->rpdev; - - return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, false); + return ept->ops->trysend_offchannel(ept, src, dst, data, len); } #endif /* _LINUX_RPMSG_H */ -- cgit v1.2.3 From c9bd6f422090b874b5877b4cedcd7757eac33117 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 1 Sep 2016 15:28:01 -0700 Subject: rpmsg: Move endpoint related interface to rpmsg core Move the rpmsg_send() and rpmsg_destroy_ept() interface to the rpmsg core, so that we eventually can hide the rpmsg_endpoint ops from the public API. 
Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 148 ++------------------------------------------------ 1 file changed, 6 insertions(+), 142 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index d54458effd54..99efd598590e 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -259,150 +259,14 @@ struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *, module_driver(__rpmsg_driver, register_rpmsg_driver, \ unregister_rpmsg_driver) -/** - * rpmsg_send() - send a message across to the remote processor - * @ept: the rpmsg endpoint - * @data: payload of message - * @len: length of payload - * - * This function sends @data of length @len on the @ept endpoint. - * The message will be sent to the remote processor which the @ept - * endpoint belongs to, using @ept's address and its associated rpmsg - * device destination addresses. - * In case there are no TX buffers available, the function will block until - * one becomes available, or a timeout of 15 seconds elapses. When the latter - * happens, -ERESTARTSYS is returned. - * - * Can only be called from process context (for now). - * - * Returns 0 on success and an appropriate error value on failure. - */ -static inline int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len) -{ - return ept->ops->send(ept, data, len); -} - -/** - * rpmsg_sendto() - send a message across to the remote processor, specify dst - * @ept: the rpmsg endpoint - * @data: payload of message - * @len: length of payload - * @dst: destination address - * - * This function sends @data of length @len to the remote @dst address. - * The message will be sent to the remote processor which the @ept - * endpoint belongs to, using @ept's address as source. - * In case there are no TX buffers available, the function will block until - * one becomes available, or a timeout of 15 seconds elapses. When the latter - * happens, -ERESTARTSYS is returned. 
- * - * Can only be called from process context (for now). - * - * Returns 0 on success and an appropriate error value on failure. - */ -static inline -int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst) -{ - return ept->ops->sendto(ept, data, len, dst); -} - -/** - * rpmsg_send_offchannel() - send a message using explicit src/dst addresses - * @ept: the rpmsg endpoint - * @src: source address - * @dst: destination address - * @data: payload of message - * @len: length of payload - * - * This function sends @data of length @len to the remote @dst address, - * and uses @src as the source address. - * The message will be sent to the remote processor which the @ept - * endpoint belongs to. - * In case there are no TX buffers available, the function will block until - * one becomes available, or a timeout of 15 seconds elapses. When the latter - * happens, -ERESTARTSYS is returned. - * - * Can only be called from process context (for now). - * - * Returns 0 on success and an appropriate error value on failure. - */ -static inline +int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len); +int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst); int rpmsg_send_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, - void *data, int len) -{ - return ept->ops->send_offchannel(ept, src, dst, data, len); -} + void *data, int len); -/** - * rpmsg_send() - send a message across to the remote processor - * @ept: the rpmsg endpoint - * @data: payload of message - * @len: length of payload - * - * This function sends @data of length @len on the @ept endpoint. - * The message will be sent to the remote processor which the @ept - * endpoint belongs to, using @ept's address as source and its associated - * rpdev's address as destination. - * In case there are no TX buffers available, the function will immediately - * return -ENOMEM without waiting until one becomes available. 
- * - * Can only be called from process context (for now). - * - * Returns 0 on success and an appropriate error value on failure. - */ -static inline -int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len) -{ - return ept->ops->trysend(ept, data, len); -} - -/** - * rpmsg_sendto() - send a message across to the remote processor, specify dst - * @ept: the rpmsg endpoint - * @data: payload of message - * @len: length of payload - * @dst: destination address - * - * This function sends @data of length @len to the remote @dst address. - * The message will be sent to the remote processor which the @ept - * endpoint belongs to, using @ept's address as source. - * In case there are no TX buffers available, the function will immediately - * return -ENOMEM without waiting until one becomes available. - * - * Can only be called from process context (for now). - * - * Returns 0 on success and an appropriate error value on failure. - */ -static inline -int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst) -{ - return ept->ops->trysendto(ept, data, len, dst); -} - -/** - * rpmsg_send_offchannel() - send a message using explicit src/dst addresses - * @ept: the rpmsg endpoint - * @src: source address - * @dst: destination address - * @data: payload of message - * @len: length of payload - * - * This function sends @data of length @len to the remote @dst address, - * and uses @src as the source address. - * The message will be sent to the remote processor which the @ept - * endpoint belongs to. - * In case there are no TX buffers available, the function will immediately - * return -ENOMEM without waiting until one becomes available. - * - * Can only be called from process context (for now). - * - * Returns 0 on success and an appropriate error value on failure. 
- */ -static inline +int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len); +int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst); int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, - void *data, int len) -{ - return ept->ops->trysend_offchannel(ept, src, dst, data, len); -} + void *data, int len); #endif /* _LINUX_RPMSG_H */ -- cgit v1.2.3 From fade037e0fd504cd02f51d280928d89c75527f2e Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 1 Sep 2016 15:28:05 -0700 Subject: rpmsg: Hide rpmsg indirection tables Move the device and endpoint indirection tables to the rpmsg internal header file, to hide them from the public API. Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 47 ----------------------------------------------- 1 file changed, 47 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 99efd598590e..4f9445f71f2f 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -137,25 +137,6 @@ struct rpmsg_device { typedef void (*rpmsg_rx_cb_t)(struct rpmsg_device *, void *, int, void *, u32); -/** - * struct rpmsg_device_ops - indirection table for the rpmsg_device operations - * @create_ept: create backend-specific endpoint, requried - * @announce_create: announce presence of new channel, optional - * @announce_destroy: announce destruction of channel, optional - * - * Indirection table for the operations that a rpmsg backend should implement. - * @announce_create and @announce_destroy are optional as the backend might - * advertise new channels implicitly by creating the endpoints. 
- */ -struct rpmsg_device_ops { - struct rpmsg_endpoint *(*create_ept)(struct rpmsg_device *rpdev, - rpmsg_rx_cb_t cb, void *priv, - struct rpmsg_channel_info chinfo); - - int (*announce_create)(struct rpmsg_device *ept); - int (*announce_destroy)(struct rpmsg_device *ept); -}; - /** * struct rpmsg_endpoint - binds a local rpmsg address to its user * @rpdev: rpmsg channel device @@ -190,34 +171,6 @@ struct rpmsg_endpoint { const struct rpmsg_endpoint_ops *ops; }; -/** - * struct rpmsg_endpoint_ops - indirection table for rpmsg_endpoint operations - * @destroy_ept: destroy the given endpoint, required - * @send: see @rpmsg_send(), required - * @sendto: see @rpmsg_sendto(), optional - * @send_offchannel: see @rpmsg_send_offchannel(), optional - * @trysend: see @rpmsg_trysend(), required - * @trysendto: see @rpmsg_trysendto(), optional - * @trysend_offchannel: see @rpmsg_trysend_offchannel(), optional - * - * Indirection table for the operations that a rpmsg backend should implement. - * In addition to @destroy_ept, the backend must at least implement @send and - * @trysend, while the variants sending data off-channel are optional. 
- */ -struct rpmsg_endpoint_ops { - void (*destroy_ept)(struct rpmsg_endpoint *ept); - - int (*send)(struct rpmsg_endpoint *ept, void *data, int len); - int (*sendto)(struct rpmsg_endpoint *ept, void *data, int len, u32 dst); - int (*send_offchannel)(struct rpmsg_endpoint *ept, u32 src, u32 dst, - void *data, int len); - - int (*trysend)(struct rpmsg_endpoint *ept, void *data, int len); - int (*trysendto)(struct rpmsg_endpoint *ept, void *data, int len, u32 dst); - int (*trysend_offchannel)(struct rpmsg_endpoint *ept, u32 src, u32 dst, - void *data, int len); -}; - /** * struct rpmsg_driver - rpmsg driver struct * @drv: underlying device driver -- cgit v1.2.3 From 3bf950ff23337fc812736520ff9d098284187844 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 1 Sep 2016 15:28:06 -0700 Subject: rpmsg: virtio: Hide vrp pointer from the public API Create a container struct virtio_rpmsg_channel around the rpmsg_channel to keep virtio backend information separate from the rpmsg and public API. This makes the public structures independent of virtio.
Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 4f9445f71f2f..b4b56b010f71 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -95,7 +95,6 @@ enum rpmsg_ns_flags { #define RPMSG_ADDR_ANY 0xFFFFFFFF -struct virtproc_info; struct rpmsg_device; struct rpmsg_endpoint; struct rpmsg_device_ops; @@ -115,7 +114,6 @@ struct rpmsg_channel_info { /** * rpmsg_device - device that belong to the rpmsg bus - * @vrp: the remote processor this channel belongs to * @dev: the device struct * @id: device id (used to match between rpmsg drivers and devices) * @src: local address @@ -124,7 +122,6 @@ struct rpmsg_channel_info { * @announce: if set, rpmsg will announce the creation/removal of this channel */ struct rpmsg_device { - struct virtproc_info *vrp; struct device dev; struct rpmsg_device_id id; u32 src; -- cgit v1.2.3 From e88dae5da46d3989fd6a83dd9f6806777b20d1ae Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 1 Sep 2016 15:28:07 -0700 Subject: rpmsg: Move virtio specifics from public header Move virtio rpmsg implementation details from the public header file to the virtio rpmsg implementation. 
Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 52 --------------------------------------------------- 1 file changed, 52 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index b4b56b010f71..71b16d37503a 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -41,58 +41,6 @@ #include #include -/* The feature bitmap for virtio rpmsg */ -#define VIRTIO_RPMSG_F_NS 0 /* RP supports name service notifications */ - -/** - * struct rpmsg_hdr - common header for all rpmsg messages - * @src: source address - * @dst: destination address - * @reserved: reserved for future use - * @len: length of payload (in bytes) - * @flags: message flags - * @data: @len bytes of message payload data - * - * Every message sent(/received) on the rpmsg bus begins with this header. - */ -struct rpmsg_hdr { - u32 src; - u32 dst; - u32 reserved; - u16 len; - u16 flags; - u8 data[0]; -} __packed; - -/** - * struct rpmsg_ns_msg - dynamic name service announcement message - * @name: name of remote service that is published - * @addr: address of remote service that is published - * @flags: indicates whether service is created or destroyed - * - * This message is sent across to publish a new service, or announce - * about its removal. When we receive these messages, an appropriate - * rpmsg channel (i.e device) is created/destroyed. In turn, the ->probe() - * or ->remove() handler of the appropriate rpmsg driver will be invoked - * (if/as-soon-as one is registered). 
- */ -struct rpmsg_ns_msg { - char name[RPMSG_NAME_SIZE]; - u32 addr; - u32 flags; -} __packed; - -/** - * enum rpmsg_ns_flags - dynamic name service announcement flags - * - * @RPMSG_NS_CREATE: a new remote service was just created - * @RPMSG_NS_DESTROY: a known remote service was just destroyed - */ -enum rpmsg_ns_flags { - RPMSG_NS_CREATE = 0, - RPMSG_NS_DESTROY = 1, -}; - #define RPMSG_ADDR_ANY 0xFFFFFFFF struct rpmsg_device; -- cgit v1.2.3 From 4b83c52a21cf5a7421b7c28bebf8ff28ba96ceb9 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 1 Sep 2016 15:28:08 -0700 Subject: rpmsg: Allow callback to return errors Some rpmsg backends support holding on to and redelivering messages upon failed handling of them, so provide a way for the callback to report an error and allow the backends to handle this. Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 71b16d37503a..452d393cc8dd 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -80,7 +80,7 @@ struct rpmsg_device { const struct rpmsg_device_ops *ops; }; -typedef void (*rpmsg_rx_cb_t)(struct rpmsg_device *, void *, int, void *, u32); +typedef int (*rpmsg_rx_cb_t)(struct rpmsg_device *, void *, int, void *, u32); /** * struct rpmsg_endpoint - binds a local rpmsg address to its user @@ -129,7 +129,7 @@ struct rpmsg_driver { const struct rpmsg_device_id *id_table; int (*probe)(struct rpmsg_device *dev); void (*remove)(struct rpmsg_device *dev); - void (*callback)(struct rpmsg_device *, void *, int, void *, u32); + int (*callback)(struct rpmsg_device *, void *, int, void *, u32); }; int register_rpmsg_device(struct rpmsg_device *dev); -- cgit v1.2.3 From b67067f1176df6ee727450546b58704e4b588563 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 24 Aug 2016 22:29:20 +1000 Subject: kbuild: allow archs to select link dead code/data elimination
Introduce LD_DEAD_CODE_DATA_ELIMINATION option for architectures to select to build with -ffunction-sections, -fdata-sections, and link with --gc-sections. It requires some work (documented) to ensure all unreferenced entrypoints are live, and requires toolchain and build verification, so it is made a per-arch option for now. On a random powerpc64le build, this yields a significant size saving, it boots and runs fine, but there is a lot I haven't tested as yet, so these savings may be reduced if there are bugs in the link. text data bss dec filename 11169741 1180744 1923176 14273661 vmlinux 10445269 1004127 1919707 13369103 vmlinux.dce ~700K text, ~170K data, 6% removed from kernel image size. Signed-off-by: Nicholas Piggin Signed-off-by: Michal Marek --- include/linux/compiler.h | 23 +++++++++++++++++++++++ include/linux/export.h | 30 +++++++++++++++--------------- include/linux/init.h | 38 +++++++++++++------------------------- 3 files changed, 51 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 1bb954842725..86130cded110 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -182,6 +182,29 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define unreachable() do { } while (1) #endif +/* + * KENTRY - kernel entry point + * This can be used to annotate symbols (functions or data) that are used + * without their linker symbol being referenced explicitly. For example, + * interrupt vector handlers, or functions in the kernel image that are found + * programatically. + * + * Not required for symbols exported with EXPORT_SYMBOL, or initcalls. Those + * are handled in their own way (with KEEP() in linker scripts). + * + * KENTRY can be avoided if the symbols in question are marked as KEEP() in the + * linker script.
For example an architecture could KEEP() its entire + * boot/exception vector code rather than annotate each function and data. + */ +#ifndef KENTRY +# define KENTRY(sym) \ + extern typeof(sym) sym; \ + static const unsigned long __kentry_##sym \ + __used \ + __attribute__((section("___kentry" "+" #sym ), used)) \ + = (unsigned long)&sym; +#endif + #ifndef RELOC_HIDE # define RELOC_HIDE(ptr, off) \ ({ unsigned long __ptr; \ diff --git a/include/linux/export.h b/include/linux/export.h index c565f87f005e..337cb90f3668 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -1,5 +1,6 @@ #ifndef _LINUX_EXPORT_H #define _LINUX_EXPORT_H + /* * Export symbols from the kernel to modules. Forked from module.h * to reduce the amount of pointless cruft we feed to gcc when only @@ -42,27 +43,26 @@ extern struct module __this_module; #ifdef CONFIG_MODVERSIONS /* Mark the CRC weak since genksyms apparently decides not to * generate a checksums for some symbols */ -#define __CRC_SYMBOL(sym, sec) \ - extern __visible void *__crc_##sym __attribute__((weak)); \ - static const unsigned long __kcrctab_##sym \ - __used \ - __attribute__((section("___kcrctab" sec "+" #sym), unused)) \ +#define __CRC_SYMBOL(sym, sec) \ + extern __visible void *__crc_##sym __attribute__((weak)); \ + static const unsigned long __kcrctab_##sym \ + __used \ + __attribute__((section("___kcrctab" sec "+" #sym), used)) \ = (unsigned long) &__crc_##sym; #else #define __CRC_SYMBOL(sym, sec) #endif /* For every exported symbol, place a struct in the __ksymtab section */ -#define ___EXPORT_SYMBOL(sym, sec) \ - extern typeof(sym) sym; \ - __CRC_SYMBOL(sym, sec) \ - static const char __kstrtab_##sym[] \ - __attribute__((section("__ksymtab_strings"), aligned(1))) \ - = VMLINUX_SYMBOL_STR(sym); \ - extern const struct kernel_symbol __ksymtab_##sym; \ - __visible const struct kernel_symbol __ksymtab_##sym \ - __used \ - __attribute__((section("___ksymtab" sec "+" #sym), unused)) \ +#define 
___EXPORT_SYMBOL(sym, sec) \ + extern typeof(sym) sym; \ + __CRC_SYMBOL(sym, sec) \ + static const char __kstrtab_##sym[] \ + __attribute__((section("__ksymtab_strings"), aligned(1))) \ + = VMLINUX_SYMBOL_STR(sym); \ + static const struct kernel_symbol __ksymtab_##sym \ + __used \ + __attribute__((section("___ksymtab" sec "+" #sym), used)) \ = { (unsigned long)&sym, __kstrtab_##sym } #if defined(__KSYM_DEPS__) diff --git a/include/linux/init.h b/include/linux/init.h index 6935d02474aa..e571fec4bb28 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -150,24 +150,8 @@ extern bool initcall_debug; #ifndef __ASSEMBLY__ -#ifdef CONFIG_LTO -/* Work around a LTO gcc problem: when there is no reference to a variable - * in a module it will be moved to the end of the program. This causes - * reordering of initcalls which the kernel does not like. - * Add a dummy reference function to avoid this. The function is - * deleted by the linker. - */ -#define LTO_REFERENCE_INITCALL(x) \ - ; /* yes this is needed */ \ - static __used __exit void *reference_##x(void) \ - { \ - return &x; \ - } -#else -#define LTO_REFERENCE_INITCALL(x) -#endif - -/* initcalls are now grouped by functionality into separate +/* + * initcalls are now grouped by functionality into separate * subsections. Ordering inside the subsections is determined * by link order. * For backwards compatibility, initcall() puts the call in @@ -175,12 +159,16 @@ extern bool initcall_debug; * * The `id' arg to __define_initcall() is needed so that multiple initcalls * can point at the same handler without causing duplicate-symbol build errors. + * + * Initcalls are run by placing pointers in initcall sections that the + * kernel iterates at runtime. The linker can do dead code / data elimination + * and remove that completely, so the initcall sections have to be marked + * as KEEP() in the linker script. 
*/ #define __define_initcall(fn, id) \ static initcall_t __initcall_##fn##id __used \ - __attribute__((__section__(".initcall" #id ".init"))) = fn; \ - LTO_REFERENCE_INITCALL(__initcall_##fn##id) + __attribute__((__section__(".initcall" #id ".init"))) = fn; /* * Early initcalls run before initializing SMP. @@ -216,15 +204,15 @@ extern bool initcall_debug; #define __initcall(fn) device_initcall(fn) -#define __exitcall(fn) \ +#define __exitcall(fn) \ static exitcall_t __exitcall_##fn __exit_call = fn -#define console_initcall(fn) \ - static initcall_t __initcall_##fn \ +#define console_initcall(fn) \ + static initcall_t __initcall_##fn \ __used __section(.con_initcall.init) = fn -#define security_initcall(fn) \ - static initcall_t __initcall_##fn \ +#define security_initcall(fn) \ + static initcall_t __initcall_##fn \ __used __section(.security_initcall.init) = fn struct obs_kernel_param { -- cgit v1.2.3 From 3e9b3112ec74f192eaab976c3889e34255cae940 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 31 Aug 2016 12:46:44 +0100 Subject: add basic register-field manipulation macros Common approach to accessing register fields is to define structures or sets of macros containing mask and shift pair. Operations on the register are then performed as follows: field = (reg >> shift) & mask; reg &= ~(mask << shift); reg |= (field & mask) << shift; Defining shift and mask separately is tedious. Ivo van Doorn came up with an idea of computing them at compilation time based on a single shifted mask (later refined by Felix) which can be used like this: #define REG_FIELD 0x000ff000 field = FIELD_GET(REG_FIELD, reg); reg &= ~REG_FIELD; reg |= FIELD_PREP(REG_FIELD, field); FIELD_{GET,PREP} macros take care of finding out what the appropriate shift is based on compilation time ffs operation. GENMASK can be used to define registers (which is usually less error-prone and easier to match with datasheets). 
This approach is the most convenient I've seen so to limit code multiplication let's move the macros to a global header file. Attempts to use static inlines instead of macros failed due to false positive triggering of BUILD_BUG_ON()s, especially with GCC < 6.0. Signed-off-by: Jakub Kicinski Reviewed-by: Dinan Gunawardena Signed-off-by: Kalle Valo --- include/linux/bitfield.h | 93 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/bug.h | 3 ++ 2 files changed, 96 insertions(+) create mode 100644 include/linux/bitfield.h (limited to 'include/linux') diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h new file mode 100644 index 000000000000..f6505d83069d --- /dev/null +++ b/include/linux/bitfield.h @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2014 Felix Fietkau + * Copyright (C) 2004 - 2009 Ivo van Doorn + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_BITFIELD_H +#define _LINUX_BITFIELD_H + +#include + +/* + * Bitfield access macros + * + * FIELD_{GET,PREP} macros take as first parameter shifted mask + * from which they extract the base mask and shift amount. + * Mask must be a compilation time constant. 
+ * + * Example: + * + * #define REG_FIELD_A GENMASK(6, 0) + * #define REG_FIELD_B BIT(7) + * #define REG_FIELD_C GENMASK(15, 8) + * #define REG_FIELD_D GENMASK(31, 16) + * + * Get: + * a = FIELD_GET(REG_FIELD_A, reg); + * b = FIELD_GET(REG_FIELD_B, reg); + * + * Set: + * reg = FIELD_PREP(REG_FIELD_A, 1) | + * FIELD_PREP(REG_FIELD_B, 0) | + * FIELD_PREP(REG_FIELD_C, c) | + * FIELD_PREP(REG_FIELD_D, 0x40); + * + * Modify: + * reg &= ~REG_FIELD_C; + * reg |= FIELD_PREP(REG_FIELD_C, c); + */ + +#define __bf_shf(x) (__builtin_ffsll(x) - 1) + +#define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \ + ({ \ + BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ + _pfx "mask is not constant"); \ + BUILD_BUG_ON_MSG(!(_mask), _pfx "mask is zero"); \ + BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ + ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ + _pfx "value too large for the field"); \ + BUILD_BUG_ON_MSG((_mask) > (typeof(_reg))~0ull, \ + _pfx "type of reg too small for mask"); \ + __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \ + (1ULL << __bf_shf(_mask))); \ + }) + +/** + * FIELD_PREP() - prepare a bitfield element + * @_mask: shifted mask defining the field's length and position + * @_val: value to put in the field + * + * FIELD_PREP() masks and shifts up the value. The result should + * be combined with other fields of the bitfield using logical OR. + */ +#define FIELD_PREP(_mask, _val) \ + ({ \ + __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: "); \ + ((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \ + }) + +/** + * FIELD_GET() - extract a bitfield element + * @_mask: shifted mask defining the field's length and position + * @_reg: 32bit value of entire bitfield + * + * FIELD_GET() extracts the field specified by @_mask from the + * bitfield passed in as @_reg by masking and shifting it down. 
+ */ +#define FIELD_GET(_mask, _reg) \ + ({ \ + __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ + (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \ + }) + +#endif diff --git a/include/linux/bug.h b/include/linux/bug.h index e51b0709e78d..292d6a10b0c2 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -13,6 +13,7 @@ enum bug_trap_type { struct pt_regs; #ifdef __CHECKER__ +#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) #define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) #define BUILD_BUG_ON_ZERO(e) (0) #define BUILD_BUG_ON_NULL(e) ((void*)0) @@ -24,6 +25,8 @@ struct pt_regs; #else /* __CHECKER__ */ /* Force a compilation error if a constant expression is not a power of 2 */ +#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \ + BUILD_BUG_ON(((n) & ((n) - 1)) != 0) #define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) -- cgit v1.2.3 From 634faf3686900ccdee87b77e2c56df8b2159912b Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Mon, 5 Sep 2016 11:42:12 +0100 Subject: brcmfmac: add support for bcm4339 chip with modalias sdio:c00v02D0d4339 The driver already supports the bcm4339 chipset but only for the variant that shares the same modalias as the bcm4335, ie. sdio:c00v02D0d4335. It turns out that there are also bcm4339 devices out there that have a more distinguishable modalias sdio:c00v02D0d4339.
Reported-by: Steve deRosier Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- include/linux/mmc/sdio_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 0d126aeb3ec0..d43ef96bf075 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -32,6 +32,7 @@ #define SDIO_DEVICE_ID_BROADCOM_43340 0xa94c #define SDIO_DEVICE_ID_BROADCOM_43341 0xa94d #define SDIO_DEVICE_ID_BROADCOM_4335_4339 0x4335 +#define SDIO_DEVICE_ID_BROADCOM_4339 0x4339 #define SDIO_DEVICE_ID_BROADCOM_43362 0xa962 #define SDIO_DEVICE_ID_BROADCOM_43430 0xa9a6 #define SDIO_DEVICE_ID_BROADCOM_4345 0x4345 -- cgit v1.2.3 From 7d06d9c9bd813fc956b9c7bffc1b9724009983eb Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 29 Jul 2016 09:30:12 -0700 Subject: mm: Implement new pkey_mprotect() system call pkey_mprotect() is just like mprotect, except it also takes a protection key as an argument. On systems that do not support protection keys, it still works, but requires that key=0. Otherwise it does exactly what mprotect does. I expect it to get used like this, if you want to guarantee that any mapping you create can *never* be accessed without the right protection keys set up. int real_prot = PROT_READ|PROT_WRITE; pkey = pkey_alloc(0, PKEY_DENY_ACCESS); ptr = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); ret = pkey_mprotect(ptr, PAGE_SIZE, real_prot, pkey); This way, there is *no* window where the mapping is accessible since it was always either PROT_NONE or had a protection key set that denied all access. We settled on 'unsigned long' for the type of the key here. We only need 4 bits on x86 today, but I figured that other architectures might need some more space. 
Semantically, we have a bit of a problem if we combine this syscall with our previously-introduced execute-only support: What do we do when we mix execute-only pkey use with pkey_mprotect() use? For instance: pkey_mprotect(ptr, PAGE_SIZE, PROT_WRITE, 6); // set pkey=6 mprotect(ptr, PAGE_SIZE, PROT_EXEC); // set pkey=X_ONLY_PKEY? mprotect(ptr, PAGE_SIZE, PROT_WRITE); // is pkey=6 again? To solve that, we make the plain-mprotect()-initiated execute-only support only apply to VMAs that have the default protection key (0) set on them. Proposed semantics: 1. protection key 0 is special and represents the default, "unassigned" protection key. It is always allocated. 2. mprotect() never affects a mapping's pkey_mprotect()-assigned protection key. A protection key of 0 (even if set explicitly) represents an unassigned protection key. 2a. mprotect(PROT_EXEC) on a mapping with an assigned protection key may or may not result in a mapping with execute-only properties. pkey_mprotect() plus pkey_set() on all threads should be used to _guarantee_ execute-only semantics if this is not a strong enough semantic. 3. mprotect(PROT_EXEC) may result in an "execute-only" mapping. The kernel will internally attempt to allocate and dedicate a protection key for the purpose of execute-only mappings. This may not be possible in cases where there are no free protection keys available. It can also happen, of course, in situations where there is no hardware support for protection keys. 
Signed-off-by: Dave Hansen Acked-by: Mel Gorman Cc: linux-arch@vger.kernel.org Cc: Dave Hansen Cc: arnd@arndb.de Cc: linux-api@vger.kernel.org Cc: linux-mm@kvack.org Cc: luto@kernel.org Cc: akpm@linux-foundation.org Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/20160729163012.3DDD36C4@viggo.jf.intel.com Signed-off-by: Thomas Gleixner --- include/linux/pkeys.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h index 1d405a2b7272..0030b4024559 100644 --- a/include/linux/pkeys.h +++ b/include/linux/pkeys.h @@ -18,16 +18,4 @@ #define PKEY_DEDICATED_EXECUTE_ONLY 0 #endif /* ! CONFIG_ARCH_HAS_PKEYS */ -/* - * This is called from mprotect_pkey(). - * - * Returns true if the protection keys is valid. - */ -static inline bool validate_pkey(int pkey) -{ - if (pkey < 0) - return false; - return (pkey < arch_max_pkey()); -} - #endif /* _LINUX_PKEYS_H */ -- cgit v1.2.3 From a8502b67d739c1d7a4542c1da0a5d98a6a58c177 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 29 Jul 2016 09:30:13 -0700 Subject: x86/pkeys: Make mprotect_key() mask off additional vm_flags Today, mprotect() takes 4 bits of data: PROT_READ/WRITE/EXEC/NONE. Three of those bits: READ/WRITE/EXEC get translated directly in to vma->vm_flags by calc_vm_prot_bits(). If a bit is unset in mprotect()'s 'prot' argument then it must be cleared in vma->vm_flags during the mprotect() call. We do this clearing today by first calculating the VMA flags we want set, then clearing the ones we do not want to inherit from the original VMA: vm_flags = calc_vm_prot_bits(prot, key); ... newflags = vm_flags; newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC)); However, we *also* want to mask off the original VMA's vm_flags in which we store the protection key. To do that, this patch adds a new macro: ARCH_VM_PKEY_FLAGS which allows the architecture to specify additional bits that it would like cleared. 
We use that to ensure that the VM_PKEY_BIT* bits get cleared. Signed-off-by: Dave Hansen Acked-by: Mel Gorman Reviewed-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Dave Hansen Cc: arnd@arndb.de Cc: linux-api@vger.kernel.org Cc: linux-mm@kvack.org Cc: luto@kernel.org Cc: akpm@linux-foundation.org Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/20160729163013.E48D6981@viggo.jf.intel.com Signed-off-by: Thomas Gleixner --- include/linux/pkeys.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h index 0030b4024559..6899b0bc7ce0 100644 --- a/include/linux/pkeys.h +++ b/include/linux/pkeys.h @@ -16,6 +16,7 @@ #define execute_only_pkey(mm) (0) #define arch_override_mprotect_pkey(vma, prot, pkey) (0) #define PKEY_DEDICATED_EXECUTE_ONLY 0 +#define ARCH_VM_PKEY_FLAGS 0 #endif /* ! CONFIG_ARCH_HAS_PKEYS */ #endif /* _LINUX_PKEYS_H */ -- cgit v1.2.3 From e8c24d3a23a469f1f40d4de24d872ca7023ced0a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 29 Jul 2016 09:30:15 -0700 Subject: x86/pkeys: Allocation/free syscalls This patch adds two new system calls: int pkey_alloc(unsigned long flags, unsigned long init_access_rights) int pkey_free(int pkey); These implement an "allocator" for the protection keys themselves, which can be thought of as analogous to the allocator that the kernel has for file descriptors. The kernel tracks which numbers are in use, and only allows operations on keys that are valid. A key which was not obtained by pkey_alloc() may not, for instance, be passed to pkey_mprotect(). These system calls are also very important given the kernel's use of pkeys to implement execute-only support. These help ensure that userspace can never assume that it has control of a key unless it first asks the kernel. The kernel does not promise to preserve PKRU (right register) contents except for allocated pkeys. 
The 'init_access_rights' argument to pkey_alloc() specifies the rights that will be established for the returned pkey. For instance: pkey = pkey_alloc(flags, PKEY_DENY_WRITE); will allocate 'pkey', but also sets the bits in PKRU[1] such that writing to 'pkey' is already denied. The kernel does not prevent pkey_free() from successfully freeing in-use pkeys (those still assigned to a memory range by pkey_mprotect()). It would be expensive to implement the checks for this, so we instead say, "Just don't do it" since sane software will never do it anyway. Any piece of userspace calling pkey_alloc() needs to be prepared for it to fail. Why? pkey_alloc() returns the same error code (ENOSPC) when there are no pkeys and when pkeys are unsupported. They can be unsupported for a whole host of reasons, so apps must be prepared for this. Also, libraries or LD_PRELOADs might steal keys before an application gets access to them. This allocation mechanism could be implemented in userspace. Even if we did it in userspace, we would still need additional user/kernel interfaces to tell userspace which keys are being used by the kernel internally (such as for execute-only mappings). Having the kernel provide this facility completely removes the need for these additional interfaces, or having an implementation of this in userspace at all. Note that we have to make changes to all of the architectures that do not use mman-common.h because we use the new PKEY_DENY_ACCESS/WRITE macros in arch-independent code. 1. PKRU is the Protection Key Rights User register. It is a usermode-accessible register that controls whether writes and/or access to each individual pkey is allowed or denied. 
Signed-off-by: Dave Hansen Acked-by: Mel Gorman Cc: linux-arch@vger.kernel.org Cc: Dave Hansen Cc: arnd@arndb.de Cc: linux-api@vger.kernel.org Cc: linux-mm@kvack.org Cc: luto@kernel.org Cc: akpm@linux-foundation.org Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/20160729163015.444FE75F@viggo.jf.intel.com Signed-off-by: Thomas Gleixner --- include/linux/pkeys.h | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h index 6899b0bc7ce0..8ff21125dc8a 100644 --- a/include/linux/pkeys.h +++ b/include/linux/pkeys.h @@ -4,11 +4,6 @@ #include #include -#define PKEY_DISABLE_ACCESS 0x1 -#define PKEY_DISABLE_WRITE 0x2 -#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ - PKEY_DISABLE_WRITE) - #ifdef CONFIG_ARCH_HAS_PKEYS #include #else /* ! CONFIG_ARCH_HAS_PKEYS */ @@ -17,6 +12,29 @@ #define arch_override_mprotect_pkey(vma, prot, pkey) (0) #define PKEY_DEDICATED_EXECUTE_ONLY 0 #define ARCH_VM_PKEY_FLAGS 0 + +static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) +{ + return (pkey == 0); +} + +static inline int mm_pkey_alloc(struct mm_struct *mm) +{ + return -1; +} + +static inline int mm_pkey_free(struct mm_struct *mm, int pkey) +{ + WARN_ONCE(1, "free of protection key when disabled"); + return -EINVAL; +} + +static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, + unsigned long init_val) +{ + return 0; +} + #endif /* ! CONFIG_ARCH_HAS_PKEYS */ #endif /* _LINUX_PKEYS_H */ -- cgit v1.2.3 From a60f7b69d92c0142c80a30d669a76b617b7f6879 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 29 Jul 2016 09:30:18 -0700 Subject: generic syscalls: Wire up memory protection keys syscalls These new syscalls are implemented as generic code, so enable them for architectures like arm64 which use the generic syscall table. 
According to Arnd: Even if the support is x86 specific for the forseeable future, it may be good to reserve the number just in case. The other architecture specific syscall lists are usually left to the individual arch maintainers, most a lot of the newer architectures share this table. Signed-off-by: Dave Hansen Acked-by: Arnd Bergmann Cc: linux-arch@vger.kernel.org Cc: Dave Hansen Cc: mgorman@techsingularity.net Cc: linux-api@vger.kernel.org Cc: linux-mm@kvack.org Cc: luto@kernel.org Cc: akpm@linux-foundation.org Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/20160729163018.505A6875@viggo.jf.intel.com Signed-off-by: Thomas Gleixner --- include/linux/syscalls.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index d02239022bd0..0d7abb8b7315 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -898,4 +898,12 @@ asmlinkage long sys_copy_file_range(int fd_in, loff_t __user *off_in, asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags); +asmlinkage long sys_pkey_mprotect(unsigned long start, size_t len, + unsigned long prot, int pkey); +asmlinkage long sys_pkey_alloc(unsigned long flags, unsigned long init_val); +asmlinkage long sys_pkey_free(int pkey); +//asmlinkage long sys_pkey_get(int pkey, unsigned long flags); +//asmlinkage long sys_pkey_set(int pkey, unsigned long access_rights, +// unsigned long flags); + #endif -- cgit v1.2.3 From acd547b29880800d29222c4632d2c145e401988c Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 29 Jul 2016 09:30:21 -0700 Subject: x86/pkeys: Default to a restrictive init PKRU PKRU is the register that lets you disallow writes or all access to a given protection key. The XSAVE hardware defines an "init state" of 0 for PKRU: its most permissive state, allowing access/writes to everything. 
Since we start off all new processes with the init state, we start all processes off with the most permissive possible PKRU. This is unfortunate. If a thread is clone()'d [1] before a program has time to set PKRU to a restrictive value, that thread will be able to write to all data, no matter what pkey is set on it. This weakens any integrity guarantees that we want pkeys to provide. To fix this, we define a very restrictive PKRU to override the XSAVE-provided value when we create a new FPU context. We choose a value that only allows access to pkey 0, which is as restrictive as we can practically make it. This does not cause any practical problems with applications using protection keys because we require them to specify initial permissions for each key when it is allocated, which override the restrictive default. In the end, this ensures that threads which do not know how to manage their own pkey rights can not do damage to data which is pkey-protected. I would have thought this was a pretty contrived scenario, except that I heard a bug report from an MPX user who was creating threads in some very early code before main(). It may be crazy, but folks evidently _do_ it. Signed-off-by: Dave Hansen Cc: linux-arch@vger.kernel.org Cc: Dave Hansen Cc: mgorman@techsingularity.net Cc: arnd@arndb.de Cc: linux-api@vger.kernel.org Cc: linux-mm@kvack.org Cc: luto@kernel.org Cc: akpm@linux-foundation.org Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/20160729163021.F3C25D4A@viggo.jf.intel.com Signed-off-by: Thomas Gleixner --- include/linux/pkeys.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h index 8ff21125dc8a..e4c08c1ff0c5 100644 --- a/include/linux/pkeys.h +++ b/include/linux/pkeys.h @@ -35,6 +35,10 @@ static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, return 0; } +static inline void copy_init_pkru_to_fpregs(void) +{ +} + #endif /* ! 
CONFIG_ARCH_HAS_PKEYS */ #endif /* _LINUX_PKEYS_H */ -- cgit v1.2.3 From 86cdd72af936860503f392825410d1b60a3e474e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Sep 2016 14:08:26 +0100 Subject: drivers/perf: arm_pmu: add common attr group fields In preparation for adding common attribute groups, add an array of attribute group pointers to arm_pmu, which will be used if the backend hasn't already set pmu::attr_groups. Subsequent patches will move backends over to using these, before adding common fields. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index e18843809eec..268bc63f1358 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -14,7 +14,7 @@ #include #include - +#include #include /* @@ -77,6 +77,12 @@ struct pmu_hw_events { struct arm_pmu *percpu_pmu; }; +enum armpmu_attr_groups { + ARMPMU_ATTR_GROUP_EVENTS, + ARMPMU_ATTR_GROUP_FORMATS, + ARMPMU_NR_ATTR_GROUPS +}; + struct arm_pmu { struct pmu pmu; cpumask_t active_irqs; @@ -111,6 +117,8 @@ struct arm_pmu { struct pmu_hw_events __percpu *hw_events; struct list_head entry; struct notifier_block cpu_pm_nb; + /* the attr_groups array must be NULL-terminated */ + const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1]; }; #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) -- cgit v1.2.3 From 48538b5863d8e8f8d567fc9a1d27a68623e0a0ff Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Sep 2016 14:08:30 +0100 Subject: drivers/perf: arm_pmu: expose a cpumask in sysfs In systems with heterogeneous CPUs, there are multiple logical CPU PMUs, each of which covers a subset of CPUs in the system. 
In some cases userspace needs to know which CPUs a given logical PMU covers, so we'd like to expose a cpumask under sysfs, similar to what is done for uncore PMUs. Unfortunately, prior to commit 00e727bb389359c8 ("perf stat: Balance opening and reading events"), perf stat only correctly handled a cpumask holding a single CPU, and only when profiling in system-wide mode. In other cases, the presence of a cpumask file could cause perf stat to behave erratically. Thus, exposing a cpumask file would break older perf binaries in cases where they would otherwise work. To avoid this issue while still providing userspace with the information it needs, this patch exposes a differently-named file (cpus) under sysfs. New tools can look for this and operate correctly, while older tools will not be adversely affected by its presence. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 268bc63f1358..dc1f2f30c961 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -78,6 +78,7 @@ struct pmu_hw_events { }; enum armpmu_attr_groups { + ARMPMU_ATTR_GROUP_COMMON, ARMPMU_ATTR_GROUP_EVENTS, ARMPMU_ATTR_GROUP_FORMATS, ARMPMU_NR_ATTR_GROUPS -- cgit v1.2.3 From 283e4a82999f48c61495436b9bbd0357a3268f9d Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Tue, 6 Sep 2016 06:04:20 -0300 Subject: [media] media: platform: pxa_camera: make a standalone v4l2 device This patch removes the soc_camera API dependency from pxa_camera. In the current status : - all previous captures are working the same on pxa270 - the s_crop() call was removed, judged not working (see what happens soc_camera_s_crop() when get_crop() == NULL) - if the pixel clock is provided by the sensor, ie. not MCLK, the dual stage change is not handled yet.
=> there is no in-tree user of this, so I'll leave it that way - the MCLK is not yet finished, it's as in the legacy way, ie. activated at video device opening and closed at video device closing. In a subsequent patch pxa_camera_mclk_ops should be used, and platform data MCLK ignored. It will be the sensor's duty to request the clock and enable it, which will end in pxa_camera_mclk_ops. Signed-off-by: Robert Jarzmik Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/platform_data/media/camera-pxa.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/media/camera-pxa.h b/include/linux/platform_data/media/camera-pxa.h index 6709b1cd7c77..ce5d90e1a6e4 100644 --- a/include/linux/platform_data/media/camera-pxa.h +++ b/include/linux/platform_data/media/camera-pxa.h @@ -37,6 +37,8 @@ struct pxacamera_platform_data { unsigned long flags; unsigned long mclk_10khz; + int sensor_i2c_adapter_id; + int sensor_i2c_address; }; extern void pxa_set_camera_info(struct pxacamera_platform_data *); -- cgit v1.2.3 From 9479c7cebfb568f8b8b424be7f1cac120e9eea95 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 26 Feb 2016 21:22:05 +0000 Subject: efi: Refactor efi_memmap_init_early() into arch-neutral code Every EFI architecture apart from ia64 needs to set up the EFI memory map at efi.memmap, and the code for doing that is essentially the same across all implementations. Therefore, it makes sense to factor this out into the common code under drivers/firmware/efi/. The only slight variation is the data structure out of which we pull the initial memory map information, such as physical address, memory descriptor size and version, etc. We can address this by passing a generic data structure (struct efi_memory_map_data) as the argument to efi_memmap_init_early() which contains the minimum info required for initialising the memory map.
In the process, this patch also fixes a few undesirable implementation differences: - ARM and arm64 were failing to clear the EFI_MEMMAP bit when unmapping the early EFI memory map. EFI_MEMMAP indicates whether the EFI memory map is mapped (not the regions contained within) and can be traversed. It's more correct to set the bit as soon as we memremap() the passed in EFI memmap. - Rename efi_unmmap_memmap() to efi_memmap_unmap() to adhere to the regular naming scheme. This patch also uses a read-write mapping for the memory map instead of the read-only mapping currently used on ARM and arm64. x86 needs the ability to update the memory map in-place when assigning virtual addresses to regions (efi_map_region()) and tagging regions when reserving boot services (efi_reserve_boot_services()). There's no way for the generic fake_mem code to know which mapping to use without introducing some arch-specific constant/hook, so just use read-write since read-only is of dubious value for the EFI memory map. Tested-by: Dave Young [kexec/kdump] Tested-by: Ard Biesheuvel [arm] Acked-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Peter Jones Cc: Borislav Petkov Cc: Mark Rutland Signed-off-by: Matt Fleming --- include/linux/efi.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 7f5a58225385..d862d4998580 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -669,6 +669,18 @@ typedef struct { unsigned long tables; } efi_system_table_t; +/* + * Architecture independent structure for describing a memory map for the + * benefit of efi_memmap_init_early(), saving us the need to pass four + * parameters. 
+ */ +struct efi_memory_map_data { + phys_addr_t phys_map; + unsigned long size; + unsigned long desc_version; + unsigned long desc_size; +}; + struct efi_memory_map { phys_addr_t phys_map; void *map; @@ -900,6 +912,10 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, } #endif extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); + +extern int __init efi_memmap_init_early(struct efi_memory_map_data *data); +extern void __init efi_memmap_unmap(void); + extern int efi_config_init(efi_config_table_type_t *arch_tables); #ifdef CONFIG_EFI_ESRT extern void __init efi_esrt_init(void); -- cgit v1.2.3 From dca0f971ea6fcf2f1bb78f7995adf80da9f4767f Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sat, 27 Feb 2016 15:52:50 +0000 Subject: efi: Add efi_memmap_init_late() for permanent EFI memmap Drivers need a way to access the EFI memory map at runtime. ARM and arm64 currently provide this by remapping the EFI memory map into the vmalloc space before setting up the EFI virtual mappings. x86 does not provide this functionality which has resulted in the code in efi_mem_desc_lookup() where it will manually map individual EFI memmap entries if the memmap has already been torn down on x86, /* * If a driver calls this after efi_free_boot_services, * ->map will be NULL, and the target may also not be mapped. * So just always get our own virtual map on the CPU. * */ md = early_memremap(p, sizeof (*md)); There isn't a good reason for not providing a permanent EFI memory map for runtime queries, especially since the EFI regions are not mapped into the standard kernel page tables. 
Tested-by: Dave Young [kexec/kdump] Tested-by: Ard Biesheuvel [arm] Acked-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Peter Jones Cc: Borislav Petkov Cc: Mark Rutland Signed-off-by: Matt Fleming --- include/linux/efi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index d862d4998580..f149676b2fcd 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -688,6 +688,7 @@ struct efi_memory_map { int nr_map; unsigned long desc_version; unsigned long desc_size; + bool late; }; struct efi_fdt_params { @@ -914,6 +915,7 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); extern int __init efi_memmap_init_early(struct efi_memory_map_data *data); +extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size); extern void __init efi_memmap_unmap(void); extern int efi_config_init(efi_config_table_type_t *arch_tables); -- cgit v1.2.3 From 60863c0d1a96b740048cc7d94a2d00d6f89ba3d8 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 29 Feb 2016 20:30:39 +0000 Subject: efi: Split out EFI memory map functions into new file Also move the functions from the EFI fake mem driver since future patches will require access to the memmap insertion code even if CONFIG_EFI_FAKE_MEM isn't enabled. This will be useful when we need to build custom EFI memory maps to allow drivers to mark regions as reserved. 
Tested-by: Dave Young [kexec/kdump] Tested-by: Ard Biesheuvel [arm] Acked-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Peter Jones Cc: Borislav Petkov Cc: Mark Rutland Cc: Taku Izumi Signed-off-by: Matt Fleming --- include/linux/efi.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index f149676b2fcd..84c8638c7a8b 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -691,6 +692,11 @@ struct efi_memory_map { bool late; }; +struct efi_mem_range { + struct range range; + u64 attribute; +}; + struct efi_fdt_params { u64 system_table; u64 mmap; @@ -917,6 +923,10 @@ extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); extern int __init efi_memmap_init_early(struct efi_memory_map_data *data); extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size); extern void __init efi_memmap_unmap(void); +extern int __init efi_memmap_split_count(efi_memory_desc_t *md, + struct range *range); +extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap, + void *buf, struct efi_mem_range *mem); extern int efi_config_init(efi_config_table_type_t *arch_tables); #ifdef CONFIG_EFI_ESRT -- cgit v1.2.3 From c45f4da33a297f85435f8dccb26a24852ea01bb9 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 22 Jun 2016 16:54:00 +0100 Subject: efi: Add efi_memmap_install() for installing new EFI memory maps While efi_memmap_init_{early,late}() exist for architecture code to install memory maps from firmware data and for the virtual memory regions respectively, drivers don't care which stage of the boot we're at and just want to swap the existing memmap for a modified one. efi_memmap_install() abstracts the details of how the new memory map should be mapped and the existing one unmapped. 
Tested-by: Dave Young [kexec/kdump] Tested-by: Ard Biesheuvel [arm] Acked-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Peter Jones Cc: Borislav Petkov Cc: Mark Rutland Cc: Taku Izumi Signed-off-by: Matt Fleming --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 84c8638c7a8b..987c18f6fcae 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -923,6 +923,7 @@ extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); extern int __init efi_memmap_init_early(struct efi_memory_map_data *data); extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size); extern void __init efi_memmap_unmap(void); +extern int __init efi_memmap_install(phys_addr_t addr, unsigned int nr_map); extern int __init efi_memmap_split_count(efi_memory_desc_t *md, struct range *range); extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap, -- cgit v1.2.3 From 816e76129ed5fadd28e526c43397c79775194b5c Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 29 Feb 2016 21:22:52 +0000 Subject: efi: Allow drivers to reserve boot services forever Today, it is not possible for drivers to reserve EFI boot services for access after efi_free_boot_services() has been called on x86. For ARM/arm64 it can be done simply by calling memblock_reserve(). Having this ability for all three architectures is desirable for a couple of reasons, 1) It saves drivers copying data out of those regions 2) kexec reboot can now make use of things like ESRT Instead of using the standard memblock_reserve() which is insufficient to reserve the region on x86 (see efi_reserve_boot_services()), a new API is introduced in this patch; efi_mem_reserve(). efi.memmap now always represents which EFI memory regions are available. On x86 the EFI boot services regions that have not been reserved via efi_mem_reserve() will be removed from efi.memmap during efi_free_boot_services(). 
This has implications for kexec, since it is not possible for a newly kexec'd kernel to access the same boot services regions that the initial boot kernel had access to unless they are reserved by every kexec kernel in the chain. Tested-by: Dave Young [kexec/kdump] Tested-by: Ard Biesheuvel [arm] Acked-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Peter Jones Cc: Borislav Petkov Cc: Mark Rutland Signed-off-by: Matt Fleming --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 987c18f6fcae..3fe4f3c47834 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -944,6 +944,7 @@ extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); extern int __init efi_uart_console_only (void); extern u64 efi_mem_desc_end(efi_memory_desc_t *md); extern int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md); +extern void efi_mem_reserve(phys_addr_t addr, u64 size); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); extern void efi_reserve_boot_services(void); -- cgit v1.2.3 From 31ce8cc68180803aa481c0c1daac29d8eaceca9d Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 1 Mar 2016 23:02:56 +0000 Subject: efi/runtime-map: Use efi.memmap directly instead of a copy Now that efi.memmap is available all of the time there's no need to allocate and build a separate copy of the EFI memory map. Furthermore, efi.memmap contains boot services regions but only those regions that have been reserved via efi_mem_reserve(). Using efi.memmap allows us to pass boot services across kexec reboot so that the ESRT and BGRT drivers will now work. 
Tested-by: Dave Young [kexec/kdump] Tested-by: Ard Biesheuvel [arm] Acked-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Peter Jones Cc: Borislav Petkov Cc: Mark Rutland Signed-off-by: Matt Fleming --- include/linux/efi.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 3fe4f3c47834..d8b555db81c7 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1357,7 +1357,6 @@ extern int efi_capsule_update(efi_capsule_header_t *capsule, #ifdef CONFIG_EFI_RUNTIME_MAP int efi_runtime_map_init(struct kobject *); -void efi_runtime_map_setup(void *, int, u32); int efi_get_runtime_map_size(void); int efi_get_runtime_map_desc_size(void); int efi_runtime_map_copy(void *buf, size_t bufsz); @@ -1367,9 +1366,6 @@ static inline int efi_runtime_map_init(struct kobject *kobj) return 0; } -static inline void -efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) {} - static inline int efi_get_runtime_map_size(void) { return 0; -- cgit v1.2.3 From 217b27d4671a0a3f34147f1b341683d36b7457db Mon Sep 17 00:00:00 2001 From: Sylvain Chouleur Date: Fri, 15 Jul 2016 21:36:29 +0200 Subject: efi: Use a file local lock for efivars This patch replaces the spinlock in the efivars struct with a single lock for the whole vars.c file. The goal of this lock is to protect concurrent calls to efi variable services, registering and unregistering. This allows us to register new efivars operations without having in-progress call. 
Signed-off-by: Sylvain Chouleur Signed-off-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Mark Rutland Cc: Sylvain Chouleur Signed-off-by: Matt Fleming --- include/linux/efi.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index d8b555db81c7..deecb2902715 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1157,12 +1157,6 @@ struct efivar_operations { }; struct efivars { - /* - * ->lock protects two things: - * 1) efivarfs_list and efivars_sysfs_list - * 2) ->ops calls - */ - spinlock_t lock; struct kset *kset; struct kobject *kobject; const struct efivar_operations *ops; -- cgit v1.2.3 From 21b3ddd39feecd2f4d6c52bcd30f0a4fa14f125a Mon Sep 17 00:00:00 2001 From: Sylvain Chouleur Date: Fri, 15 Jul 2016 21:36:30 +0200 Subject: efi: Don't use spinlocks for efi vars All efivars operations are protected by a spinlock which prevents interruptions and preemption. This is too restricted, we just need a lock preventing concurrency. The idea is to use a semaphore of count 1 and to have two ways of locking, depending on the context: - In interrupt context, we call down_trylock(), if it fails we return an error - In normal context, we call down_interruptible() We don't use a mutex here because the mutex_trylock() function must not be called from interrupt context, whereas the down_trylock() can. 
Signed-off-by: Sylvain Chouleur Signed-off-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Mark Rutland Cc: Sylvain Chouleur Signed-off-by: Matt Fleming --- include/linux/efi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index deecb2902715..4d6da7b66c19 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1297,8 +1297,8 @@ struct kobject *efivars_kobject(void); int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), void *data, bool duplicates, struct list_head *head); -void efivar_entry_add(struct efivar_entry *entry, struct list_head *head); -void efivar_entry_remove(struct efivar_entry *entry); +int efivar_entry_add(struct efivar_entry *entry, struct list_head *head); +int efivar_entry_remove(struct efivar_entry *entry); int __efivar_entry_delete(struct efivar_entry *entry); int efivar_entry_delete(struct efivar_entry *entry); @@ -1315,7 +1315,7 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes, int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes, bool block, unsigned long size, void *data); -void efivar_entry_iter_begin(void); +int efivar_entry_iter_begin(void); void efivar_entry_iter_end(void); int __efivar_entry_iter(int (*func)(struct efivar_entry *, void *), -- cgit v1.2.3 From dce48e351c0d42014e5fb16ac3eb099e11b7e716 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 15 Jul 2016 21:36:31 +0200 Subject: efi: Replace runtime services spinlock with semaphore The purpose of the efi_runtime_lock is to prevent concurrent calls into the firmware. There is no need to use spinlocks here, as long as we ensure that runtime service invocations from an atomic context (i.e., EFI pstore) cannot block. So use a semaphore instead, and use down_trylock() in the nonblocking case. 
We don't use a mutex here because the mutex_trylock() function must not be called from interrupt context, whereas the down_trylock() can. Signed-off-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Mark Rutland Cc: Sylvain Chouleur Signed-off-by: Matt Fleming --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 4d6da7b66c19..4c92c0630c45 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -38,6 +38,7 @@ #define EFI_WRITE_PROTECTED ( 8 | (1UL << (BITS_PER_LONG-1))) #define EFI_OUT_OF_RESOURCES ( 9 | (1UL << (BITS_PER_LONG-1))) #define EFI_NOT_FOUND (14 | (1UL << (BITS_PER_LONG-1))) +#define EFI_ABORTED (21 | (1UL << (BITS_PER_LONG-1))) #define EFI_SECURITY_VIOLATION (26 | (1UL << (BITS_PER_LONG-1))) typedef unsigned long efi_status_t; -- cgit v1.2.3 From f035a51536af9802f55d8c79bd87f184ebffb093 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 9 Sep 2016 02:45:29 +0200 Subject: bpf: add BPF_SIZEOF and BPF_FIELD_SIZEOF macros Add BPF_SIZEOF() and BPF_FIELD_SIZEOF() macros to improve the code a bit which otherwise often result in overly long bytes_to_bpf_size(sizeof()) and bytes_to_bpf_size(FIELD_SIZEOF()) lines. So place them into a macro helper instead. Moreover, we currently have a BUILD_BUG_ON(BPF_FIELD_SIZEOF()) check in convert_bpf_extensions(), but we should rather make that generic as well and add a BUILD_BUG_ON() test in all BPF_SIZEOF()/BPF_FIELD_SIZEOF() users to detect any rewriter size issues at compile time. Note, there are currently none, but we want to assert that it stays this way. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/filter.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index a16439b99fd9..7fabad8dc3fc 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -314,6 +314,20 @@ struct bpf_prog_aux; bpf_size; \ }) +#define BPF_SIZEOF(type) \ + ({ \ + const int __size = bytes_to_bpf_size(sizeof(type)); \ + BUILD_BUG_ON(__size < 0); \ + __size; \ + }) + +#define BPF_FIELD_SIZEOF(type, field) \ + ({ \ + const int __size = bytes_to_bpf_size(FIELD_SIZEOF(type, field)); \ + BUILD_BUG_ON(__size < 0); \ + __size; \ + }) + #ifdef CONFIG_COMPAT /* A struct sock_filter is architecture independent. */ struct compat_sock_fprog { -- cgit v1.2.3 From f3694e00123802d688180e7ae90b240669910e3c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 9 Sep 2016 02:45:31 +0200 Subject: bpf: add BPF_CALL_x macros for declaring helpers This work adds BPF_CALL_() macros and converts all the eBPF helper functions to use them, in a similar fashion like we do with SYSCALL_DEFINE() macros that are used today. Motivation for this is to hide all the register handling and all necessary casts from the user, so that it is done automatically in the background when adding a BPF_CALL_() call. This makes current helpers easier to review, eases to write future helpers, avoids getting the casting mess wrong, and allows for extending all helpers at once (f.e. build time checks, etc). It also helps detecting more easily in code reviews that unused registers are not instrumented in the code by accident, breaking compatibility with existing programs. BPF_CALL_() internals are quite similar to SYSCALL_DEFINE() ones with some fundamental differences, for example, for generating the actual helper function that carries all u64 regs, we need to fill unused regs, so that we always end up with 5 u64 regs as an argument. 
I reviewed several 0-5 generated BPF_CALL_() variants of the .i results and they look all as expected. No sparse issue spotted. We let this also sit for a few days with Fengguang's kbuild test robot, and there were no issues seen. On s390, it barked on the "uses dynamic stack allocation" notice, which is an old one from bpf_perf_event_output{,_tp}() reappearing here due to the conversion to the call wrapper, just telling that the perf raw record/frag sits on stack (gcc with s390's -mwarn-dynamicstack), but that's all. Did various runtime tests and they were fine as well. All eBPF helpers are now converted to use these macros, getting rid of a good chunk of all the raw castings. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 7fabad8dc3fc..1f09c521adfe 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -328,6 +328,56 @@ struct bpf_prog_aux; __size; \ }) +#define __BPF_MAP_0(m, v, ...) v +#define __BPF_MAP_1(m, v, t, a, ...) m(t, a) +#define __BPF_MAP_2(m, v, t, a, ...) m(t, a), __BPF_MAP_1(m, v, __VA_ARGS__) +#define __BPF_MAP_3(m, v, t, a, ...) m(t, a), __BPF_MAP_2(m, v, __VA_ARGS__) +#define __BPF_MAP_4(m, v, t, a, ...) m(t, a), __BPF_MAP_3(m, v, __VA_ARGS__) +#define __BPF_MAP_5(m, v, t, a, ...) m(t, a), __BPF_MAP_4(m, v, __VA_ARGS__) + +#define __BPF_REG_0(...) __BPF_PAD(5) +#define __BPF_REG_1(...) __BPF_MAP(1, __VA_ARGS__), __BPF_PAD(4) +#define __BPF_REG_2(...) __BPF_MAP(2, __VA_ARGS__), __BPF_PAD(3) +#define __BPF_REG_3(...) __BPF_MAP(3, __VA_ARGS__), __BPF_PAD(2) +#define __BPF_REG_4(...) __BPF_MAP(4, __VA_ARGS__), __BPF_PAD(1) +#define __BPF_REG_5(...) __BPF_MAP(5, __VA_ARGS__) + +#define __BPF_MAP(n, ...) __BPF_MAP_##n(__VA_ARGS__) +#define __BPF_REG(n, ...) 
__BPF_REG_##n(__VA_ARGS__) + +#define __BPF_CAST(t, a) \ + (__force t) \ + (__force \ + typeof(__builtin_choose_expr(sizeof(t) == sizeof(unsigned long), \ + (unsigned long)0, (t)0))) a +#define __BPF_V void +#define __BPF_N + +#define __BPF_DECL_ARGS(t, a) t a +#define __BPF_DECL_REGS(t, a) u64 a + +#define __BPF_PAD(n) \ + __BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2, \ + u64, __ur_3, u64, __ur_4, u64, __ur_5) + +#define BPF_CALL_x(x, name, ...) \ + static __always_inline \ + u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ + u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ + u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ + { \ + return ____##name(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ + } \ + static __always_inline \ + u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) + +#define BPF_CALL_0(name, ...) BPF_CALL_x(0, name, __VA_ARGS__) +#define BPF_CALL_1(name, ...) BPF_CALL_x(1, name, __VA_ARGS__) +#define BPF_CALL_2(name, ...) BPF_CALL_x(2, name, __VA_ARGS__) +#define BPF_CALL_3(name, ...) BPF_CALL_x(3, name, __VA_ARGS__) +#define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__) +#define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__) + #ifdef CONFIG_COMPAT /* A struct sock_filter is architecture independent. */ struct compat_sock_fprog { -- cgit v1.2.3 From cc60211237086d718e463bcee74004b5bd38a78c Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 18 Jul 2016 16:16:29 +0900 Subject: extcon: adc-jack: Remove the usage of extcon_set_state() This patch removes the usage of extcon_set_state() because it uses the bit masking to change the state of external connectors. The extcon framework should handle the state by extcon_set/get_cable_state_() with extcon id. 
Signed-off-by: Chanwoo Choi --- include/linux/extcon/extcon-adc-jack.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/extcon/extcon-adc-jack.h b/include/linux/extcon/extcon-adc-jack.h index ac85f2061351..a0e03b13b449 100644 --- a/include/linux/extcon/extcon-adc-jack.h +++ b/include/linux/extcon/extcon-adc-jack.h @@ -20,8 +20,8 @@ /** * struct adc_jack_cond - condition to use an extcon state - * @state: the corresponding extcon state (if 0, this struct * denotes the last adc_jack_cond element among the array) + * @id: the unique id of each external connector * @min_adc: min adc value for this condition * @max_adc: max adc value for this condition * @@ -33,7 +33,7 @@ * because when no adc_jack_cond is met, state = 0 is automatically chosen. */ struct adc_jack_cond { - u32 state; /* extcon state value. 0 if invalid */ + unsigned int id; u32 min_adc; u32 max_adc; }; -- cgit v1.2.3 From 84c48dc55945b4edfb63388832ebcca82d003ee7 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Fri, 1 Jul 2016 02:41:18 +0900 Subject: extcon: Block the bit masking operation for cable state except for extcon core This patch restricts the usage of extcon_update_state() to the extcon core because extcon_update_state() uses bit masking to change the state of the external connector. When this function is used in device drivers, mistakes in handling the bit mask may cause problems. Also, this patch removes the extcon_get/set_state() functions because these functions use bit masking, which is an undesirable approach. Instead, extcon provides the extcon_set/get_cable_state_() functions.
Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 61004413dc64..667b1d35af12 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -149,20 +149,6 @@ extern struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, const unsigned int *cable); extern void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev); -/* - * get/set/update_state access the 32b encoded state value, which represents - * states of all possible cables of the multistate port. For example, if one - * calls extcon_set_state(edev, 0x7), it may mean that all the three cables - * are attached to the port. - */ -static inline u32 extcon_get_state(struct extcon_dev *edev) -{ - return edev->state; -} - -extern int extcon_set_state(struct extcon_dev *edev, u32 state); -extern int extcon_update_state(struct extcon_dev *edev, u32 mask, u32 state); - /* * get/set_cable_state access each bit of the 32b encoded state value. * They are used to access the status of each cable based on the cable id. 
@@ -232,22 +218,6 @@ static inline struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, static inline void devm_extcon_dev_free(struct extcon_dev *edev) { } -static inline u32 extcon_get_state(struct extcon_dev *edev) -{ - return 0; -} - -static inline int extcon_set_state(struct extcon_dev *edev, u32 state) -{ - return 0; -} - -static inline int extcon_update_state(struct extcon_dev *edev, u32 mask, - u32 state) -{ - return 0; -} - static inline int extcon_get_cable_state_(struct extcon_dev *edev, unsigned int id) { -- cgit v1.2.3 From 505cf01f984bdcf088c9ec1e96f987f1ff47dc21 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 11 Jul 2016 16:34:52 +0900 Subject: extcon: Add the extcon_type to gather each connector into five category This patch adds the new extcon type to group each connector into the following five categories. This type would be used to handle the connectors as a group unit instead of a connector unit. - EXTCON_TYPE_USB : USB connector - EXTCON_TYPE_CHG : Charger connector - EXTCON_TYPE_JACK : Jack connector - EXTCON_TYPE_DISP : Display connector - EXTCON_TYPE_MISC : Miscellaneous connector Also, each external connector may belong to more than one extcon type. In the case of EXTCON_CHG_USB_SDP, it has both EXTCON_TYPE_CHG and EXTCON_TYPE_USB. 
Signed-off-by: Chanwoo Choi Tested-by: Chris Zhong Tested-by: Guenter Roeck Signed-off-by: MyungJoo Ham Reviewed-by: Guenter Roeck --- include/linux/extcon.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 667b1d35af12..46d802892c82 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -28,6 +28,15 @@ #include +/* + * Define the type of supported external connectors + */ +#define EXTCON_TYPE_USB BIT(0) /* USB connector */ +#define EXTCON_TYPE_CHG BIT(1) /* Charger connector */ +#define EXTCON_TYPE_JACK BIT(2) /* Jack connector */ +#define EXTCON_TYPE_DISP BIT(3) /* Display connector */ +#define EXTCON_TYPE_MISC BIT(4) /* Miscellaneous connector */ + /* * Define the unique id of supported external connectors */ -- cgit v1.2.3 From 067c1652e7a7d50d951eee1d34a414ea931cee6c Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 11 Jul 2016 19:30:43 +0900 Subject: extcon: Add the support for extcon property according to extcon type This patch support the extcon property for the external connector because each external connector might have the property according to the H/W design and the specific characteristics. 
- EXTCON_PROP_USB_[property name] - EXTCON_PROP_CHG_[property name] - EXTCON_PROP_JACK_[property name] - EXTCON_PROP_DISP_[property name] Add the new extcon APIs to get/set the property value as following: - int extcon_get_property(struct extcon_dev *edev, unsigned int id, unsigned int prop, union extcon_property_value *prop_val) - int extcon_set_property(struct extcon_dev *edev, unsigned int id, unsigned int prop, union extcon_property_value prop_val) Signed-off-by: Chanwoo Choi Tested-by: Chris Zhong Tested-by: Guenter Roeck Reviewed-by: Guenter Roeck --- include/linux/extcon.h | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 46d802892c82..f9d4a44e86d3 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -77,6 +77,63 @@ #define EXTCON_NUM 63 +/* + * Define the property of supported external connectors. + * + * When adding the new extcon property, they *must* have + * the type/value/default information. Also, you *have to* + * modify the EXTCON_PROP_[type]_START/END definitions + * which mean the range of the supported properties + * for each extcon type. + * + * The naming style of property + * : EXTCON_PROP_[type]_[property name] + * + * EXTCON_PROP_USB_[property name] : USB property + * EXTCON_PROP_CHG_[property name] : Charger property + * EXTCON_PROP_JACK_[property name] : Jack property + * EXTCON_PROP_DISP_[property name] : Display property + */ + +/* + * Properties of EXTCON_TYPE_USB. + * + * - EXTCON_PROP_USB_VBUS + * @type: integer (intval) + * @value: 0 (low) or 1 (high) + * @default: 0 (low) + */ +#define EXTCON_PROP_USB_VBUS 0 + +#define EXTCON_PROP_USB_MIN 0 +#define EXTCON_PROP_USB_MAX 0 +#define EXTCON_PROP_USB_CNT (EXTCON_PROP_USB_MAX - EXTCON_PROP_USB_MIN + 1) + +/* Properties of EXTCON_TYPE_CHG. 
*/ +#define EXTCON_PROP_CHG_MIN 50 +#define EXTCON_PROP_CHG_MAX 50 +#define EXTCON_PROP_CHG_CNT (EXTCON_PROP_CHG_MAX - EXTCON_PROP_CHG_MIN + 1) + +/* Properties of EXTCON_TYPE_JACK. */ +#define EXTCON_PROP_JACK_MIN 100 +#define EXTCON_PROP_JACK_MAX 100 +#define EXTCON_PROP_JACK_CNT (EXTCON_PROP_JACK_MAX - EXTCON_PROP_JACK_MIN + 1) + +/* Properties of EXTCON_TYPE_DISP. */ +#define EXTCON_PROP_DISP_MIN 150 +#define EXTCON_PROP_DISP_MAX 150 +#define EXTCON_PROP_DISP_CNT (EXTCON_PROP_DISP_MAX - EXTCON_PROP_DISP_MIN + 1) + +/* + * Define the type of property's value. + * + * Define the property's value as union type. Because each property + * would need the different data type to store it. + */ +union extcon_property_value { + int intval; /* type : integer (intval) */ +}; + struct extcon_cable; /** @@ -166,6 +223,17 @@ extern int extcon_get_cable_state_(struct extcon_dev *edev, unsigned int id); extern int extcon_set_cable_state_(struct extcon_dev *edev, unsigned int id, bool cable_state); +/* + * get/set_property access the property value of each external connector. + * They are used to access the property of each cable based on the property id. + */ +extern int extcon_get_property(struct extcon_dev *edev, unsigned int id, + unsigned int prop, + union extcon_property_value *prop_val); +extern int extcon_set_property(struct extcon_dev *edev, unsigned int id, + unsigned int prop, + union extcon_property_value prop_val); + /* * Following APIs are to monitor every action of a notifier. * Registrar gets notified for every external port of a connection device. 
@@ -239,6 +307,19 @@ static inline int extcon_set_cable_state_(struct extcon_dev *edev, return 0; } +static inline int extcon_get_property(struct extcon_dev *edev, unsigned int id, + unsigned int prop, + union extcon_property_value *prop_val) +{ + return 0; +} +static inline int extcon_set_property(struct extcon_dev *edev, unsigned int id, + unsigned int prop, + union extcon_property_value prop_val) +{ + return 0; +} + static inline struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name) { return NULL; -- cgit v1.2.3 From ceaa98f442cf09dc73946c6402489344367905ae Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 25 Jul 2016 21:15:19 +0900 Subject: extcon: Add the support for the capability of each property This patch adds the support of the property capability setting. This function decides the supported properties of each external connector in the extcon provider driver. The list of new extcon APIs to get/set the capability of a property is as follows: - int extcon_get_property_capability(struct extcon_dev *edev, unsigned int id, unsigned int prop); - int extcon_set_property_capability(struct extcon_dev *edev, unsigned int id, unsigned int prop); Signed-off-by: Chanwoo Choi Tested-by: Chris Zhong Tested-by: Guenter Roeck Reviewed-by: Guenter Roeck --- include/linux/extcon.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index f9d4a44e86d3..f08469089f74 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -234,6 +234,16 @@ extern int extcon_set_property(struct extcon_dev *edev, unsigned int id, unsigned int prop, union extcon_property_value prop_val); +/* + * get/set_property_capability set the capability of the property for each + * external connector. They are used to set the capability of the property + * of each external connector based on the id and property. 
+ */ +extern int extcon_get_property_capability(struct extcon_dev *edev, + unsigned int id, unsigned int prop); +extern int extcon_set_property_capability(struct extcon_dev *edev, + unsigned int id, unsigned int prop); + /* * Following APIs are to monitor every action of a notifier. * Registrar gets notified for every external port of a connection device. @@ -320,6 +330,18 @@ static inline int extcon_set_property(struct extcon_dev *edev, unsigned int id, return 0; } +static inline int extcon_get_property_capability(struct extcon_dev *edev, + unsigned int id, unsigned int prop) +{ + return 0; +} + +static inline int extcon_set_property_capability(struct extcon_dev *edev, + unsigned int id, unsigned int prop) +{ + return 0; +} + static inline struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name) { return NULL; -- cgit v1.2.3 From 35872fdcbf5c109dab03fb36ddec35b7bad7d762 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Fri, 22 Jul 2016 13:03:17 +0900 Subject: extcon: Rename the extcon_set/get_state() to maintain the function naming pattern This patch just renames the existing extcon_get/set_cable_state_() as following because of maintaining the function naming pattern like as extcon APIs for property. - extcon_set_cable_state_() -> extcon_set_state() - extcon_get_cable_state_() -> extcon_get_state() But, this patch remains the old extcon_set/get_cable_state_() functions to prevent the build break. After altering new APIs, remove the old APIs. 
Signed-off-by: Chanwoo Choi Tested-by: Chris Zhong Tested-by: Guenter Roeck Reviewed-by: Guenter Roeck --- include/linux/extcon.h | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index f08469089f74..4fa37385c97a 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -216,11 +216,11 @@ extern struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, extern void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev); /* - * get/set_cable_state access each bit of the 32b encoded state value. + * get/set_state access each bit of the 32b encoded state value. * They are used to access the status of each cable based on the cable id. */ -extern int extcon_get_cable_state_(struct extcon_dev *edev, unsigned int id); -extern int extcon_set_cable_state_(struct extcon_dev *edev, unsigned int id, +extern int extcon_get_state(struct extcon_dev *edev, unsigned int id); +extern int extcon_set_state(struct extcon_dev *edev, unsigned int id, bool cable_state); /* @@ -305,14 +305,14 @@ static inline struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, static inline void devm_extcon_dev_free(struct extcon_dev *edev) { } -static inline int extcon_get_cable_state_(struct extcon_dev *edev, - unsigned int id) + +static inline int extcon_get_state(struct extcon_dev *edev, unsigned int id) { return 0; } -static inline int extcon_set_cable_state_(struct extcon_dev *edev, - unsigned int id, bool cable_state) +static inline int extcon_set_state(struct extcon_dev *edev, unsigned int id, + bool cable_state) { return 0; } @@ -402,4 +402,15 @@ static inline int extcon_unregister_interest(struct extcon_specific_cable_nb { return -EINVAL; } + +static inline int extcon_get_cable_state_(struct extcon_dev *edev, unsigned int id) +{ + return extcon_get_state(edev, id); +} + +static inline int extcon_set_cable_state_(struct extcon_dev *edev, 
unsigned int id, + bool cable_state) +{ + return extcon_set_state(edev, id, cable_state); +} #endif /* __LINUX_EXTCON_H__ */ -- cgit v1.2.3 From a580982f0836e079171f65f22d82768a12f85570 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Fri, 22 Jul 2016 13:16:34 +0900 Subject: extcon: Add the synchronization extcon APIs to support the notification This patch adds the synchronization extcon APIs to support the notifications for both state and property. When the extcon_*_sync() functions are called, extcon passes the information from the extcon provider to the extcon client. The extcon driver may need to change both the state and multiple properties at the same time. After setting the data of an external connector, extcon sends the notification to the client driver with extcon_*_sync(). The list of new extcon APIs is as follows: - extcon_sync() : Send the notification for each external connector to synchronize the information between extcon provider driver and extcon client driver. - extcon_set_state_sync() : Set the state of external connector with notification. - extcon_set_property_sync() : Set the property of external connector with notification. For example, case 1, change the state of external connector and synchronize the data. extcon_set_state_sync(edev, EXTCON_USB, 1); case 2, change both the state and property of external connector and synchronize the data. extcon_set_state(edev, EXTCON_USB, 1); extcon_set_property(edev, EXTCON_USB, EXTCON_PROP_USB_VBUS, 1); extcon_sync(edev, EXTCON_USB); case 3, change the property of external connector and synchronize the data. extcon_set_property(edev, EXTCON_USB, EXTCON_PROP_USB_VBUS, 0); extcon_sync(edev, EXTCON_USB); case 4, change the property of external connector and synchronize the data. 
extcon_set_property_sync(edev, EXTCON_USB, EXTCON_PROP_USB_VBUS, 0); Signed-off-by: Chanwoo Choi Tested-by: Chris Zhong Tested-by: Guenter Roeck Reviewed-by: Guenter Roeck --- include/linux/extcon.h | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 4fa37385c97a..162c46a42bac 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -222,6 +222,13 @@ extern void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev); extern int extcon_get_state(struct extcon_dev *edev, unsigned int id); extern int extcon_set_state(struct extcon_dev *edev, unsigned int id, bool cable_state); +extern int extcon_set_state_sync(struct extcon_dev *edev, unsigned int id, + bool cable_state); + +/* + * Synchronize the state and property data for a specific external connector. + */ +extern int extcon_sync(struct extcon_dev *edev, unsigned int id); /* * get/set_property access the property value of each external connector. 
@@ -233,6 +240,9 @@ extern int extcon_get_property(struct extcon_dev *edev, unsigned int id, extern int extcon_set_property(struct extcon_dev *edev, unsigned int id, unsigned int prop, union extcon_property_value prop_val); +extern int extcon_set_property_sync(struct extcon_dev *edev, unsigned int id, + unsigned int prop, + union extcon_property_value prop_val); /* * get/set_property_capability set the capability of the property for each @@ -317,6 +327,17 @@ static inline int extcon_set_state(struct extcon_dev *edev, unsigned int id, return 0; } +static inline int extcon_set_state_sync(struct extcon_dev *edev, unsigned int id, + bool cable_state) +{ + return 0; +} + +static inline int extcon_sync(struct extcon_dev *edev, unsigned int id) +{ + return 0; +} + static inline int extcon_get_property(struct extcon_dev *edev, unsigned int id, unsigned int prop, union extcon_property_value *prop_val) @@ -330,6 +351,13 @@ static inline int extcon_set_property(struct extcon_dev *edev, unsigned int id, return 0; } +static inline int extcon_set_property_sync(struct extcon_dev *edev, + unsigned int id, unsigned int prop, + union extcon_property_value prop_val) +{ + return 0; +} + static inline int extcon_get_property_capability(struct extcon_dev *edev, unsigned int id, unsigned int prop) { @@ -411,6 +439,6 @@ static inline int extcon_get_cable_state_(struct extcon_dev *edev, unsigned int static inline int extcon_set_cable_state_(struct extcon_dev *edev, unsigned int id, bool cable_state) { - return extcon_set_state(edev, id, cable_state); + return extcon_set_state_sync(edev, id, cable_state); } #endif /* __LINUX_EXTCON_H__ */ -- cgit v1.2.3 From 2f25140601115cd1b278e208099c9ebc627b9481 Mon Sep 17 00:00:00 2001 From: Chris Zhong Date: Fri, 22 Jul 2016 01:13:02 +0900 Subject: extcon: Add EXTCON_DISP_DP and the property for USB Type-C Add EXTCON_DISP_DP for the Display external connector. 
For a Type-C connector, DisplayPort can work as an Alternate Mode (VESA DisplayPort Alt Mode on USB Type-C Standard). Type-C supports both normal and flipped orientation, so add a property to extcon. Signed-off-by: Chris Zhong Signed-off-by: Chanwoo Choi Tested-by: Chris Zhong Tested-by: Guenter Roeck Reviewed-by: Guenter Roeck --- include/linux/extcon.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 162c46a42bac..ad7a1606a7f3 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -69,6 +69,7 @@ #define EXTCON_DISP_MHL 41 /* Mobile High-Definition Link */ #define EXTCON_DISP_DVI 42 /* Digital Visual Interface */ #define EXTCON_DISP_VGA 43 /* Video Graphics Array */ +#define EXTCON_DISP_DP 44 /* Display Port */ /* Miscellaneous external connector */ #define EXTCON_DOCK 60 @@ -102,11 +103,16 @@ * @type: integer (intval) * @value: 0 (low) or 1 (high) * @default: 0 (low) + * - EXTCON_PROP_USB_TYPEC_POLARITY + * @type: integer (intval) + * @value: 0 (normal) or 1 (flip) + * @default: 0 (normal) */ #define EXTCON_PROP_USB_VBUS 0 +#define EXTCON_PROP_USB_TYPEC_POLARITY 1 #define EXTCON_PROP_USB_MIN 0 -#define EXTCON_PROP_USB_MAX 0 +#define EXTCON_PROP_USB_MAX 1 #define EXTCON_PROP_USB_CNT (EXTCON_PROP_USB_MAX - EXTCON_PROP_USB_MIN + 1) /* Properties of EXTCON_TYPE_CHG. */ -- cgit v1.2.3 From 736d25b115e8f7b6728f39a993d784aac1c6118b Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Fri, 5 Aug 2016 17:49:23 +0900 Subject: extcon: Add new EXTCON_DISP_HMD for Head-mounted Display device This patch adds the new EXTCON_DISP_HMD id for Head-mounted Display[1] device. The HMD device usually uses the USB connector type. So, the HMD connector has the two extcon types of both EXTCON_TYPE_DISP and EXTCON_TYPE_USB. 
[1] https://en.wikipedia.org/wiki/Head-mounted_display Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index ad7a1606a7f3..e79b644f41a7 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -70,6 +70,7 @@ #define EXTCON_DISP_DVI 42 /* Digital Visual Interface */ #define EXTCON_DISP_VGA 43 /* Video Graphics Array */ #define EXTCON_DISP_DP 44 /* Display Port */ +#define EXTCON_DISP_HMD 45 /* Head-Mounted Display */ /* Miscellaneous external connector */ #define EXTCON_DOCK 60 -- cgit v1.2.3 From af9b9285f2e9b5a625284f92fa508141b26ec381 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Fri, 5 Aug 2016 18:15:46 +0900 Subject: extcon: Add new EXTCON_CHG_WPT for Wireless Power Transfer device This patch adds the new EXTCON_CHG_WPT for Wireless Power Transfer[1]. Wireless Power Transfer is the transmission of electrical energy from a power source. The EXTCON_CHG_WPT has the EXTCON_TYPE_CHG. [1] https://en.wikipedia.org/wiki/Wireless_power_transfer Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index e79b644f41a7..461abee969b7 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -53,6 +53,7 @@ #define EXTCON_CHG_USB_ACA 8 /* Accessory Charger Adapter */ #define EXTCON_CHG_USB_FAST 9 #define EXTCON_CHG_USB_SLOW 10 +#define EXTCON_CHG_WPT 11 /* Wireless Power Transfer */ /* Jack external connector */ #define EXTCON_JACK_MICROPHONE 20 -- cgit v1.2.3 From 8df0cfe6c6c4a9355989baa8de9f166b2bc51f76 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 15 Aug 2016 06:15:35 -0700 Subject: extcon: Introduce EXTCON_PROP_USB_SS property for SuperSpeed mode EXTCON_PROP_USB_SS (SuperSpeed)[1] is necessary to distinguish between USB/USB2 and USB3 connections on USB Type-C cables. 
[1] https://en.wikipedia.org/wiki/USB#Overview Cc: Chris Zhong Signed-off-by: Guenter Roeck Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 461abee969b7..b34d1ae9011f 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -109,12 +109,18 @@ * @type: integer (intval) * @value: 0 (normal) or 1 (flip) * @default: 0 (normal) + * - EXTCON_PROP_USB_SS (SuperSpeed) + * @type: integer (intval) + * @value: 0 (USB/USB2) or 1 (USB3) + * @default: 0 (USB/USB2) + * */ #define EXTCON_PROP_USB_VBUS 0 #define EXTCON_PROP_USB_TYPEC_POLARITY 1 +#define EXTCON_PROP_USB_SS 2 #define EXTCON_PROP_USB_MIN 0 -#define EXTCON_PROP_USB_MAX 1 +#define EXTCON_PROP_USB_MAX 2 #define EXTCON_PROP_USB_CNT (EXTCON_PROP_USB_MAX - EXTCON_PROP_USB_MIN + 1) /* Properties of EXTCON_TYPE_CHG. */ -- cgit v1.2.3 From cac18ecb6f44b11bc303d7afbae3887b27938fa4 Mon Sep 17 00:00:00 2001 From: Randy Li Date: Sat, 10 Sep 2016 02:59:37 +0800 Subject: phy: Add reset callback The only use for this is for solving a hardware design problem in usb of Rockchip RK3288. 
Signed-off-by: Randy Li Reviewed-by: Heiko Stuebner Signed-off-by: Kishon Vijay Abraham I --- include/linux/phy/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index f08b67238b58..ee1bed7dbfc6 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -36,6 +36,7 @@ enum phy_mode { * @power_on: powering on the phy * @power_off: powering off the phy * @set_mode: set the mode of the phy + * @reset: resetting the phy * @owner: the module owner containing the ops */ struct phy_ops { @@ -44,6 +45,7 @@ struct phy_ops { int (*power_on)(struct phy *phy); int (*power_off)(struct phy *phy); int (*set_mode)(struct phy *phy, enum phy_mode mode); + int (*reset)(struct phy *phy); struct module *owner; }; @@ -136,6 +138,7 @@ int phy_exit(struct phy *phy); int phy_power_on(struct phy *phy); int phy_power_off(struct phy *phy); int phy_set_mode(struct phy *phy, enum phy_mode mode); +int phy_reset(struct phy *phy); static inline int phy_get_bus_width(struct phy *phy) { return phy->attrs.bus_width; -- cgit v1.2.3 From 702a7b8e064a93df0b63e9d718b666a9851088fc Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 1 Sep 2016 10:27:17 +0200 Subject: iio: trigger: helpers to determine own trigger This adds a helper function to the IIO trigger framework: iio_trigger_using_own(): for an IIO device, this tells whether the device is using itself as a trigger. This is true if the indio device: (A) supplies a trigger and (B) has assigned its own buffer poll function to use this trigger. This helper function is good when constructing triggered, buffered drivers that can either use its own hardware *OR* an external trigger such as a HRTimer or even the trigger from a totally different sensor. 
Under such circumstances it is important to know for example if the timestamp from the same trigger hardware should be used when populating the buffer: if iio_trigger_using_own() is true, we can use this timestamp, else we need to pick a unique timestamp directly in the trigger handler. For this to work of course IIO devices registering hardware triggers must follow the convention to set the parent device properly, as well as setting the parent of the IIO device itself. When a new poll function is attached, we check if the parent device of the IIO of the poll function is the same as the parent device of the trigger and in that case we conclude that the hardware is using itself as trigger. Cc: Giuseppe Barba Cc: Denis Ciocca Cc: Crestez Dan Leonard Cc: Gregor Boirie Signed-off-by: Linus Walleij Signed-off-by: Jonathan Cameron --- include/linux/iio/trigger.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index f0890a5abf13..4f1154f7a33c 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -56,6 +56,9 @@ struct iio_trigger_ops { * @subirqs: [INTERN] information about the 'child' irqs. * @pool: [INTERN] bitmap of irqs currently in use. * @pool_lock: [INTERN] protection of the irq pool. + * @attached_own_device:[INTERN] if we are using our own device as trigger, + * i.e. if we registered a poll function to the same + * device as the one providing the trigger. 
**/ struct iio_trigger { const struct iio_trigger_ops *ops; @@ -73,6 +76,7 @@ struct iio_trigger { struct iio_subirq subirqs[CONFIG_IIO_CONSUMERS_PER_TRIGGER]; unsigned long pool[BITS_TO_LONGS(CONFIG_IIO_CONSUMERS_PER_TRIGGER)]; struct mutex pool_lock; + bool attached_own_device; }; @@ -160,6 +164,13 @@ irqreturn_t iio_trigger_generic_data_rdy_poll(int irq, void *private); __printf(1, 2) struct iio_trigger *iio_trigger_alloc(const char *fmt, ...); void iio_trigger_free(struct iio_trigger *trig); +/** + * iio_trigger_using_own() - tells us if we use our own HW trigger ourselves + * @indio_dev: device to check + */ +bool iio_trigger_using_own(struct iio_dev *indio_dev); + + #else struct iio_trigger; struct iio_trigger_ops; -- cgit v1.2.3 From 6b6adee3dad25bbe568ee24fc843372d02fb425f Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:18 +0300 Subject: net/mlx5: SRIOV core code refactoring Simplify the code and makes it look modular and symmetric. Split sriov enable/disable to two levels: device level and pci level. When user enable/disable sriov (via sriov_configure driver callback) we will enable/disable both device and pci sriov. When driver load/unload we will enable/disable (on demand) only device sriov while keeping the PCI sriov enabled for next driver load. On internal/pci error, VFs will be kept enabled on PCI and the reset is done only in device level. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- include/linux/mlx5/driver.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5cb9fa7aec61..0d7aedfce1d7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -828,8 +828,6 @@ void mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); int mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); -int mlx5_sriov_init(struct mlx5_core_dev *dev); -int mlx5_sriov_cleanup(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, s32 npages); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); -- cgit v1.2.3 From 737a234bb6384800a5b632be85c6b0ad6221d137 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 9 Sep 2016 17:35:19 +0300 Subject: net/mlx5: Introduce attach/detach to interface API Add attach/detach callbacks to interface API. This is crucial for implementing seamless reset flow which releases the hardware and it's resources upon detach while keeping software structures and state (e.g netdev) then reset and reallocate the hardware needed resources upon attach. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- include/linux/mlx5/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0d7aedfce1d7..85c4786427e4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -930,6 +930,8 @@ enum { struct mlx5_interface { void * (*add)(struct mlx5_core_dev *dev); void (*remove)(struct mlx5_core_dev *dev, void *context); + int (*attach)(struct mlx5_core_dev *dev, void *context); + void (*detach)(struct mlx5_core_dev *dev, void *context); void (*event)(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param); void * (*get_dev)(void *context); -- cgit v1.2.3 From 3a8963acc70e69606729404713cfa9a03b58b18c Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 9 Sep 2016 12:45:24 -0700 Subject: Revert "hv_netvsc: make inline functions static" These functions are used by other code misc-next tree. This reverts commit 30d1de08c87ddde6f73936c3350e7e153988fe02. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/hyperv.h | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index b01c8c3dd531..5df444b1ac18 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1429,4 +1429,88 @@ static inline bool hv_need_to_signal_on_read(struct hv_ring_buffer_info *rbi) return false; } +/* + * An API to support in-place processing of incoming VMBUS packets. 
+ */ +#define VMBUS_PKT_TRAILER 8 + +static inline struct vmpacket_descriptor * +get_next_pkt_raw(struct vmbus_channel *channel) +{ + struct hv_ring_buffer_info *ring_info = &channel->inbound; + u32 read_loc = ring_info->priv_read_index; + void *ring_buffer = hv_get_ring_buffer(ring_info); + struct vmpacket_descriptor *cur_desc; + u32 packetlen; + u32 dsize = ring_info->ring_datasize; + u32 delta = read_loc - ring_info->ring_buffer->read_index; + u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta); + + if (bytes_avail_toread < sizeof(struct vmpacket_descriptor)) + return NULL; + + if ((read_loc + sizeof(*cur_desc)) > dsize) + return NULL; + + cur_desc = ring_buffer + read_loc; + packetlen = cur_desc->len8 << 3; + + /* + * If the packet under consideration is wrapping around, + * return failure. + */ + if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > (dsize - 1)) + return NULL; + + return cur_desc; +} + +/* + * A helper function to step through packets "in-place" + * This API is to be called after each successful call + * get_next_pkt_raw(). + */ +static inline void put_pkt_raw(struct vmbus_channel *channel, + struct vmpacket_descriptor *desc) +{ + struct hv_ring_buffer_info *ring_info = &channel->inbound; + u32 read_loc = ring_info->priv_read_index; + u32 packetlen = desc->len8 << 3; + u32 dsize = ring_info->ring_datasize; + + if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > dsize) + BUG(); + /* + * Include the packet trailer. + */ + ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER; +} + +/* + * This call commits the read index and potentially signals the host. 
+ * Here is the pattern for using the "in-place" consumption APIs: + * + * while (get_next_pkt_raw() { + * process the packet "in-place"; + * put_pkt_raw(); + * } + * if (packets processed in place) + * commit_rd_index(); + */ +static inline void commit_rd_index(struct vmbus_channel *channel) +{ + struct hv_ring_buffer_info *ring_info = &channel->inbound; + /* + * Make sure all reads are done before we update the read index since + * the writer may start writing to the read area once the read index + * is updated. + */ + virt_rmb(); + ring_info->ring_buffer->read_index = ring_info->priv_read_index; + + if (hv_need_to_signal_on_read(ring_info)) + vmbus_set_event(channel); +} + + #endif /* _HYPERV_H */ -- cgit v1.2.3 From 14bf873e5921fd414cf6f0b31b799eeabd27dd74 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Thu, 8 Sep 2016 02:58:32 +0300 Subject: gpio: lpc32xx: remove unused platform data file ARM LPC32xx platform is device-tree only, there is no need to keep a file with GPIO platform data structures, however some of macro definitions should be moved to the driver code, which is the only user of the removed header file. Signed-off-by: Vladimir Zapolskiy Signed-off-by: Linus Walleij --- include/linux/platform_data/gpio-lpc32xx.h | 50 ------------------------------ 1 file changed, 50 deletions(-) delete mode 100644 include/linux/platform_data/gpio-lpc32xx.h (limited to 'include/linux') diff --git a/include/linux/platform_data/gpio-lpc32xx.h b/include/linux/platform_data/gpio-lpc32xx.h deleted file mode 100644 index a544e962a818..000000000000 --- a/include/linux/platform_data/gpio-lpc32xx.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Author: Kevin Wells - * - * Copyright (C) 2010 NXP Semiconductors - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef __MACH_GPIO_LPC32XX_H -#define __MACH_GPIO_LPC32XX_H - -/* - * Note! - * Muxed GP pins need to be setup to the GP state in the board level - * code prior to using this driver. - * GPI pins : 28xP3 group - * GPO pins : 24xP3 group - * GPIO pins: 8xP0 group, 24xP1 group, 13xP2 group, 6xP3 group - */ - -#define LPC32XX_GPIO_P0_MAX 8 -#define LPC32XX_GPIO_P1_MAX 24 -#define LPC32XX_GPIO_P2_MAX 13 -#define LPC32XX_GPIO_P3_MAX 6 -#define LPC32XX_GPI_P3_MAX 29 -#define LPC32XX_GPO_P3_MAX 24 - -#define LPC32XX_GPIO_P0_GRP 0 -#define LPC32XX_GPIO_P1_GRP (LPC32XX_GPIO_P0_GRP + LPC32XX_GPIO_P0_MAX) -#define LPC32XX_GPIO_P2_GRP (LPC32XX_GPIO_P1_GRP + LPC32XX_GPIO_P1_MAX) -#define LPC32XX_GPIO_P3_GRP (LPC32XX_GPIO_P2_GRP + LPC32XX_GPIO_P2_MAX) -#define LPC32XX_GPI_P3_GRP (LPC32XX_GPIO_P3_GRP + LPC32XX_GPIO_P3_MAX) -#define LPC32XX_GPO_P3_GRP (LPC32XX_GPI_P3_GRP + LPC32XX_GPI_P3_MAX) - -/* - * A specific GPIO can be selected with this macro - * ie, GPIO_05 can be selected with LPC32XX_GPIO(LPC32XX_GPIO_P3_GRP, 5) - * See the LPC32x0 User's guide for GPIO group numbers - */ -#define LPC32XX_GPIO(x, y) ((x) + (y)) - -#endif /* __MACH_GPIO_LPC32XX_H */ -- cgit v1.2.3 From 8e8118f893138d4cc3d4dbf4163d7497fca54a9d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 11 Sep 2016 22:55:53 +0200 Subject: netfilter: conntrack: remove packet hotpath stats These counters sit in hot path and do show up in perf, this is especially true for 'found' and 'searched' which get incremented for every packet processed. 
Information like searched=212030105 new=623431 found=333613 delete=623327 does not seem too helpful nowadays: - on busy systems found and searched will overflow every few hours (these are 32bit integers), other more busy ones every few days. - for debugging there are better methods, such as iptables' trace target, the conntrack log sysctls. Nowadays we also have perf tool. This removes packet path stat counters except those that are expected to be 0 (or close to 0) on a normal system, e.g. 'insert_failed' (race happened) or 'invalid' (proto tracker rejects). The insert stat is retained for the ctnetlink case. The found stat is retained for the tuple-is-taken check when NAT has to determine if it needs to pick a different source address. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 275505792664..1d1ef4e20512 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -4,13 +4,9 @@ #include struct ip_conntrack_stat { - unsigned int searched; unsigned int found; - unsigned int new; unsigned int invalid; unsigned int ignore; - unsigned int delete; - unsigned int delete_list; unsigned int insert; unsigned int insert_failed; unsigned int drop; -- cgit v1.2.3 From 7c7289a40425d48bbfcaacc454a8caf5b47f63b0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 7 Sep 2016 15:43:22 +0300 Subject: spi: pxa2xx: Default thresholds to PXA configuration Most of the devices in the supported list have PXA configuration of FIFO. In particular, Intel Medfield and Merrifield have a bigger FIFO than the one defined for CE4100. Split CE4100 in a similar way to how it was done for Intel Quark, i.e. prefix definitions by CE4100 and append necessary pieces of code to switch case conditions. 
We are on safe side since those bits are ignored on all LPSS IPs. Signed-off-by: Andy Shevchenko Reviewed-by: Jarkko Nikula Signed-off-by: Mark Brown --- include/linux/pxa2xx_ssp.h | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index 2a097d176ba9..2d6f0c39ed68 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -83,7 +83,6 @@ #define SSSR_RFS (1 << 6) /* Receive FIFO Service Request */ #define SSSR_ROR (1 << 7) /* Receive FIFO Overrun */ -#ifdef CONFIG_ARCH_PXA #define RX_THRESH_DFLT 8 #define TX_THRESH_DFLT 8 @@ -95,19 +94,16 @@ #define SSCR1_RFT (0x00003c00) /* Receive FIFO Threshold (mask) */ #define SSCR1_RxTresh(x) (((x) - 1) << 10) /* level [1..16] */ -#else - -#define RX_THRESH_DFLT 2 -#define TX_THRESH_DFLT 2 +#define RX_THRESH_CE4100_DFLT 2 +#define TX_THRESH_CE4100_DFLT 2 -#define SSSR_TFL_MASK (0x3 << 8) /* Transmit FIFO Level mask */ -#define SSSR_RFL_MASK (0x3 << 12) /* Receive FIFO Level mask */ +#define CE4100_SSSR_TFL_MASK (0x3 << 8) /* Transmit FIFO Level mask */ +#define CE4100_SSSR_RFL_MASK (0x3 << 12) /* Receive FIFO Level mask */ -#define SSCR1_TFT (0x000000c0) /* Transmit FIFO Threshold (mask) */ -#define SSCR1_TxTresh(x) (((x) - 1) << 6) /* level [1..4] */ -#define SSCR1_RFT (0x00000c00) /* Receive FIFO Threshold (mask) */ -#define SSCR1_RxTresh(x) (((x) - 1) << 10) /* level [1..4] */ -#endif +#define CE4100_SSCR1_TFT (0x000000c0) /* Transmit FIFO Threshold (mask) */ +#define CE4100_SSCR1_TxTresh(x) (((x) - 1) << 6) /* level [1..4] */ +#define CE4100_SSCR1_RFT (0x00000c00) /* Receive FIFO Threshold (mask) */ +#define CE4100_SSCR1_RxTresh(x) (((x) - 1) << 10) /* level [1..4] */ /* QUARK_X1000 SSCR0 bit definition */ #define QUARK_X1000_SSCR0_DSS (0x1F) /* Data Size Select (mask) */ -- cgit v1.2.3 From 88ef16d888a094587b2ac77de60927df5da5d56d Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: 
Mon, 12 Sep 2016 20:54:20 +0200 Subject: ACPI: I/O Remapping Table (IORT) initial support IORT shows representation of IO topology for ARM based systems. It describes how various components are connected together on parent-child basis e.g. PCI RC -> SMMU -> ITS. Also see IORT spec. http://infocenter.arm.com/help/topic/com.arm.doc.den0049b/DEN0049B_IO_Remapping_Table.pdf Initial support allows to detect IORT table presence and save its root pointer obtained through acpi_get_table(). The pointer validity depends on acpi_gbl_permanent_mmap because if acpi_gbl_permanent_mmap is not set while using IORT nodes we would dereference unmapped pointers. For the aforementioned reason call acpi_iort_init() from acpi_init() which guarantees acpi_gbl_permanent_mmap to be set at that point. Add generic helpers which are helpful for scanning and retrieving information from IORT table content. List of the most important helpers: - iort_find_dev_node() finds IORT node for a given device - iort_node_map_rid() maps device RID and returns IORT node which provides final translation IORT support is placed under drivers/acpi/arm64/ new directory due to its ARM64 specific nature. The code there is considered only for ARM64. The long term plan is to keep all ARM64 specific tables support in this place e.g. GTDT table. Signed-off-by: Tomasz Nowicki Acked-by: Rafael J. 
Wysocki Reviewed-by: Hanjun Guo Reviewed-by: Lorenzo Pieralisi Signed-off-by: Marc Zyngier --- include/linux/acpi_iort.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 include/linux/acpi_iort.h (limited to 'include/linux') diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h new file mode 100644 index 000000000000..fcacaf7ed64d --- /dev/null +++ b/include/linux/acpi_iort.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2016, Semihalf + * Author: Tomasz Nowicki + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ + +#ifndef __ACPI_IORT_H__ +#define __ACPI_IORT_H__ + +#include + +#ifdef CONFIG_ACPI_IORT +void acpi_iort_init(void); +#else +static inline void acpi_iort_init(void) { } +#endif + +#endif /* __ACPI_IORT_H__ */ -- cgit v1.2.3 From 4bf2efd26d7624372fb7adff8745b4c2e8407004 Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Mon, 12 Sep 2016 20:32:21 +0200 Subject: ACPI: Add new IORT functions to support MSI domain handling For ITS, MSI functionality consists on building domain stack and during that process we need to reference to domain stack components e.g. before we create new DOMAIN_BUS_PCI_MSI domain we need to specify its DOMAIN_BUS_NEXUS parent domain. 
In order to manage that process properly, maintain list which elements contain domain token (unique for MSI domain stack) and ITS ID: iort_register_domain_token() and iort_deregister_domain_token(). Then retrieve domain token any time later with ITS ID being key off: iort_find_domain_token(). With domain token and domain type we are able to find corresponding IRQ domain. Since IORT is prepared to describe MSI domain on a per-device basis, use existing IORT helpers and implement two calls: 1. iort_msi_map_rid() to map MSI RID for a device 2. iort_get_device_domain() to find domain token for a device Signed-off-by: Tomasz Nowicki Acked-by: Rafael J. Wysocki Reviewed-by: Hanjun Guo Signed-off-by: Marc Zyngier --- include/linux/acpi_iort.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index fcacaf7ed64d..0e32dac8fd03 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -20,11 +20,23 @@ #define __ACPI_IORT_H__ #include +#include +#include +int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node); +void iort_deregister_domain_token(int trans_id); +struct fwnode_handle *iort_find_domain_token(int trans_id); #ifdef CONFIG_ACPI_IORT void acpi_iort_init(void); +u32 iort_msi_map_rid(struct device *dev, u32 req_id); +struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id); #else static inline void acpi_iort_init(void) { } +static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id) +{ return req_id; } +static inline struct irq_domain *iort_get_device_domain(struct device *dev, + u32 req_id) +{ return NULL; } #endif #endif /* __ACPI_IORT_H__ */ -- cgit v1.2.3 From db40f0a7aea5e03ef044ef5dbc51a364e1ff7991 Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Mon, 12 Sep 2016 20:32:24 +0200 Subject: irqchip/gicv3-its: Refactor ITS DT init code to prepare for ACPI In order to add ACPI support we need to isolate ACPI&DT 
common code and move DT logic to corresponding functions. To achieve this we are using firmware agnostic handle which can be unpacked to either DT or ACPI node. No functional changes other than a very minor one: 1. Terminate its_init call with -ENODEV for non-DT case which allows to remove hack from its-gic-v3.c. 2. Fix ITS base register address type (from 'unsigned long' to 'phys_addr_t'), as a bonus we get nice string formatting. 3. Since there is only one of ITS parent domain convert it to static global variable and drop the parameter from its_probe_one. Users can refer to it in more convenient way then. Signed-off-by: Hanjun Guo Signed-off-by: Tomasz Nowicki Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 99ac022edc60..8361c8d3edd1 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -430,9 +430,9 @@ struct rdists { }; struct irq_domain; -struct device_node; +struct fwnode_handle; int its_cpu_init(void); -int its_init(struct device_node *node, struct rdists *rdists, +int its_init(struct fwnode_handle *handle, struct rdists *rdists, struct irq_domain *domain); static inline bool gic_enable_sre(void) -- cgit v1.2.3 From fa7fd6fa38e36d88bc9f2d0e45e5b9bd0387079f Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 19 Aug 2016 14:41:00 +0100 Subject: PM / sleep: enable suspend-to-idle even without registered suspend_ops Suspend-to-idle (aka the "freeze" sleep state) is a system sleep state in which all of the processors enter deepest possible idle state and wait for interrupts right after suspending all the devices. There is no hard requirement for a platform to support and register platform specific suspend_ops to enter suspend-to-idle/freeze state. 
Only deeper system sleep states like PM_SUSPEND_STANDBY and PM_SUSPEND_MEM rely on such low level support/implementation. suspend-to-idle can be entered as long as all the devices can be suspended. This patch enables the support for suspend-to-idle even on systems that don't have any low level support for deeper system sleep states and/or don't register any platform specific suspend_ops. Signed-off-by: Sudeep Holla Tested-by: Andy Gross Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 7693e39b14fe..d9718378a8be 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -245,6 +245,7 @@ static inline bool idle_should_freeze(void) return unlikely(suspend_freeze_state == FREEZE_STATE_ENTER); } +extern void __init pm_states_init(void); extern void freeze_set_ops(const struct platform_freeze_ops *ops); extern void freeze_wake(void); @@ -279,6 +280,7 @@ static inline bool pm_resume_via_firmware(void) { return false; } static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } static inline bool idle_should_freeze(void) { return false; } +static inline void __init pm_states_init(void) {} static inline void freeze_set_ops(const struct platform_freeze_ops *ops) {} static inline void freeze_wake(void) {} #endif /* !CONFIG_SUSPEND */ -- cgit v1.2.3 From ec69572b3f0daf1f26b3e563d56e487d515a3cd1 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Mon, 12 Sep 2016 12:01:05 +0100 Subject: PM / Domains: Add new helper functions for device-tree Ideally, if we are returning a reference to a PM domain via a call to of_genpd_get_from_provider(), then we should keep track of such references via a reference count. The reference count could then be used to determine if a PM domain can be safely removed. 
Alternatively, it is possible to avoid such external references by providing APIs to access the PM domain and hence, eliminate any calls to of_genpd_get_from_provider(). Add new helper functions for adding a device and a subdomain to a PM domain when using device-tree, so that external calls to of_genpd_get_from_provider() can be removed. Signed-off-by: Jon Hunter Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 31fec858088c..e1964a242389 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -208,6 +208,10 @@ struct generic_pm_domain *__of_genpd_xlate_simple( struct generic_pm_domain *__of_genpd_xlate_onecell( struct of_phandle_args *genpdspec, void *data); +extern int of_genpd_add_device(struct of_phandle_args *args, + struct device *dev); +extern int of_genpd_add_subdomain(struct of_phandle_args *parent, + struct of_phandle_args *new_subdomain); int genpd_dev_pm_attach(struct device *dev); #else /* !CONFIG_PM_GENERIC_DOMAINS_OF */ @@ -227,6 +231,18 @@ static inline struct generic_pm_domain *of_genpd_get_from_provider( #define __of_genpd_xlate_simple NULL #define __of_genpd_xlate_onecell NULL +static inline int of_genpd_add_device(struct of_phandle_args *args, + struct device *dev) +{ + return -ENODEV; +} + +static inline int of_genpd_add_subdomain(struct of_phandle_args *parent, + struct of_phandle_args *new_subdomain) +{ + return -ENODEV; +} + static inline int genpd_dev_pm_attach(struct device *dev) { return -ENODEV; -- cgit v1.2.3 From f58d4e5ab0ca3453f091eab514474e9fdbfc539f Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Mon, 12 Sep 2016 12:01:08 +0100 Subject: PM / Domains: Don't expose generic_pm_domain structure to clients There should be no need to expose the generic_pm_domain structure to clients and this eliminates the need to implement 
reference counting for any external reference to a PM domain. Therefore, make the functions pm_genpd_lookup_dev() and of_genpd_get_from_provider() private to the PM domain core. The functions are renamed in accordance with the naming conventions for genpd static functions. Signed-off-by: Jon Hunter Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index e1964a242389..bd411e754f4a 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -116,7 +116,6 @@ static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) return to_gpd_data(dev->power.subsys_data->domain_data); } -extern struct generic_pm_domain *pm_genpd_lookup_dev(struct device *dev); extern int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, struct gpd_timing_data *td); @@ -138,10 +137,6 @@ static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) { return ERR_PTR(-ENOSYS); } -static inline struct generic_pm_domain *pm_genpd_lookup_dev(struct device *dev) -{ - return NULL; -} static inline int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, struct gpd_timing_data *td) @@ -199,9 +194,6 @@ typedef struct generic_pm_domain *(*genpd_xlate_t)(struct of_phandle_args *args, int __of_genpd_add_provider(struct device_node *np, genpd_xlate_t xlate, void *data); void of_genpd_del_provider(struct device_node *np); -struct generic_pm_domain *of_genpd_get_from_provider( - struct of_phandle_args *genpdspec); - struct generic_pm_domain *__of_genpd_xlate_simple( struct of_phandle_args *genpdspec, void *data); @@ -222,12 +214,6 @@ static inline int __of_genpd_add_provider(struct device_node *np, } static inline void of_genpd_del_provider(struct device_node *np) {} -static inline struct generic_pm_domain *of_genpd_get_from_provider( - 
struct of_phandle_args *genpdspec) -{ - return NULL; -} - #define __of_genpd_xlate_simple NULL #define __of_genpd_xlate_onecell NULL -- cgit v1.2.3 From 892ebdcccb4e11c5b43036a50e89e3869df5b429 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Mon, 12 Sep 2016 12:01:09 +0100 Subject: PM / Domains: Don't expose xlate and provider helper functions Functions __of_genpd_xlate_simple(), __of_genpd_xlate_onecell() and __of_genpd_add_provider() are not used outside of the core generic PM domain code. Therefore, reduce the number of APIs exposed by making these static. At the same time don't expose the typedef for genpd_xlate_t either and make this a local definition as well. The functions are renamed to follow the naming conventions for static functions in the generic PM domain core. Signed-off-by: Jon Hunter Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 42 ++++++++++++++---------------------------- 1 file changed, 14 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index bd411e754f4a..f103869db443 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -187,19 +187,12 @@ struct genpd_onecell_data { unsigned int num_domains; }; -typedef struct generic_pm_domain *(*genpd_xlate_t)(struct of_phandle_args *args, - void *data); - #ifdef CONFIG_PM_GENERIC_DOMAINS_OF -int __of_genpd_add_provider(struct device_node *np, genpd_xlate_t xlate, - void *data); +int of_genpd_add_provider_simple(struct device_node *np, + struct generic_pm_domain *genpd); +int of_genpd_add_provider_onecell(struct device_node *np, + struct genpd_onecell_data *data); void of_genpd_del_provider(struct device_node *np); -struct generic_pm_domain *__of_genpd_xlate_simple( - struct of_phandle_args *genpdspec, - void *data); -struct generic_pm_domain *__of_genpd_xlate_onecell( - struct of_phandle_args *genpdspec, - void *data); extern int of_genpd_add_device(struct 
of_phandle_args *args, struct device *dev); extern int of_genpd_add_subdomain(struct of_phandle_args *parent, @@ -207,15 +200,19 @@ extern int of_genpd_add_subdomain(struct of_phandle_args *parent, int genpd_dev_pm_attach(struct device *dev); #else /* !CONFIG_PM_GENERIC_DOMAINS_OF */ -static inline int __of_genpd_add_provider(struct device_node *np, - genpd_xlate_t xlate, void *data) +static inline int of_genpd_add_provider_simple(struct device_node *np, + struct generic_pm_domain *genpd) { - return 0; + return -ENOTSUPP; +} + +static inline int of_genpd_add_provider_onecell(struct device_node *np, + struct genpd_onecell_data *data) +{ + return -ENOTSUPP; } -static inline void of_genpd_del_provider(struct device_node *np) {} -#define __of_genpd_xlate_simple NULL -#define __of_genpd_xlate_onecell NULL +static inline void of_genpd_del_provider(struct device_node *np) {} static inline int of_genpd_add_device(struct of_phandle_args *args, struct device *dev) @@ -235,17 +232,6 @@ static inline int genpd_dev_pm_attach(struct device *dev) } #endif /* CONFIG_PM_GENERIC_DOMAINS_OF */ -static inline int of_genpd_add_provider_simple(struct device_node *np, - struct generic_pm_domain *genpd) -{ - return __of_genpd_add_provider(np, __of_genpd_xlate_simple, genpd); -} -static inline int of_genpd_add_provider_onecell(struct device_node *np, - struct genpd_onecell_data *data) -{ - return __of_genpd_add_provider(np, __of_genpd_xlate_onecell, data); -} - #ifdef CONFIG_PM extern int dev_pm_domain_attach(struct device *dev, bool power_on); extern void dev_pm_domain_detach(struct device *dev, bool power_off); -- cgit v1.2.3 From de0aa06d8b1c39df1071bfe169b3b97ca6bc01ac Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Mon, 12 Sep 2016 12:01:12 +0100 Subject: PM / Domains: Store the provider in the PM domain structure It is possible that a device has more than one provider of PM domains and to support the removal of a PM domain by provider, it is necessary to store a reference to the 
provider in the PM domain structure. Therefore, store a reference to the firmware node handle in the PM domain structure and populate it when providers (only device-tree based providers are currently supported by PM domains) are registered. Please note that when removing PM domains, it is necessary to verify that the PM domain provider has been removed from the list of providers before the PM domain can be removed. To do this add another member to the PM domain structure that indicates if the provider is present and set this member accordingly when providers are added and removed. Initialise the 'provider' and 'has_provider' members of the generic_pm_domain structure when a PM domain is added by calling pm_genpd_init(). Signed-off-by: Jon Hunter Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index f103869db443..554f8915c691 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -51,6 +51,8 @@ struct generic_pm_domain { struct mutex lock; struct dev_power_governor *gov; struct work_struct power_off_work; + struct fwnode_handle *provider; /* Identity of the domain provider */ + bool has_provider; const char *name; atomic_t sd_count; /* Number of subdomains with power "on" */ enum gpd_status status; /* Current state of the domain */ -- cgit v1.2.3 From 3fe577107ccf1974958701df710e0b07ef16db75 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Mon, 12 Sep 2016 12:01:13 +0100 Subject: PM / Domains: Add support for removing PM domains The genpd framework allows users to add PM domains via the pm_genpd_init() function, however, there is no corresponding function to remove a PM domain. 
For most devices this may be fine as the PM domains are never removed, however, for devices that wish to populate the PM domains from within a driver, having the ability to remove a PM domain if the probing of the device fails or the driver is unloaded is necessary. Add the function pm_genpd_remove() to remove a PM domain by referencing its generic_pm_domain structure. Note that the bulk of the code that removes the PM domain is placed in a separate local function genpd_remove() (which is called by pm_genpd_remove()). The code is structured in this way to prepare for adding another function to remove a PM domain by provider that will also call genpd_remove(). Note that users of genpd_remove() must call this function with the mutex, gpd_list_lock, held. PM domains can only be removed if the associated provider has been removed, they are not a parent domain to another PM domain and have no devices associated with them. Signed-off-by: Jon Hunter Acked-by: Ulf Hansson Signed-off-by: Rafael J. 
Wysocki --- include/linux/pm_domain.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 554f8915c691..85f7d53a9827 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -130,6 +130,7 @@ extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target); extern int pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); +extern int pm_genpd_remove(struct generic_pm_domain *genpd); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; @@ -165,6 +166,10 @@ static inline int pm_genpd_init(struct generic_pm_domain *genpd, { return -ENOSYS; } +static inline int pm_genpd_remove(struct generic_pm_domain *genpd) +{ + return -ENOTSUPP; +} #endif static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, -- cgit v1.2.3 From 17926551c98a4ff5d7fa3a574c60534fedb3f2c6 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Mon, 12 Sep 2016 12:01:14 +0100 Subject: PM / Domains: Add support for removing nested PM domains by provider If a device supports PM domains that are subdomains of another PM domain, then the PM domains should be removed in reverse order to ensure that the subdomains are removed first. Furthermore, if there is more than one provider, then there needs to be a way to remove the domains in reverse order for a specific provider. Add the function of_genpd_remove_last() to remove the last PM domain added by a given PM domain provider and return the generic_pm_domain structure for the PM domain that was removed. Signed-off-by: Jon Hunter Acked-by: Ulf Hansson Signed-off-by: Rafael J. 
Wysocki --- include/linux/pm_domain.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 85f7d53a9827..a09fe5c009c8 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -204,6 +204,7 @@ extern int of_genpd_add_device(struct of_phandle_args *args, struct device *dev); extern int of_genpd_add_subdomain(struct of_phandle_args *parent, struct of_phandle_args *new_subdomain); +extern struct generic_pm_domain *of_genpd_remove_last(struct device_node *np); int genpd_dev_pm_attach(struct device *dev); #else /* !CONFIG_PM_GENERIC_DOMAINS_OF */ @@ -237,6 +238,12 @@ static inline int genpd_dev_pm_attach(struct device *dev) { return -ENODEV; } + +static inline +struct generic_pm_domain *of_genpd_remove_last(struct device_node *np) +{ + return ERR_PTR(-ENOTSUPP); +} #endif /* CONFIG_PM_GENERIC_DOMAINS_OF */ #ifdef CONFIG_PM -- cgit v1.2.3 From c7914e8dfa4032d24ef7af4c86b9c841ec6b74e6 Mon Sep 17 00:00:00 2001 From: Chris Zhong Date: Fri, 9 Sep 2016 19:15:44 -0700 Subject: extcon: Introduce EXTCON_PROP_DISP_HPD property EXTCON_PROP_DISP_HPD is needed by display port; if the system has no hpd interrupt, this property can be used. - HPD (Hot Plug Detect) sends the signal indicating whether the display device is on or off to the source device. Signed-off-by: Chris Zhong Reviewed-by: Guenter Roeck [cw00.choi: Add the description of HPD and full name of HPD] Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index b34d1ae9011f..5c35f9d1822c 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -133,9 +133,20 @@ #define EXTCON_PROP_JACK_MAX 100 #define EXTCON_PROP_JACK_CNT (EXTCON_PROP_JACK_MAX - EXTCON_PROP_JACK_MIN + 1) +/* + * Properties of EXTCON_TYPE_DISP. 
+ * + * - EXTCON_PROP_DISP_HPD (Hot Plug Detect) + * @type: integer (intval) + * @value: 0 (no hpd) or 1 (hpd) + * @default: 0 (no hpd) + * + */ +#define EXTCON_PROP_DISP_HPD 150 + /* Properties of EXTCON_TYPE_DISP. */ #define EXTCON_PROP_DISP_MIN 150 -#define EXTCON_PROP_DISP_MAX 150 +#define EXTCON_PROP_DISP_MAX 151 #define EXTCON_PROP_DISP_CNT (EXTCON_PROP_DISP_MAX - EXTCON_PROP_DISP_MIN + 1) /* -- cgit v1.2.3 From 050bc4e846af24e77af82d0fa5f718e0919d15a4 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 12 Sep 2016 15:19:41 +0200 Subject: scsi: introduce a quirk for false cache reporting Some SATA to USB bridges fail to cooperate with some drives resulting in no cache being present being reported to the host. That causes the host to skip sending a command to synchronize caches. That causes data loss when the drive is powered down. Signed-off-by: Oliver Neukum Reviewed-by: Martin K. Petersen Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_usual.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index 245f57dbbb61..0aae1b2ee931 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -81,6 +81,8 @@ /* Sets max_sectors to 240 */ \ US_FLAG(NO_REPORT_LUNS, 0x10000000) \ /* Cannot handle REPORT_LUNS */ \ + US_FLAG(ALWAYS_SYNC, 0x20000000) \ + /* lies about caching, so always sync */ \ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- cgit v1.2.3 From 8824ee8573483e1c91691b5be3d3730e75551dce Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sat, 27 Aug 2016 15:55:38 +0800 Subject: mfd: axp20x: Add support for AXP806 PMIC The X-Powers AXP806 is a new PMIC that is paired with Allwinner's A80 SoC, along with a master AXP809 PMIC. 
This PMIC has a new register layout, and supports some functions not seen in other X-Powers PMICs, such as master-slave mode, or having multiple AXP806 PMICs on the same bus with address space extension, or supporting both I2C and RSB mode. I2C has not been tested. This patch adds support for the interrupts of the PMIC. A regulator sub-device is enabled, but actual regulator support will come in a later patch. Signed-off-by: Chen-Yu Tsai Signed-off-by: Lee Jones --- include/linux/mfd/axp20x.h | 60 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 0be4982f08fe..fec597fb34cb 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -20,6 +20,7 @@ enum { AXP221_ID, AXP223_ID, AXP288_ID, + AXP806_ID, AXP809_ID, NR_AXP20X_VARIANTS, }; @@ -91,6 +92,30 @@ enum { #define AXP22X_ALDO3_V_OUT 0x2a #define AXP22X_CHRG_CTRL3 0x35 +#define AXP806_STARTUP_SRC 0x00 +#define AXP806_CHIP_ID 0x03 +#define AXP806_PWR_OUT_CTRL1 0x10 +#define AXP806_PWR_OUT_CTRL2 0x11 +#define AXP806_DCDCA_V_CTRL 0x12 +#define AXP806_DCDCB_V_CTRL 0x13 +#define AXP806_DCDCC_V_CTRL 0x14 +#define AXP806_DCDCD_V_CTRL 0x15 +#define AXP806_DCDCE_V_CTRL 0x16 +#define AXP806_ALDO1_V_CTRL 0x17 +#define AXP806_ALDO2_V_CTRL 0x18 +#define AXP806_ALDO3_V_CTRL 0x19 +#define AXP806_DCDC_MODE_CTRL1 0x1a +#define AXP806_DCDC_MODE_CTRL2 0x1b +#define AXP806_DCDC_FREQ_CTRL 0x1c +#define AXP806_BLDO1_V_CTRL 0x20 +#define AXP806_BLDO2_V_CTRL 0x21 +#define AXP806_BLDO3_V_CTRL 0x22 +#define AXP806_BLDO4_V_CTRL 0x23 +#define AXP806_CLDO1_V_CTRL 0x24 +#define AXP806_CLDO2_V_CTRL 0x25 +#define AXP806_CLDO3_V_CTRL 0x26 +#define AXP806_VREF_TEMP_WARN_L 0xf3 + /* Interrupt */ #define AXP152_IRQ1_EN 0x40 #define AXP152_IRQ2_EN 0x41 @@ -265,6 +290,26 @@ enum { AXP22X_REG_ID_MAX, }; +enum { + AXP806_DCDCA = 0, + AXP806_DCDCB, + AXP806_DCDCC, + AXP806_DCDCD, + AXP806_DCDCE, + 
AXP806_ALDO1, + AXP806_ALDO2, + AXP806_ALDO3, + AXP806_BLDO1, + AXP806_BLDO2, + AXP806_BLDO3, + AXP806_BLDO4, + AXP806_CLDO1, + AXP806_CLDO2, + AXP806_CLDO3, + AXP806_SW, + AXP806_REG_ID_MAX, +}; + enum { AXP809_DCDC1 = 0, AXP809_DCDC2, @@ -414,6 +459,21 @@ enum axp288_irqs { AXP288_IRQ_BC_USB_CHNG, }; +enum axp806_irqs { + AXP806_IRQ_DIE_TEMP_HIGH_LV1, + AXP806_IRQ_DIE_TEMP_HIGH_LV2, + AXP806_IRQ_DCDCA_V_LOW, + AXP806_IRQ_DCDCB_V_LOW, + AXP806_IRQ_DCDCC_V_LOW, + AXP806_IRQ_DCDCD_V_LOW, + AXP806_IRQ_DCDCE_V_LOW, + AXP806_IRQ_PWROK_LONG, + AXP806_IRQ_PWROK_SHORT, + AXP806_IRQ_WAKEUP, + AXP806_IRQ_PWROK_FALL, + AXP806_IRQ_PWROK_RISE, +}; + enum axp809_irqs { AXP809_IRQ_ACIN_OVER_V = 1, AXP809_IRQ_ACIN_PLUGIN, -- cgit v1.2.3 From 8c34ab1910a79319731107ec8ecd2e80893ea30c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 9 Sep 2016 23:59:33 +0200 Subject: cpufreq / sched: SCHED_CPUFREQ_IOWAIT flag to indicate iowait condition Testing indicates that it is possible to improve performance significantly without increasing energy consumption too much by teaching cpufreq governors to bump up the CPU performance level if the in_iowait flag is set for the task in enqueue_task_fair(). For this purpose, define a new cpufreq_update_util() flag SCHED_CPUFREQ_IOWAIT and modify enqueue_task_fair() to pass that flag to cpufreq_update_util() in the in_iowait case. That generally requires cpufreq_update_util() to be called directly from there, because update_load_avg() may not be invoked in that case. Signed-off-by: Rafael J. 
Wysocki Looks-good-to: Steve Muckle Acked-by: Peter Zijlstra (Intel) --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b0fa726b7f31..98fe95fea30c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3471,6 +3471,7 @@ static inline unsigned long rlimit_max(unsigned int limit) #define SCHED_CPUFREQ_RT (1U << 0) #define SCHED_CPUFREQ_DL (1U << 1) +#define SCHED_CPUFREQ_IOWAIT (1U << 2) #define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL) -- cgit v1.2.3 From ecb3f394c5dba897d215a5422f1b363e93e2ce4e Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Tue, 13 Sep 2016 12:14:51 -0400 Subject: genirq: Expose interrupt information through sysfs Information about interrupts is exposed via /proc/interrupts, but the format of that file has changed over kernel versions and differs across architectures. It also has varying column numbers depending on hardware. That all makes it hard for tools to parse. To solve this, expose the information through sysfs so each irq attribute is in a separate file in a consistent, machine parsable way. This feature is only available when both CONFIG_SPARSE_IRQ and CONFIG_SYSFS are enabled. 
Examples: /sys/kernel/irq/18/actions: i801_smbus,ehci_hcd:usb1,uhci_hcd:usb7 /sys/kernel/irq/18/chip_name: IR-IO-APIC /sys/kernel/irq/18/hwirq: 18 /sys/kernel/irq/18/name: fasteoi /sys/kernel/irq/18/per_cpu_count: 0,0 /sys/kernel/irq/18/type: level /sys/kernel/irq/25/actions: ahci0 /sys/kernel/irq/25/chip_name: IR-PCI-MSI /sys/kernel/irq/25/hwirq: 512000 /sys/kernel/irq/25/name: edge /sys/kernel/irq/25/per_cpu_count: 29036,0 /sys/kernel/irq/25/type: edge [ tglx: Moved kobject_del() under sparse_irq_lock, massaged code comments and changelog ] Signed-off-by: Craig Gallek Cc: David Decotigny Link: http://lkml.kernel.org/r/1473783291-122873-1-git-send-email-kraigatgoog@gmail.com Signed-off-by: Thomas Gleixner --- include/linux/irqdesc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index b51beebf9804..c9be57931b58 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -2,6 +2,7 @@ #define _LINUX_IRQDESC_H #include +#include /* * Core internal functions to deal with irq descriptors @@ -43,6 +44,7 @@ struct pt_regs; * @force_resume_depth: number of irqactions on a irq descriptor with * IRQF_FORCE_RESUME set * @rcu: rcu head for delayed free + * @kobj: kobject used to represent this struct in sysfs * @dir: /proc/irq/ procfs entry * @name: flow handler name for /proc/interrupts output */ @@ -88,6 +90,7 @@ struct irq_desc { #endif #ifdef CONFIG_SPARSE_IRQ struct rcu_head rcu; + struct kobject kobj; #endif int parent_irq; struct module *owner; -- cgit v1.2.3 From 6e219353afa1f67f453141f7462b01708ebf5574 Mon Sep 17 00:00:00 2001 From: Stephen Bates Date: Tue, 13 Sep 2016 12:23:15 -0600 Subject: block: add poll_considered statistic In order to help determine the effectiveness of polling in a running system it is useful to determine the ratio of how often the poll function is called vs how often the completion is checked. 
For this reason we add a poll_considered variable and add it to the sysfs entry for io_poll. Signed-off-by: Stephen Bates Acked-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index e1544f0f8c21..7710f795d7c2 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -61,6 +61,7 @@ struct blk_mq_hw_ctx { struct blk_mq_cpu_notifier cpu_notifier; struct kobject kobj; + unsigned long poll_considered; unsigned long poll_invoked; unsigned long poll_success; }; -- cgit v1.2.3 From abe47114b192a9e0167905a3418d815b4fcf87de Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 14 Sep 2016 14:33:15 +0200 Subject: block: remove blk_mq_alloc_single_hw_queue() prototype The blk_mq_alloc_single_hw_queue() is a prototype artifact that should have been removed with commit cdef54dd85ad66e77262ea57796a3e81683dd5d6 "blk-mq: remove alloc_hctx and free_hctx methods" where the last users of it were deleted. 
Fixes: cdef54dd85ad ("blk-mq: remove alloc_hctx and free_hctx methods") Signed-off-by: Linus Walleij Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 7710f795d7c2..ff14f68067aa 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -223,7 +223,6 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) } struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); -struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int); int blk_mq_request_started(struct request *rq); void blk_mq_start_request(struct request *rq); -- cgit v1.2.3 From 637ca77bd1f7950538956c61dcd0c2e559905dbf Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 14 Sep 2016 10:44:12 +0200 Subject: block: Document that bio_op() uses the data type of bio.bi_opf Make it clear that the sizeof(unsigned int) expression in BIO_OP_SHIFT refers to the bi_opf member of struct bio. 
Signed-off-by: Bart Van Assche Cc: Mike Christie Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 436f43f87da9..1e1ef210ae91 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -89,7 +89,7 @@ struct bio { struct bio_vec bi_inline_vecs[0]; }; -#define BIO_OP_SHIFT (8 * sizeof(unsigned int) - REQ_OP_BITS) +#define BIO_OP_SHIFT (8 * FIELD_SIZEOF(struct bio, bi_opf) - REQ_OP_BITS) #define bio_op(bio) ((bio)->bi_opf >> BIO_OP_SHIFT) #define bio_set_op_attrs(bio, op, op_flags) do { \ -- cgit v1.2.3 From 4382e33ad374862eacf62003bb02c750391ada05 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 14 Sep 2016 10:45:36 +0200 Subject: block, dm-crypt, btrfs: Introduce bio_flags() Introduce the bio_flags() macro. Ensure that the second argument of bio_set_op_attrs() only contains flags and no operation. This patch does not change any functionality. 
Signed-off-by: Bart Van Assche Cc: Mike Christie Cc: Chris Mason (maintainer:BTRFS FILE SYSTEM) Cc: Josef Bacik (maintainer:BTRFS FILE SYSTEM) Cc: Mike Snitzer Cc: Hannes Reinecke Cc: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1e1ef210ae91..311fa2f478b8 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -90,11 +90,12 @@ struct bio { }; #define BIO_OP_SHIFT (8 * FIELD_SIZEOF(struct bio, bi_opf) - REQ_OP_BITS) +#define bio_flags(bio) ((bio)->bi_opf & ((1 << BIO_OP_SHIFT) - 1)) #define bio_op(bio) ((bio)->bi_opf >> BIO_OP_SHIFT) #define bio_set_op_attrs(bio, op, op_flags) do { \ WARN_ON(op >= (1 << REQ_OP_BITS)); \ - (bio)->bi_opf &= ((1 << BIO_OP_SHIFT) - 1); \ + (bio)->bi_opf = bio_flags(bio); \ (bio)->bi_opf |= ((unsigned int) (op) << BIO_OP_SHIFT); \ (bio)->bi_opf |= op_flags; \ } while (0) -- cgit v1.2.3 From 3e1de31b9bf608c5b35e2d0d134eb87f2a9ba4ae Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 14 Sep 2016 10:46:22 +0200 Subject: block: Improve bio_set_op_attrs() robustness Since REQ_OP_BITS == 3 and __REQ_NR_BITS == 30 it is not that hard to pass an op_flags argument to bio_set_op_attrs() that is larger than the number of bits reserved for the op_flags argument. Complain if this happens. Additionally, ensure that negative arguments trigger a complaint (1 << ... is signed while 1U << ... is unsigned; adding 0U to an integer expression causes it to be promoted to an unsigned type). 
Signed-off-by: Bart Van Assche Cc: Mike Christie Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 311fa2f478b8..53ee1a2acd4f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -93,11 +93,18 @@ struct bio { #define bio_flags(bio) ((bio)->bi_opf & ((1 << BIO_OP_SHIFT) - 1)) #define bio_op(bio) ((bio)->bi_opf >> BIO_OP_SHIFT) -#define bio_set_op_attrs(bio, op, op_flags) do { \ - WARN_ON(op >= (1 << REQ_OP_BITS)); \ - (bio)->bi_opf = bio_flags(bio); \ - (bio)->bi_opf |= ((unsigned int) (op) << BIO_OP_SHIFT); \ - (bio)->bi_opf |= op_flags; \ +#define bio_set_op_attrs(bio, op, op_flags) do { \ + if (__builtin_constant_p(op)) \ + BUILD_BUG_ON((op) + 0U >= (1U << REQ_OP_BITS)); \ + else \ + WARN_ON_ONCE((op) + 0U >= (1U << REQ_OP_BITS)); \ + if (__builtin_constant_p(op_flags)) \ + BUILD_BUG_ON((op_flags) + 0U >= (1U << BIO_OP_SHIFT)); \ + else \ + WARN_ON_ONCE((op_flags) + 0U >= (1U << BIO_OP_SHIFT)); \ + (bio)->bi_opf = bio_flags(bio); \ + (bio)->bi_opf |= (((op) + 0U) << BIO_OP_SHIFT); \ + (bio)->bi_opf |= (op_flags); \ } while (0) #define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) -- cgit v1.2.3 From 3f7c624aa58f769e0313ca3310704c5d88ac99ce Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 11 Sep 2016 16:03:02 +0200 Subject: block: remove bio_destructor_t Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 53ee1a2acd4f..cd395ecec99d 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -16,7 +16,6 @@ struct 
block_device; struct io_context; struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); -typedef void (bio_destructor_t) (struct bio *); #ifdef CONFIG_BLOCK /* -- cgit v1.2.3 From fc95db3edeaf924e9ad16592d9c1b06c730a49c9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 11 Sep 2016 16:03:03 +0200 Subject: bio.h: remove a very outdated comment Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 23ddf4b46a9b..e00721a2dce1 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -1,6 +1,4 @@ /* - * 2.5 block I/O model - * * Copyright (C) 2001 Jens Axboe * * This program is free software; you can redistribute it and/or modify -- cgit v1.2.3 From c5c5ca777469f0ff854f1da0aff9b3a9051b3ef7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 11 Sep 2016 16:03:04 +0200 Subject: block: remove IOPRIO_BITS Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index beb9ce1c2c23..8c1239020d79 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -7,7 +7,6 @@ /* * Gives us 8 prio classes with 13-bits of data for each class */ -#define IOPRIO_BITS (16) #define IOPRIO_CLASS_SHIFT (13) #define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1) -- cgit v1.2.3 From 2849450ad39d2e699fda2d5c6f41e05d87fd7004 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 14 Sep 2016 13:28:30 -0400 Subject: blk-mq: introduce blk_mq_delay_kick_requeue_list() blk_mq_delay_kick_requeue_list() provides the ability to kick the q->requeue_list after a specified time. To do this the request_queue's 'requeue_work' member was changed to a delayed_work. 
blk_mq_delay_kick_requeue_list() allows DM to defer processing requeued requests while it doesn't make sense to immediately requeue them (e.g. when all paths in a DM multipath have failed). Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + include/linux/blkdev.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ff14f68067aa..60ef14cbcd2d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -233,6 +233,7 @@ void blk_mq_requeue_request(struct request *rq); void blk_mq_add_to_requeue_list(struct request *rq, bool at_head); void blk_mq_cancel_requeue_work(struct request_queue *q); void blk_mq_kick_requeue_list(struct request_queue *q); +void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); void blk_mq_abort_requeue_list(struct request_queue *q); void blk_mq_complete_request(struct request *rq, int error); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 69aae720f4ef..c47c358ba052 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -449,7 +449,7 @@ struct request_queue { struct list_head requeue_list; spinlock_t requeue_lock; - struct work_struct requeue_work; + struct delayed_work requeue_work; struct mutex sysfs_lock; -- cgit v1.2.3 From a8ac51e4ab97765838ae6a07d6ff7f7bfaaa0ea3 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 9 Sep 2016 19:24:57 -0400 Subject: dm rq: add DM_MAPIO_DELAY_REQUEUE to delay requeue of blk-mq requests Otherwise blk-mq will immediately dispatch requests that are requeued via a BLK_MQ_RQ_QUEUE_BUSY return from blk_mq_ops .queue_rq. Delayed requeue is implemented using blk_mq_delay_kick_requeue_list() with a delay of 5 secs. In the context of DM multipath (all paths down) it doesn't make any sense to requeue more quickly. 
Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 91acfce74a22..ef7962e84444 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -590,6 +590,7 @@ extern struct ratelimit_state dm_ratelimit_state; #define DM_MAPIO_SUBMITTED 0 #define DM_MAPIO_REMAPPED 1 #define DM_MAPIO_REQUEUE DM_ENDIO_REQUEUE +#define DM_MAPIO_DELAY_REQUEUE 3 #define dm_sector_div64(x, y)( \ { \ -- cgit v1.2.3 From 2eefd8789698e89c4a5d610921dc3c1b66e3bd0d Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 5 Sep 2016 16:33:05 +0300 Subject: x86/arch_prctl/vdso: Add ARCH_MAP_VDSO_* Add API to change vdso blob type with arch_prctl. As this is useful only for the needs of CRIU, expose this interface under CONFIG_CHECKPOINT_RESTORE. Signed-off-by: Dmitry Safonov Acked-by: Andy Lutomirski Cc: 0x7f454c46@gmail.com Cc: oleg@redhat.com Cc: linux-mm@kvack.org Cc: gorcunov@openvz.org Cc: xemul@virtuozzo.com Link: http://lkml.kernel.org/r/20160905133308.28234-4-dsafonov@virtuozzo.com Signed-off-by: Thomas Gleixner --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ef815b9cd426..5f14534f0c90 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2019,6 +2019,8 @@ extern struct file *get_task_exe_file(struct task_struct *task); extern bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long npages); extern void vm_stat_account(struct mm_struct *, vm_flags_t, long npages); +extern bool vma_is_special_mapping(const struct vm_area_struct *vma, + const struct vm_special_mapping *sm); extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm, unsigned long addr, unsigned long len, unsigned long flags, -- cgit v1.2.3 From 12adfd882c5f37548acaba4f043a158b3c54468b Mon Sep 17 00:00:00 2001 From: Chris 
Wilson Date: Sat, 23 Jul 2016 19:27:50 +0100 Subject: list: Expand list_first_entry_or_null() Due to the use of READ_ONCE() in list_empty() the compiler cannot optimise !list_empty() ? list_first_entry() : NULL very well. By manually expanding list_first_entry_or_null() we can take advantage of the READ_ONCE() to avoid the list element changing under the test while the compiler can generate smaller code. Signed-off-by: Chris Wilson Cc: "Paul E. McKenney" Cc: Andrew Morton Cc: Dan Williams Cc: Jan Kara Cc: Josef Bacik Cc: linux-kernel@vger.kernel.org Signed-off-by: Paul E. McKenney --- include/linux/list.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 5183138aa932..5809e9a2de5b 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -381,8 +381,11 @@ static inline void list_splice_tail_init(struct list_head *list, * * Note that if the list is empty, it returns NULL. */ -#define list_first_entry_or_null(ptr, type, member) \ - (!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL) +#define list_first_entry_or_null(ptr, type, member) ({ \ + struct list_head *head__ = (ptr); \ + struct list_head *pos__ = READ_ONCE(head__->next); \ + pos__ != head__ ? list_entry(pos__, type, member) : NULL; \ +}) /** * list_next_entry - get the next element in list -- cgit v1.2.3 From 28f4b04143c56135b1ca742fc64b664ed04de6a4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Sep 2016 16:18:47 +0200 Subject: genirq/msi: Add cpumask allocation to alloc_msi_entry For irq spreading we want to store affinity masks in the msi_entry. Add the infrastructure for it. We allocate an array of cpumasks with an array size of the number of used vectors in the entry, so we can hand in the information per linux interrupt later. As we hand in the number of used vectors, we assign them right away. Convert all the call sites. 
Signed-off-by: Thomas Gleixner Cc: axboe@fb.com Cc: keith.busch@intel.com Cc: agordeev@redhat.com Cc: linux-block@vger.kernel.org Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/1473862739-15032-2-git-send-email-hch@lst.de --- include/linux/msi.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index e8c81fbd5f9c..0db320b7bb15 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -68,7 +68,7 @@ struct msi_desc { unsigned int nvec_used; struct device *dev; struct msi_msg msg; - const struct cpumask *affinity; + struct cpumask *affinity; union { /* PCI MSI/X specific data */ @@ -123,7 +123,8 @@ static inline void *msi_desc_to_pci_sysdata(struct msi_desc *desc) } #endif /* CONFIG_PCI_MSI */ -struct msi_desc *alloc_msi_entry(struct device *dev); +struct msi_desc *alloc_msi_entry(struct device *dev, int nvec, + const struct cpumask *affinity); void free_msi_entry(struct msi_desc *entry); void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg); void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); -- cgit v1.2.3 From 34c3d9819fda464be4f1bec59b63353814f76c73 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Sep 2016 16:18:48 +0200 Subject: genirq/affinity: Provide smarter irq spreading infrastructure The current irq spreading infrastructure is just looking at a cpumask and tries to spread the interrupts over the mask. That's suboptimal as it does not take numa nodes into account. Change the logic so the interrupts are spread across numa nodes and inside the nodes. If there are more cpus than vectors per node, then we set the affinity to several cpus. If HT siblings are available we take that into account and try to set all siblings to a single vector. 
Signed-off-by: Thomas Gleixner Cc: Christoph Hellwig Cc: axboe@fb.com Cc: keith.busch@intel.com Cc: agordeev@redhat.com Cc: linux-block@vger.kernel.org Link: http://lkml.kernel.org/r/1473862739-15032-3-git-send-email-hch@lst.de --- include/linux/interrupt.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index b6683f0ffc9f..4e59d122cad9 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -279,6 +279,8 @@ extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs); +struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, int nvec); +int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec); #else /* CONFIG_SMP */ @@ -316,6 +318,19 @@ static inline struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs) *nr_vecs = 1; return NULL; } + +static inline struct cpumask * +irq_create_affinity_masks(const struct cpumask *affinity, int nvec) +{ + return NULL; +} + +static inline int +irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec) +{ + return maxvec; +} + #endif /* CONFIG_SMP */ /* -- cgit v1.2.3 From 44082fd6702fb12020967fd375f8bf6dd7c111bf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Sep 2016 16:18:50 +0200 Subject: genirq/affinity: Remove old irq spread infrastructure No more users. 
Signed-off-by: Thomas Gleixner Cc: Christoph Hellwig Cc: axboe@fb.com Cc: keith.busch@intel.com Cc: agordeev@redhat.com Cc: linux-block@vger.kernel.org Link: http://lkml.kernel.org/r/1473862739-15032-5-git-send-email-hch@lst.de Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 4e59d122cad9..72f0721f75e7 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -278,7 +278,6 @@ extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m); extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); -struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs); struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, int nvec); int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec); @@ -313,12 +312,6 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) return 0; } -static inline struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs) -{ - *nr_vecs = 1; - return NULL; -} - static inline struct cpumask * irq_create_affinity_masks(const struct cpumask *affinity, int nvec) { -- cgit v1.2.3 From ee8d41e53efe14bfc5ea5866e1178b06d78a7c95 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Sep 2016 16:18:51 +0200 Subject: pci/msi: Retrieve affinity for a vector Add a helper to get the affinity mask for a given PCI irq vector. For MSI or MSI-X vectors these are stored by the IRQ core, while for legacy interrupts we will always return cpu_possible_map. 
[hch: updated to follow the style of pci_irq_vector()] Signed-off-by: Thomas Gleixner Signed-off-by: Christoph Hellwig Cc: axboe@fb.com Cc: keith.busch@intel.com Cc: agordeev@redhat.com Cc: linux-block@vger.kernel.org Link: http://lkml.kernel.org/r/1473862739-15032-6-git-send-email-hch@lst.de Signed-off-by: Thomas Gleixner --- include/linux/pci.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0ab835965669..3b0a8004f313 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1300,6 +1300,7 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, unsigned int max_vecs, unsigned int flags); void pci_free_irq_vectors(struct pci_dev *dev); int pci_irq_vector(struct pci_dev *dev, unsigned int nr); +const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec); #else static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; } @@ -1342,6 +1343,11 @@ static inline int pci_irq_vector(struct pci_dev *dev, unsigned int nr) return -EINVAL; return dev->irq; } +static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, + int vec) +{ + return cpu_possible_mask; +} #endif #ifdef CONFIG_PCIEPORTBUS -- cgit v1.2.3 From c65eacbe290b8141554c71b2c94489e73ade8c8d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 13 Sep 2016 14:29:24 -0700 Subject: sched/core: Allow putting thread_info into task_struct If an arch opts in by setting CONFIG_THREAD_INFO_IN_TASK_STRUCT, then thread_info is defined as a single 'u32 flags' and is the first entry of task_struct. thread_info::task is removed (it serves no purpose if thread_info is embedded in task_struct), and thread_info::cpu gets its own slot in task_struct. This is heavily based on a patch written by Linus. Originally-from: Linus Torvalds Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Jann Horn Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/a0898196f0476195ca02713691a5037a14f2aac5.1473801993.git.luto@kernel.org Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 9 +++++++++ include/linux/sched.h | 36 ++++++++++++++++++++++++++++++++++-- include/linux/thread_info.h | 15 +++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index f8834f820ec2..9c04d44eeb3c 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -15,6 +15,8 @@ #include #include +#include + #ifdef CONFIG_SMP # define INIT_PUSHABLE_TASKS(tsk) \ .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), @@ -183,12 +185,19 @@ extern struct task_group root_task_group; # define INIT_KASAN(tsk) #endif +#ifdef CONFIG_THREAD_INFO_IN_TASK +# define INIT_TASK_TI(tsk) .thread_info = INIT_THREAD_INFO(tsk), +#else +# define INIT_TASK_TI(tsk) +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) */ #define INIT_TASK(tsk) \ { \ + INIT_TASK_TI(tsk) \ .state = 0, \ .stack = init_stack, \ .usage = ATOMIC_INIT(2), \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 20f9f47bcfd0..a287e8b13549 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1458,6 +1458,13 @@ struct tlbflush_unmap_batch { }; struct task_struct { +#ifdef CONFIG_THREAD_INFO_IN_TASK + /* + * For reasons of header soup (see current_thread_info()), this + * must be the first element of task_struct. 
+ */ + struct thread_info thread_info; +#endif volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; atomic_t usage; @@ -1467,6 +1474,9 @@ struct task_struct { #ifdef CONFIG_SMP struct llist_node wake_entry; int on_cpu; +#ifdef CONFIG_THREAD_INFO_IN_TASK + unsigned int cpu; /* current CPU */ +#endif unsigned int wakee_flips; unsigned long wakee_flip_decay_ts; struct task_struct *last_wakee; @@ -2588,7 +2598,9 @@ extern void set_curr_task(int cpu, struct task_struct *p); void yield(void); union thread_union { +#ifndef CONFIG_THREAD_INFO_IN_TASK struct thread_info thread_info; +#endif unsigned long stack[THREAD_SIZE/sizeof(long)]; }; @@ -3076,10 +3088,26 @@ static inline void threadgroup_change_end(struct task_struct *tsk) cgroup_threadgroup_change_end(tsk); } -#ifndef __HAVE_THREAD_FUNCTIONS +#ifdef CONFIG_THREAD_INFO_IN_TASK + +static inline struct thread_info *task_thread_info(struct task_struct *task) +{ + return &task->thread_info; +} +static inline void *task_stack_page(const struct task_struct *task) +{ + return task->stack; +} +#define setup_thread_stack(new,old) do { } while(0) +static inline unsigned long *end_of_stack(const struct task_struct *task) +{ + return task->stack; +} + +#elif !defined(__HAVE_THREAD_FUNCTIONS) #define task_thread_info(task) ((struct thread_info *)(task)->stack) -#define task_stack_page(task) ((task)->stack) +#define task_stack_page(task) ((void *)(task)->stack) static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) { @@ -3379,7 +3407,11 @@ static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) static inline unsigned int task_cpu(const struct task_struct *p) { +#ifdef CONFIG_THREAD_INFO_IN_TASK + return p->cpu; +#else return task_thread_info(p)->cpu; +#endif } static inline int task_node(const struct task_struct *p) diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 2b5b10eed74f..e2d0fd81b1ba 100644 --- 
a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -13,6 +13,21 @@ struct timespec; struct compat_timespec; +#ifdef CONFIG_THREAD_INFO_IN_TASK +struct thread_info { + u32 flags; /* low level flags */ +}; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .flags = 0, \ +} +#endif + +#ifdef CONFIG_THREAD_INFO_IN_TASK +#define current_thread_info() ((struct thread_info *)current) +#endif + /* * System call restart block. */ -- cgit v1.2.3 From a727b025f43d7952c0697562f5cecda9f42758aa Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 9 Sep 2016 17:37:02 -0500 Subject: tty: serial_core: add tty NULL check to uart_tx_stopped Commit 761ed4a94582 ("tty: serial_core: convert uart_close to use tty_port_close") created a case where a port used for a console does not get shutdown on tty closing. Then a call to uart_tx_stopped() segfaults because the tty is NULL. This could be fixed to restore old behavior, but we also want to allow tty_ports to work without a tty attached. So this change to allow a NULL tty_struct is needed either way. 
Fixes: 761ed4a94582 ("tty: serial_core: convert uart_close to use tty_port_close") Reported-by: kernel test robot Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index cdba6f144f72..378d80a8dd43 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -412,7 +412,7 @@ int uart_resume_port(struct uart_driver *reg, struct uart_port *port); static inline int uart_tx_stopped(struct uart_port *port) { struct tty_struct *tty = port->state->port.tty; - if (tty->stopped || port->hw_stopped) + if ((tty && tty->stopped) || port->hw_stopped) return 1; return 0; } -- cgit v1.2.3 From d47529b2e9fe0ec2eb1f072afad8849f52e385c4 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 12 Sep 2016 18:16:31 -0400 Subject: gpio: don't include module.h in shared driver header Most shared headers in include/linux don't need to know what the internals of a struct module are; all they care about is that it is a struct and hence they may require a pointer to one. The advantage in this is that module.h is including a lot of stuff itself, and an otherwise empty C file that just contains module.h will result in ~750kB from CPP (compared to say 12kB from init.h) So we have approximately 50 instances of "struct module;" in the various include/linux headers already that help us keep module.h out of other headers; here we do the same for gpio. 
Cc: Linus Walleij Cc: Alexandre Courbot Cc: linux-gpio@vger.kernel.org Signed-off-by: Paul Gortmaker Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 50882e09289b..216e6f275aa8 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -3,7 +3,6 @@ #include #include -#include #include #include #include @@ -16,6 +15,7 @@ struct of_phandle_args; struct device_node; struct seq_file; struct gpio_device; +struct module; #ifdef CONFIG_GPIOLIB -- cgit v1.2.3 From a67e9472da423ec47a3586920b526ebaedf25fc3 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 12 Sep 2016 14:01:29 +0100 Subject: of: Add array read functions with min/max size limits Add a new set of array reading functions that take a minimum and maximum size limit and will fail if the property size is not within the size limits. This makes it more convenient for drivers that use variable-size DT arrays which must be bounded at both ends - data must be at least N entries but must not overflow the array it is being copied into. It is also more efficient than making this functionality out of existing public functions and avoids duplication. The existing array functions have been left in the API, since there are a very large number of clients of those functions and their existing functionality is still useful. This avoids turning a small API improvement into a major kernel rework. The old functions have been turned into minimal static inlines calling the new functions. The old functions had no upper limit on the actual size of the dts entry. To preserve this functionality rather than keeping two near-identical implementations, if the new function is called with max=0 there is no limit on the size of the dts entry but only the min number of elements are read. 
Signed-off-by: Richard Fitzgerald Signed-off-by: Rob Herring --- include/linux/of.h | 144 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 132 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 3d9ff8e9d803..299aeb192727 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -291,20 +291,24 @@ extern int of_property_count_elems_of_size(const struct device_node *np, extern int of_property_read_u32_index(const struct device_node *np, const char *propname, u32 index, u32 *out_value); -extern int of_property_read_u8_array(const struct device_node *np, - const char *propname, u8 *out_values, size_t sz); -extern int of_property_read_u16_array(const struct device_node *np, - const char *propname, u16 *out_values, size_t sz); -extern int of_property_read_u32_array(const struct device_node *np, - const char *propname, - u32 *out_values, - size_t sz); +extern int of_property_read_variable_u8_array(const struct device_node *np, + const char *propname, u8 *out_values, + size_t sz_min, size_t sz_max); +extern int of_property_read_variable_u16_array(const struct device_node *np, + const char *propname, u16 *out_values, + size_t sz_min, size_t sz_max); +extern int of_property_read_variable_u32_array(const struct device_node *np, + const char *propname, + u32 *out_values, + size_t sz_min, + size_t sz_max); extern int of_property_read_u64(const struct device_node *np, const char *propname, u64 *out_value); -extern int of_property_read_u64_array(const struct device_node *np, - const char *propname, - u64 *out_values, - size_t sz); +extern int of_property_read_variable_u64_array(const struct device_node *np, + const char *propname, + u64 *out_values, + size_t sz_min, + size_t sz_max); extern int of_property_read_string(const struct device_node *np, const char *propname, @@ -380,6 +384,122 @@ extern int of_detach_node(struct device_node *); #define of_match_ptr(_ptr) (_ptr) +/** + * 
of_property_read_u8_array - Find and read an array of u8 from a property. + * + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @out_values: pointer to return value, modified only if return value is 0. + * @sz: number of array elements to read + * + * Search for a property in a device node and read 8-bit value(s) from + * it. Returns 0 on success, -EINVAL if the property does not exist, + * -ENODATA if property does not have a value, and -EOVERFLOW if the + * property data isn't large enough. + * + * dts entry of array should be like: + * property = /bits/ 8 <0x50 0x60 0x70>; + * + * The out_values is modified only if a valid u8 value can be decoded. + */ +static inline int of_property_read_u8_array(const struct device_node *np, + const char *propname, + u8 *out_values, size_t sz) +{ + int ret = of_property_read_variable_u8_array(np, propname, out_values, + sz, 0); + if (ret >= 0) + return 0; + else + return ret; +} + +/** + * of_property_read_u16_array - Find and read an array of u16 from a property. + * + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @out_values: pointer to return value, modified only if return value is 0. + * @sz: number of array elements to read + * + * Search for a property in a device node and read 16-bit value(s) from + * it. Returns 0 on success, -EINVAL if the property does not exist, + * -ENODATA if property does not have a value, and -EOVERFLOW if the + * property data isn't large enough. + * + * dts entry of array should be like: + * property = /bits/ 16 <0x5000 0x6000 0x7000>; + * + * The out_values is modified only if a valid u16 value can be decoded. 
+ */ +static inline int of_property_read_u16_array(const struct device_node *np, + const char *propname, + u16 *out_values, size_t sz) +{ + int ret = of_property_read_variable_u16_array(np, propname, out_values, + sz, 0); + if (ret >= 0) + return 0; + else + return ret; +} + +/** + * of_property_read_u32_array - Find and read an array of 32 bit integers + * from a property. + * + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @out_values: pointer to return value, modified only if return value is 0. + * @sz: number of array elements to read + * + * Search for a property in a device node and read 32-bit value(s) from + * it. Returns 0 on success, -EINVAL if the property does not exist, + * -ENODATA if property does not have a value, and -EOVERFLOW if the + * property data isn't large enough. + * + * The out_values is modified only if a valid u32 value can be decoded. + */ +static inline int of_property_read_u32_array(const struct device_node *np, + const char *propname, + u32 *out_values, size_t sz) +{ + int ret = of_property_read_variable_u32_array(np, propname, out_values, + sz, 0); + if (ret >= 0) + return 0; + else + return ret; +} + +/** + * of_property_read_u64_array - Find and read an array of 64 bit integers + * from a property. + * + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @out_values: pointer to return value, modified only if return value is 0. + * @sz: number of array elements to read + * + * Search for a property in a device node and read 64-bit value(s) from + * it. Returns 0 on success, -EINVAL if the property does not exist, + * -ENODATA if property does not have a value, and -EOVERFLOW if the + * property data isn't large enough. + * + * The out_values is modified only if a valid u64 value can be decoded. 
+ */ +static inline int of_property_read_u64_array(const struct device_node *np, + const char *propname, + u64 *out_values, size_t sz) +{ + int ret = of_property_read_variable_u64_array(np, propname, out_values, + sz, 0); + if (ret >= 0) + return 0; + else + return ret; +} + /* * struct property *prop; * const __be32 *p; -- cgit v1.2.3 From 8475c8118551f806176b5af4d0e8657a5f015b95 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 11 Sep 2016 19:35:41 +0200 Subject: scsi: sd: Move DIF protection types to t10-pi.h These should go together with the rest of the T10 protection information definitions. [mkp: s/T10_DIF/T10_PI/] Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/linux/t10-pi.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index dd8de82cf5b5..9fba9dd33544 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -4,6 +4,26 @@ #include #include +/* + * A T10 PI-capable target device can be formatted with different + * protection schemes. Currently 0 through 3 are defined: + * + * Type 0 is regular (unprotected) I/O + * + * Type 1 defines the contents of the guard and reference tags + * + * Type 2 defines the contents of the guard and reference tags and + * uses 32-byte commands to seed the latter + * + * Type 3 defines the contents of the guard tag only + */ +enum t10_dif_type { + T10_PI_TYPE0_PROTECTION = 0x0, + T10_PI_TYPE1_PROTECTION = 0x1, + T10_PI_TYPE2_PROTECTION = 0x2, + T10_PI_TYPE3_PROTECTION = 0x3, +}; + /* * T10 Protection Information tuple. */ -- cgit v1.2.3 From bdd17e75cd97c5c39feee409890a91d0396640fe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Sep 2016 16:18:53 +0200 Subject: blk-mq: only allocate a single mq_map per tag_set The mapping is identical for all queues in a tag_set, so stop wasting memory for building multiple. 
Note that for now I've kept the mq_map pointer in the request_queue, but we'll need to investigate if we can remove it without suffering too much from the additional pointer chasing. The same would apply to the mq_ops pointer as well. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 60ef14cbcd2d..deda16a9bde4 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -67,6 +67,7 @@ struct blk_mq_hw_ctx { }; struct blk_mq_tag_set { + unsigned int *mq_map; struct blk_mq_ops *ops; unsigned int nr_hw_queues; unsigned int queue_depth; /* max hw supported */ -- cgit v1.2.3 From 7d7e0f90b70f6c5367c2d1c9a7e87dd228bd0816 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Sep 2016 16:18:54 +0200 Subject: blk-mq: remove ->map_queue All drivers use the default, so provide an inline version of it. If we ever need other queue mapping we can add an optional method back, although supporting it will also require major changes to the queue setup code. This provides better code generation, and better debuggability as well. 
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index deda16a9bde4..f01379f2b0ac 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -91,7 +91,6 @@ struct blk_mq_queue_data { }; typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); -typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int); typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); @@ -113,11 +112,6 @@ struct blk_mq_ops { */ queue_rq_fn *queue_rq; - /* - * Map to specific hardware queue - */ - map_queue_fn *map_queue; - /* * Called on request timeout */ @@ -223,7 +217,6 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) return unique_tag & BLK_MQ_UNIQUE_TAG_MASK; } -struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); int blk_mq_request_started(struct request *rq); void blk_mq_start_request(struct request *rq); -- cgit v1.2.3 From da695ba236b993f07a540d35c17f271ef08c89f3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Sep 2016 16:18:55 +0200 Subject: blk-mq: allow the driver to pass in a queue mapping This allows drivers specify their own queue mapping by overriding the setup-time function that builds the mq_map. This can be used for example to build the map based on the MSI-X vector mapping provided by the core interrupt layer for PCI devices. 
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index f01379f2b0ac..6737fd7946f4 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -104,6 +104,7 @@ typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); typedef void (busy_tag_iter_fn)(struct request *, void *, bool); typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int); +typedef int (map_queues_fn)(struct blk_mq_tag_set *set); struct blk_mq_ops { @@ -144,6 +145,8 @@ struct blk_mq_ops { init_request_fn *init_request; exit_request_fn *exit_request; reinit_request_fn *reinit_request; + + map_queues_fn *map_queues; }; enum { -- cgit v1.2.3 From 973c4e372c8f71a15ac39765e657ded70fc87d41 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Sep 2016 16:18:56 +0200 Subject: blk-mq: provide a default queue mapping for PCI device Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/blk-mq-pci.h | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 include/linux/blk-mq-pci.h (limited to 'include/linux') diff --git a/include/linux/blk-mq-pci.h b/include/linux/blk-mq-pci.h new file mode 100644 index 000000000000..6ab595259112 --- /dev/null +++ b/include/linux/blk-mq-pci.h @@ -0,0 +1,9 @@ +#ifndef _LINUX_BLK_MQ_PCI_H +#define _LINUX_BLK_MQ_PCI_H + +struct blk_mq_tag_set; +struct pci_dev; + +int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev); + +#endif /* _LINUX_BLK_MQ_PCI_H */ -- cgit v1.2.3 From 1b157939f92ae22d10b9d52baaa14f826927f5ff Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Sep 2016 16:18:59 +0200 Subject: blk-mq: get rid of the cpumask in struct blk_mq_tags Unused now that NVMe sets up irq affinity before calling into blk-mq. 
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 6737fd7946f4..c5a97d7cef93 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -201,7 +201,6 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int op, unsigned int flags, unsigned int hctx_idx); struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); -struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags); enum { BLK_MQ_UNIQUE_TAG_BITS = 16, -- cgit v1.2.3 From 477b0229ac9bc275f6f8d2c27a2d08b246fccd0e Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 16 Nov 2015 15:53:13 +0100 Subject: mtd: introduce the mtd_pairing_scheme concept MLC and TLC NAND devices are using NAND cells exposing more than one bit, but instead of attaching all the bits in a given cell to a single NAND page, each bit is usually attached to a different page. This concept is called 'page pairing', and has significant impacts on the flash storage usage. The main problem shown by these devices is that interrupting a page program operation may not only corrupt the page we are programming but also the page it is paired with, hence the need to expose to MTD users the pairing scheme information. The pairing API allows one to query pairing information attached to a given page (here called wunit), or the other way around (the wunit pointed to by pairing information). It also provides several helpers to help the conversion between absolute offsets and wunits, and query the number of pairing groups. 
Signed-off-by: Boris Brezillon Reviewed-by: Brian Norris --- include/linux/mtd/mtd.h | 107 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 29a170612203..13f8052b9ff9 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -127,6 +127,82 @@ struct mtd_ooblayout_ops { struct mtd_oob_region *oobfree); }; +/** + * struct mtd_pairing_info - page pairing information + * + * @pair: pair id + * @group: group id + * + * The term "pair" is used here, even though TLC NANDs might group pages by 3 + * (3 bits in a single cell). A pair should regroup all pages that are sharing + * the same cell. Pairs are then indexed in ascending order. + * + * @group is defining the position of a page in a given pair. It can also be + * seen as the bit position in the cell: page attached to bit 0 belongs to + * group 0, page attached to bit 1 belongs to group 1, etc. + * + * Example: + * The H27UCG8T2BTR-BC datasheet describes the following pairing scheme: + * + * group-0 group-1 + * + * pair-0 page-0 page-4 + * pair-1 page-1 page-5 + * pair-2 page-2 page-8 + * ... + * pair-127 page-251 page-255 + * + * + * Note that the "group" and "pair" terms were extracted from Samsung and + * Hynix datasheets, and might be referenced under other names in other + * datasheets (Micron is describing this concept as "shared pages"). + */ +struct mtd_pairing_info { + int pair; + int group; +}; + +/** + * struct mtd_pairing_scheme - page pairing scheme description + * + * @ngroups: number of groups. Should be related to the number of bits + * per cell. + * @get_info: converts a write-unit (page number within an erase block) into + * mtd_pairing information (pair + group). This function should + * fill the info parameter based on the wunit index or return + * -EINVAL if the wunit parameter is invalid. 
+ * @get_wunit: converts pairing information into a write-unit (page) number. + * This function should return the wunit index pointed by the + * pairing information described in the info argument. It should + * return -EINVAL, if there's no wunit corresponding to the + * passed pairing information. + * + * See mtd_pairing_info documentation for a detailed explanation of the + * pair and group concepts. + * + * The mtd_pairing_scheme structure provides a generic solution to represent + * NAND page pairing scheme. Instead of exposing two big tables to do the + * write-unit <-> (pair + group) conversions, we ask the MTD drivers to + * implement the ->get_info() and ->get_wunit() functions. + * + * MTD users will then be able to query these information by using the + * mtd_pairing_info_to_wunit() and mtd_wunit_to_pairing_info() helpers. + * + * @ngroups is here to help MTD users iterating over all the pages in a + * given pair. This value can be retrieved by MTD users using the + * mtd_pairing_groups() helper. + * + * Examples are given in the mtd_pairing_info_to_wunit() and + * mtd_wunit_to_pairing_info() documentation. + */ +struct mtd_pairing_scheme { + int ngroups; + int (*get_info)(struct mtd_info *mtd, int wunit, + struct mtd_pairing_info *info); + int (*get_wunit)(struct mtd_info *mtd, + const struct mtd_pairing_info *info); +}; + struct module; /* only needed for owner field in mtd_info */ struct mtd_info { @@ -188,6 +264,9 @@ struct mtd_info { /* OOB layout description */ const struct mtd_ooblayout_ops *ooblayout; + /* NAND pairing scheme, only provided for MLC/TLC NANDs */ + const struct mtd_pairing_scheme *pairing; + /* the ecc step size. 
*/ unsigned int ecc_step_size; @@ -296,6 +375,12 @@ static inline void mtd_set_ooblayout(struct mtd_info *mtd, mtd->ooblayout = ooblayout; } +static inline void mtd_set_pairing_scheme(struct mtd_info *mtd, + const struct mtd_pairing_scheme *pairing) +{ + mtd->pairing = pairing; +} + static inline void mtd_set_of_node(struct mtd_info *mtd, struct device_node *np) { @@ -312,6 +397,11 @@ static inline int mtd_oobavail(struct mtd_info *mtd, struct mtd_oob_ops *ops) return ops->mode == MTD_OPS_AUTO_OOB ? mtd->oobavail : mtd->oobsize; } +int mtd_wunit_to_pairing_info(struct mtd_info *mtd, int wunit, + struct mtd_pairing_info *info); +int mtd_pairing_info_to_wunit(struct mtd_info *mtd, + const struct mtd_pairing_info *info); +int mtd_pairing_groups(struct mtd_info *mtd); int mtd_erase(struct mtd_info *mtd, struct erase_info *instr); int mtd_point(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, void **virt, resource_size_t *phys); @@ -397,6 +487,23 @@ static inline uint32_t mtd_mod_by_ws(uint64_t sz, struct mtd_info *mtd) return do_div(sz, mtd->writesize); } +static inline int mtd_wunit_per_eb(struct mtd_info *mtd) +{ + return mtd->erasesize / mtd->writesize; +} + +static inline int mtd_offset_to_wunit(struct mtd_info *mtd, loff_t offs) +{ + return mtd_div_by_ws(mtd_mod_by_eb(offs, mtd), mtd); +} + +static inline loff_t mtd_wunit_to_offset(struct mtd_info *mtd, loff_t base, + int wunit) +{ + return base + (wunit * mtd->writesize); +} + + static inline int mtd_has_oob(const struct mtd_info *mtd) { return mtd->_read_oob && mtd->_write_oob; -- cgit v1.2.3 From 68620e594c250ba8c43a78e77f5296cb9952582e Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 14 Sep 2016 20:54:12 +0200 Subject: leds: gpio: introduce gpio_blink_set_t Introduce a typedef gpio_blink_set_t to improve readability of the code. 
Signed-off-by: Heiner Kallweit Signed-off-by: Jacek Anaszewski --- include/linux/leds.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 8a3b5d29602f..ddfcb2df3656 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -359,6 +359,11 @@ struct led_platform_data { struct led_info *leds; }; +struct gpio_desc; +typedef int (*gpio_blink_set_t)(struct gpio_desc *desc, int state, + unsigned long *delay_on, + unsigned long *delay_off); + /* For the leds-gpio driver */ struct gpio_led { const char *name; @@ -382,9 +387,7 @@ struct gpio_led_platform_data { #define GPIO_LED_NO_BLINK_LOW 0 /* No blink GPIO state low */ #define GPIO_LED_NO_BLINK_HIGH 1 /* No blink GPIO state high */ #define GPIO_LED_BLINK 2 /* Please, blink */ - int (*gpio_blink_set)(struct gpio_desc *desc, int state, - unsigned long *delay_on, - unsigned long *delay_off); + gpio_blink_set_t gpio_blink_set; }; #ifdef CONFIG_NEW_LEDS -- cgit v1.2.3 From 434cec62a6d73b8c8080cd992bc97a564fdd5a5a Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Thu, 15 Sep 2016 07:37:30 +0200 Subject: bus: mvebu-mbus: Provide stub function for mvebu_mbus_get_io_win_info() This patch provides a stub function for mvebu_mbus_get_io_win_info(), which will be used for all non-Orion (ARM32 MVEBU) platforms for compile test coverage. On such platforms this function will return an error so that drivers might detect a potential problem. 
Signed-off-by: Stefan Roese Acked-by: Gregory CLEMENT Cc: Thomas Petazzoni Cc: Marcin Wojtas Cc: Arnd Bergmann Cc: Andrew Lunn Cc: Vinod Koul Signed-off-by: Vinod Koul --- include/linux/mbus.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mbus.h b/include/linux/mbus.h index d610232762e3..2931aa43dab1 100644 --- a/include/linux/mbus.h +++ b/include/linux/mbus.h @@ -11,6 +11,8 @@ #ifndef __LINUX_MBUS_H #define __LINUX_MBUS_H +#include + struct resource; struct mbus_dram_target_info @@ -55,6 +57,8 @@ struct mbus_dram_target_info #ifdef CONFIG_PLAT_ORION extern const struct mbus_dram_target_info *mv_mbus_dram_info(void); extern const struct mbus_dram_target_info *mv_mbus_dram_info_nooverlap(void); +int mvebu_mbus_get_io_win_info(phys_addr_t phyaddr, u32 *size, u8 *target, + u8 *attr); #else static inline const struct mbus_dram_target_info *mv_mbus_dram_info(void) { @@ -64,14 +68,24 @@ static inline const struct mbus_dram_target_info *mv_mbus_dram_info_nooverlap(vo { return NULL; } +static inline int mvebu_mbus_get_io_win_info(phys_addr_t phyaddr, u32 *size, + u8 *target, u8 *attr) +{ + /* + * On all ARM32 MVEBU platforms with MBus support, this stub + * function will not get called. The real function from the + * MBus driver is called instead. 
ARM64 MVEBU platforms like + * the Armada 3700 could use the mv_xor device driver which calls + * into this function + */ + return -EINVAL; +} #endif int mvebu_mbus_save_cpu_target(u32 __iomem *store_addr); void mvebu_mbus_get_pcie_mem_aperture(struct resource *res); void mvebu_mbus_get_pcie_io_aperture(struct resource *res); int mvebu_mbus_get_dram_win_info(phys_addr_t phyaddr, u8 *target, u8 *attr); -int mvebu_mbus_get_io_win_info(phys_addr_t phyaddr, u32 *size, u8 *target, - u8 *attr); int mvebu_mbus_add_window_remap_by_id(unsigned int target, unsigned int attribute, phys_addr_t base, size_t size, -- cgit v1.2.3 From 71d0bc65ba089e8e769cddad66dae8cb4c49a0d4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 23 Aug 2016 16:09:40 +0300 Subject: dmaengine: hsu: refactor hsu_dma_do_irq() to return int Since we have nice macro IRQ_RETVAL() we would use it to convert a flag of handled interrupt from int to irqreturn_t. The rationale of doing this is: a) hence we implicitly mark hsu_dma_do_irq() as an auxiliary function that can't be used as interrupt handler directly, and b) to be in align with serial driver which is using serial8250_handle_irq() that returns plain int by design. 
Signed-off-by: Andy Shevchenko Acked-by: Greg Kroah-Hartman Signed-off-by: Vinod Koul --- include/linux/dma/hsu.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma/hsu.h b/include/linux/dma/hsu.h index aaff68efba5d..197eec63e501 100644 --- a/include/linux/dma/hsu.h +++ b/include/linux/dma/hsu.h @@ -41,8 +41,7 @@ struct hsu_dma_chip { /* Export to the internal users */ int hsu_dma_get_status(struct hsu_dma_chip *chip, unsigned short nr, u32 *status); -irqreturn_t hsu_dma_do_irq(struct hsu_dma_chip *chip, unsigned short nr, - u32 status); +int hsu_dma_do_irq(struct hsu_dma_chip *chip, unsigned short nr, u32 status); /* Export to the platform drivers */ int hsu_dma_probe(struct hsu_dma_chip *chip); @@ -53,10 +52,10 @@ static inline int hsu_dma_get_status(struct hsu_dma_chip *chip, { return 0; } -static inline irqreturn_t hsu_dma_do_irq(struct hsu_dma_chip *chip, - unsigned short nr, u32 status) +static inline int hsu_dma_do_irq(struct hsu_dma_chip *chip, unsigned short nr, + u32 status) { - return IRQ_NONE; + return 0; } static inline int hsu_dma_probe(struct hsu_dma_chip *chip) { return -ENODEV; } static inline int hsu_dma_remove(struct hsu_dma_chip *chip) { return 0; } -- cgit v1.2.3 From 8f39850dffa9cba0f6920ff907710bcddc7f2a26 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 15 Sep 2016 13:32:11 -0400 Subject: fscrypto: improved validation when loading inode encryption metadata - Validate fscrypt_context.format and fscrypt_context.flags. If unrecognized values are set, then the kernel may not know how to interpret the encrypted file, so it should fail the operation. - Validate that AES_256_XTS is used for contents and that AES_256_CTS is used for filenames. It was previously possible for the kernel to accept these reversed, though it would have taken manual editing of the block device. This was not intended. - Fail cleanly rather than BUG()-ing if a file has an unexpected type. 
Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypto.h | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h index cfa6cde25f8e..00813c2a8495 100644 --- a/include/linux/fscrypto.h +++ b/include/linux/fscrypto.h @@ -111,23 +111,6 @@ struct fscrypt_completion_result { struct fscrypt_completion_result ecr = { \ COMPLETION_INITIALIZER((ecr).completion), 0 } -static inline int fscrypt_key_size(int mode) -{ - switch (mode) { - case FS_ENCRYPTION_MODE_AES_256_XTS: - return FS_AES_256_XTS_KEY_SIZE; - case FS_ENCRYPTION_MODE_AES_256_GCM: - return FS_AES_256_GCM_KEY_SIZE; - case FS_ENCRYPTION_MODE_AES_256_CBC: - return FS_AES_256_CBC_KEY_SIZE; - case FS_ENCRYPTION_MODE_AES_256_CTS: - return FS_AES_256_CTS_KEY_SIZE; - default: - BUG(); - } - return 0; -} - #define FS_FNAME_NUM_SCATTER_ENTRIES 4 #define FS_CRYPTO_BLOCK_SIZE 16 #define FS_FNAME_CRYPTO_DIGEST_SIZE 32 @@ -202,13 +185,6 @@ static inline bool fscrypt_valid_filenames_enc_mode(u32 mode) return (mode == FS_ENCRYPTION_MODE_AES_256_CTS); } -static inline u32 fscrypt_validate_encryption_key_size(u32 mode, u32 size) -{ - if (size == fscrypt_key_size(mode)) - return size; - return 0; -} - static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) { if (str->len == 1 && str->name[0] == '.') -- cgit v1.2.3 From 9e5ab85deb2c80f9707be39cd0a2c5f90c89dd97 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 15 Sep 2016 18:25:07 -0400 Subject: blockgroup_lock.h: remove debris from bgl_lock_ptr() conversion An obsolete comment and extra parentheses were left over from when the sb_bgl_lock() macro was replaced with the bgl_lock_ptr() function. 
Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger --- include/linux/blockgroup_lock.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blockgroup_lock.h b/include/linux/blockgroup_lock.h index e44b88ba552b..61b583d7519a 100644 --- a/include/linux/blockgroup_lock.h +++ b/include/linux/blockgroup_lock.h @@ -49,14 +49,10 @@ static inline void bgl_lock_init(struct blockgroup_lock *bgl) spin_lock_init(&bgl->locks[i].lock); } -/* - * The accessor is a macro so we can embed a blockgroup_lock into different - * superblock types - */ static inline spinlock_t * bgl_lock_ptr(struct blockgroup_lock *bgl, unsigned int block_group) { - return &bgl->locks[(block_group) & (NR_BG_LOCKS-1)].lock; + return &bgl->locks[block_group & (NR_BG_LOCKS-1)].lock; } #endif -- cgit v1.2.3 From 7c5f6b320b59cb4a674750bbb29a248b5bae7641 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 15 Sep 2016 18:29:06 -0400 Subject: blockgroup_lock.h: simplify definition of NR_BG_LOCKS We can use ilog2() to more easily produce the desired NR_BG_LOCKS. This works because ilog2() is evaluated at compile-time when its argument is a compile-time constant. I did not change the chosen NR_BG_LOCKS values. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger --- include/linux/blockgroup_lock.h | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blockgroup_lock.h b/include/linux/blockgroup_lock.h index 61b583d7519a..225bdb7daec7 100644 --- a/include/linux/blockgroup_lock.h +++ b/include/linux/blockgroup_lock.h @@ -10,28 +10,10 @@ #include #ifdef CONFIG_SMP - -/* - * We want a power-of-two. Is there a better way than this? 
- */ - -#if NR_CPUS >= 32 -#define NR_BG_LOCKS 128 -#elif NR_CPUS >= 16 -#define NR_BG_LOCKS 64 -#elif NR_CPUS >= 8 -#define NR_BG_LOCKS 32 -#elif NR_CPUS >= 4 -#define NR_BG_LOCKS 16 -#elif NR_CPUS >= 2 -#define NR_BG_LOCKS 8 +#define NR_BG_LOCKS (4 << ilog2(NR_CPUS < 32 ? NR_CPUS : 32)) #else -#define NR_BG_LOCKS 4 -#endif - -#else /* CONFIG_SMP */ #define NR_BG_LOCKS 1 -#endif /* CONFIG_SMP */ +#endif struct bgl_lock { spinlock_t lock; -- cgit v1.2.3 From eeb7df270f1c4629b52fc1f98035fe9e7fe63df2 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 16 Sep 2016 12:19:07 +0530 Subject: include: extcon: Fix compilation error caused because of incomplete merge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following compilation error caused due to incomplete merge. This is observed if CONFIG_EXTCON is not set. In file included from ./include/linux/mfd/palmas.h:23:0, from drivers/input/misc/palmas-pwrbutton.c:22: ./include/linux/extcon.h: In function ‘extcon_sync’: ./include/linux/extcon.h:361:1: error: expected declaration specifiers before ‘<<’ token ./include/linux/extcon.h:370:1: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘{’ token ./include/linux/extcon.h:376:1: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘>>’ token ./include/linux/extcon.h:381:1: error: expected declaration specifiers before ‘<<’ token ./include/linux/extcon.h:390:1: error: expected declaration specifiers or ‘...’ before ‘==’ token ./include/linux/extcon.h:476:11: warning: ‘struct extcon_specific_cable_nb’ declared inside parameter list [enabled by default] ./include/linux/extcon.h:476:11: warning: its scope is only this definition or declaration, which is probably not what you want [enabled by default] ./include/linux/extcon.h:474:19: error: storage class specified for parameter ‘extcon_register_interest’ ./include/linux/extcon.h:474:19: warning: parameter ‘extcon_register_interest’ declared 
‘inline’ [enabled by default] ./include/linux/extcon.h:477:1: warning: ‘always_inline’ attribute ignored [-Wattributes] ./include/linux/extcon.h:474:19: error: ‘no_instrument_function’ attribute applies only to functions ./include/linux/extcon.h:477:1: error: expected ‘;’, ‘,’ or ‘)’ before ‘{’ token Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Greg Kroah-Hartman --- include/linux/extcon.h | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 2b9f15156115..b871c0cb1f02 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -358,8 +358,6 @@ static inline int extcon_set_state_sync(struct extcon_dev *edev, unsigned int id } static inline int extcon_sync(struct extcon_dev *edev, unsigned int id) -<<<<<<< HEAD -======= { return 0; } @@ -373,50 +371,23 @@ static inline int extcon_get_property(struct extcon_dev *edev, unsigned int id, static inline int extcon_set_property(struct extcon_dev *edev, unsigned int id, unsigned int prop, union extcon_property_value prop_val) ->>>>>>> next { return 0; } -<<<<<<< HEAD -static inline int extcon_get_property(struct extcon_dev *edev, unsigned int id, - unsigned int prop, - union extcon_property_value *prop_val) -{ - return 0; -} -static inline int extcon_set_property(struct extcon_dev *edev, unsigned int id, - unsigned int prop, -======= static inline int extcon_set_property_sync(struct extcon_dev *edev, unsigned int id, unsigned int prop, ->>>>>>> next union extcon_property_value prop_val) { return 0; } -<<<<<<< HEAD -static inline int extcon_set_property_sync(struct extcon_dev *edev, - unsigned int id, unsigned int prop, - union extcon_property_value prop_val) -======= -static inline int extcon_get_property_capability(struct extcon_dev *edev, - unsigned int id, unsigned int prop) ->>>>>>> next -{ - return 0; -} - -<<<<<<< HEAD static inline int extcon_get_property_capability(struct extcon_dev *edev, 
unsigned int id, unsigned int prop) { return 0; } -======= ->>>>>>> next static inline int extcon_set_property_capability(struct extcon_dev *edev, unsigned int id, unsigned int prop) { -- cgit v1.2.3 From c6c314a613cd7d03fb97713e0d642b493de42e69 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 15 Sep 2016 22:45:43 -0700 Subject: sched/core: Add try_get_task_stack() and put_task_stack() There are a few places in the kernel that access stack memory belonging to a different task. Before we can start freeing task stacks before the task_struct is freed, we need a way for those code paths to pin the stack. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jann Horn Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/17a434f50ad3d77000104f21666575e10a9c1fbd.1474003868.git.luto@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index a287e8b13549..a95867267e9f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3094,11 +3094,19 @@ static inline struct thread_info *task_thread_info(struct task_struct *task) { return &task->thread_info; } + +/* + * When accessing the stack of a non-current task that might exit, use + * try_get_task_stack() instead. task_stack_page will return a pointer + * that could get freed out from under you. 
+ */ static inline void *task_stack_page(const struct task_struct *task) { return task->stack; } + #define setup_thread_stack(new,old) do { } while(0) + static inline unsigned long *end_of_stack(const struct task_struct *task) { return task->stack; @@ -3134,6 +3142,14 @@ static inline unsigned long *end_of_stack(struct task_struct *p) } #endif + +static inline void *try_get_task_stack(struct task_struct *tsk) +{ + return task_stack_page(tsk); +} + +static inline void put_task_stack(struct task_struct *tsk) {} + #define task_stack_end_corrupted(task) \ (*(end_of_stack(task)) != STACK_END_MAGIC) -- cgit v1.2.3 From 68f24b08ee892d47bdef925d676e1ae1ccc316f8 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 15 Sep 2016 22:45:48 -0700 Subject: sched/core: Free the stack early if CONFIG_THREAD_INFO_IN_TASK We currently keep every task's stack around until the task_struct itself is freed. This means that we keep the stack allocation alive for longer than necessary and that, under load, we free stacks in big batches whenever RCU drops the last task reference. Neither of these is good for reuse of cache-hot memory, and freeing in batches prevents us from usefully caching small numbers of vmalloced stacks. On architectures that have thread_info on the stack, we can't easily change this, but on architectures that set THREAD_INFO_IN_TASK, we can free it as soon as the task is dead. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Jann Horn Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/08ca06cde00ebed0046c5d26cbbf3fbb7ef5b812.1474003868.git.luto@kernel.org Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 4 +++- include/linux/sched.h | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9c04d44eeb3c..325f649d77ff 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -186,7 +186,9 @@ extern struct task_group root_task_group; #endif #ifdef CONFIG_THREAD_INFO_IN_TASK -# define INIT_TASK_TI(tsk) .thread_info = INIT_THREAD_INFO(tsk), +# define INIT_TASK_TI(tsk) \ + .thread_info = INIT_THREAD_INFO(tsk), \ + .stack_refcount = ATOMIC_INIT(1), #else # define INIT_TASK_TI(tsk) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index a95867267e9f..abb795afc823 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1936,6 +1936,10 @@ struct task_struct { #ifdef CONFIG_VMAP_STACK struct vm_struct *stack_vm_area; #endif +#ifdef CONFIG_THREAD_INFO_IN_TASK + /* A live task holds one reference. */ + atomic_t stack_refcount; +#endif /* CPU-specific state of this task */ struct thread_struct thread; /* @@ -3143,12 +3147,22 @@ static inline unsigned long *end_of_stack(struct task_struct *p) #endif +#ifdef CONFIG_THREAD_INFO_IN_TASK +static inline void *try_get_task_stack(struct task_struct *tsk) +{ + return atomic_inc_not_zero(&tsk->stack_refcount) ? 
+ task_stack_page(tsk) : NULL; +} + +extern void put_task_stack(struct task_struct *tsk); +#else static inline void *try_get_task_stack(struct task_struct *tsk) { return task_stack_page(tsk); } static inline void put_task_stack(struct task_struct *tsk) {} +#endif #define task_stack_end_corrupted(task) \ (*(end_of_stack(task)) != STACK_END_MAGIC) -- cgit v1.2.3 From 987068fcbdb7a085bb11151b91dc6f4c956c4a1b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Sep 2016 17:13:40 +0100 Subject: of/irq: Break out msi-map lookup (again) The PCI msi-map code is already doing double-duty translating IDs and retrieving MSI parents, which unsurprisingly is the same functionality we need for the identically-formatted PCI iommu-map property. Drag the core parsing routine up yet another layer into the general OF-PCI code, and further generalise it for either kind of lookup in either flavour of map property. Acked-by: Rob Herring Acked-by: Marc Zyngier Tested-by: Lorenzo Pieralisi Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- include/linux/of_pci.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h index b969e9443962..7fd5cfce9140 100644 --- a/include/linux/of_pci.h +++ b/include/linux/of_pci.h @@ -17,6 +17,9 @@ int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin); int of_pci_parse_bus_range(struct device_node *node, struct resource *res); int of_get_pci_domain_nr(struct device_node *node); void of_pci_check_probe_only(void); +int of_pci_map_rid(struct device_node *np, u32 rid, + const char *map_name, const char *map_mask_name, + struct device_node **target, u32 *id_out); #else static inline int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq) { @@ -52,6 +55,13 @@ of_get_pci_domain_nr(struct device_node *node) return -1; } +static inline int of_pci_map_rid(struct device_node *np, u32 rid, + const char *map_name, const char 
*map_mask_name, + struct device_node **target, u32 *id_out) +{ + return -EINVAL; +} + static inline void of_pci_check_probe_only(void) { } #endif -- cgit v1.2.3 From 57f98d2f61e191ef9d06863c9ce3f8621f3671ef Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 13 Sep 2016 10:54:14 +0100 Subject: iommu: Introduce iommu_fwspec Introduce a common structure to hold the per-device firmware data that most IOMMU drivers need to keep track of. This enables us to configure much of that data from common firmware code, and consolidate a lot of the equivalent implementations, device look-up tables, etc. which are currently strewn across IOMMU drivers. This will also enable us to address the outstanding "multiple IOMMUs on the platform bus" problem by tweaking IOMMU API calls to prefer dev->fwspec->ops before falling back to dev->bus->iommu_ops, and thus gracefully handle those troublesome systems which we currently cannot. As the first user, hook up the OF IOMMU configuration mechanism. The driver-defined nature of DT cells means that we still need the drivers to translate and add the IDs themselves, but future users such as the much less free-form ACPI IORT will be much simpler and self-contained. CC: Greg Kroah-Hartman Suggested-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- include/linux/device.h | 3 +++ include/linux/iommu.h | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 38f02814d53a..bc41e87a969b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -41,6 +41,7 @@ struct device_node; struct fwnode_handle; struct iommu_ops; struct iommu_group; +struct iommu_fwspec; struct bus_attribute { struct attribute attr; @@ -765,6 +766,7 @@ struct device_dma_parameters { * gone away. This should be set by the allocator of the * device (i.e. the bus driver that discovered the device).
* @iommu_group: IOMMU group the device belongs to. + * @iommu_fwspec: IOMMU-specific properties supplied by firmware. * * @offline_disabled: If set, the device is permanently online. * @offline: Set after successful invocation of bus type's .offline(). @@ -849,6 +851,7 @@ struct device { void (*release)(struct device *dev); struct iommu_group *iommu_group; + struct iommu_fwspec *iommu_fwspec; bool offline_disabled:1; bool offline:1; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a35fb8b42e1a..436dc21318af 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -331,10 +331,32 @@ extern struct iommu_group *pci_device_group(struct device *dev); /* Generic device grouping function */ extern struct iommu_group *generic_device_group(struct device *dev); +/** + * struct iommu_fwspec - per-device IOMMU instance data + * @ops: ops for this device's IOMMU + * @iommu_fwnode: firmware handle for this device's IOMMU + * @iommu_priv: IOMMU driver private data for this device + * @num_ids: number of associated device IDs + * @ids: IDs which this device may present to the IOMMU + */ +struct iommu_fwspec { + const struct iommu_ops *ops; + struct fwnode_handle *iommu_fwnode; + void *iommu_priv; + unsigned int num_ids; + u32 ids[1]; +}; + +int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, + const struct iommu_ops *ops); +void iommu_fwspec_free(struct device *dev); +int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids); + #else /* CONFIG_IOMMU_API */ struct iommu_ops {}; struct iommu_group {}; +struct iommu_fwspec {}; static inline bool iommu_present(struct bus_type *bus) { @@ -541,6 +563,23 @@ static inline void iommu_device_unlink(struct device *dev, struct device *link) { } +static inline int iommu_fwspec_init(struct device *dev, + struct fwnode_handle *iommu_fwnode, + const struct iommu_ops *ops) +{ + return -ENODEV; +} + +static inline void iommu_fwspec_free(struct device *dev) +{ +} + +static inline int 
iommu_fwspec_add_ids(struct device *dev, u32 *ids, + int num_ids) +{ + return -ENODEV; +} + #endif /* CONFIG_IOMMU_API */ #endif /* __LINUX_IOMMU_H */ -- cgit v1.2.3 From 44bb7e243bd4b4e5c79de2452cd9762582f58925 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Sep 2016 17:13:59 +0100 Subject: iommu/dma: Add support for mapping MSIs When an MSI doorbell is located downstream of an IOMMU, attaching devices to a DMA ops domain and switching on translation leads to a rude shock when their attempt to write to the physical address returned by the irqchip driver faults (or worse, writes into some already-mapped buffer) and no interrupt is forthcoming. Address this by adding a hook for relevant irqchip drivers to call from their compose_msi_msg() callback, to swizzle the physical address with an appropriately-mapped IOVA for any device attached to one of our DMA ops domains. Acked-by: Thomas Gleixner Acked-by: Marc Zyngier Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- include/linux/dma-iommu.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 81c5c8d167ad..5ee806e41b5c 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -21,6 +21,7 @@ #ifdef CONFIG_IOMMU_DMA #include +#include int iommu_dma_init(void); @@ -62,9 +63,13 @@ void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int iommu_dma_supported(struct device *dev, u64 mask); int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); +/* The DMA API isn't _quite_ the whole story, though...
*/ +void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg); + #else struct iommu_domain; +struct msi_msg; static inline int iommu_dma_init(void) { @@ -80,6 +85,10 @@ static inline void iommu_put_dma_cookie(struct iommu_domain *domain) { } +static inline void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg) +{ +} + #endif /* CONFIG_IOMMU_DMA */ #endif /* __KERNEL__ */ #endif /* __DMA_IOMMU_H */ -- cgit v1.2.3 From fade1ec055dc6b6373e7487906b7899b41d0c46f Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Sep 2016 17:14:00 +0100 Subject: iommu/dma: Avoid PCI host bridge windows With our DMA ops enabled for PCI devices, we should avoid allocating IOVAs which a host bridge might misinterpret as peer-to-peer DMA and lead to faults, corruption or other badness. To be safe, punch out holes for all of the relevant host bridge's windows when initialising a DMA domain for a PCI device. CC: Marek Szyprowski CC: Inki Dae Reported-by: Lorenzo Pieralisi Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- include/linux/dma-iommu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 5ee806e41b5c..32c589062bd9 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -30,7 +30,8 @@ int iommu_get_dma_cookie(struct iommu_domain *domain); void iommu_put_dma_cookie(struct iommu_domain *domain); /* Setup call for arch DMA mapping code */ -int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size); +int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, + u64 size, struct device *dev); /* General helpers for DMA-API <-> IOMMU-API interaction */ int dma_direction_to_prot(enum dma_data_direction dir, bool coherent); -- cgit v1.2.3 From f2b20f6ee842313a0d681dbbf7f87b70291a6a3b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 16 Sep 2016 12:44:20 +0200 Subject: vfs: move permission checking into 
notify_change() for utimes(NULL) This fixes a bug where the permission was not properly checked in overlayfs. The testcase is ltp/utimensat01. It is also cleaner and safer to do the permission checking in the vfs helper instead of the caller. This patch introduces an additional ia_valid flag ATTR_TOUCH (since touch(1) is the most obvious user of utimes(NULL)) that is passed into notify_change whenever the conditions for this special permission checking mode are met. Reported-by: Aihua Zhang Signed-off-by: Miklos Szeredi Tested-by: Aihua Zhang Cc: # v3.18+ --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 901e25d495cc..7c391366fb43 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -224,6 +224,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define ATTR_KILL_PRIV (1 << 14) #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ #define ATTR_TIMES_SET (1 << 16) +#define ATTR_TOUCH (1 << 17) /* * Whiteout is represented by a char device. The following constants define the -- cgit v1.2.3 From 598e3c8f72f5b77c84d2cb26cfd936ffb3cfdbaa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 16 Sep 2016 12:44:20 +0200 Subject: vfs: update ovl inode before relatime check On overlayfs relatime_need_update() needs inode times to be correct on overlay inode. But i_mtime and i_ctime are updated by filesystem code on underlying inode only, so they will be out-of-date on the overlay inode. This patch copies the times from the underlying inode if needed. This can't be done if called from RCU lookup (link following) but link m/ctime are not updated by fs, so this is all right. This patch doesn't change functionality for anything but overlayfs. 
Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7c391366fb43..7db097d673a8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2007,7 +2007,6 @@ enum file_time_flags { S_VERSION = 8, }; -extern bool atime_needs_update(const struct path *, struct inode *); extern void touch_atime(const struct path *); static inline void file_accessed(struct file *file) { -- cgit v1.2.3 From f3fbbb079263bd29ae592478de6808db7e708267 Mon Sep 17 00:00:00 2001 From: Aihua Zhang Date: Thu, 7 Jul 2016 15:37:53 +0800 Subject: fsnotify: support overlayfs When an event occurs direct it to the overlay inode instead of the real underlying inode. This will work even if the file was first on the lower layer and then copied up, while the watch is there. This is because the watch is on the overlay inode, which stays the same through the copy-up. For filesystems other than overlayfs this is a no-op, except for the performance impact of an extra pointer dereference. Verified to work correctly with the inotify/fanotify tests in LTP. Signed-off-by: Aihua Zhang Signed-off-by: Miklos Szeredi Cc: Jan Kara Cc: Eric Paris --- include/linux/fsnotify.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index eed9e853a06f..b8bcc058e031 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -29,7 +29,11 @@ static inline int fsnotify_parent(struct path *path, struct dentry *dentry, __u3 static inline int fsnotify_perm(struct file *file, int mask) { struct path *path = &file->f_path; - struct inode *inode = file_inode(file); + /* + * Do not use file_inode() here or anywhere in this file to get the + * inode. That would break *notity on overlayfs.
+ */ + struct inode *inode = path->dentry->d_inode; __u32 fsnotify_mask = 0; int ret; @@ -173,7 +177,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) static inline void fsnotify_access(struct file *file) { struct path *path = &file->f_path; - struct inode *inode = file_inode(file); + struct inode *inode = path->dentry->d_inode; __u32 mask = FS_ACCESS; if (S_ISDIR(inode->i_mode)) @@ -191,7 +195,7 @@ static inline void fsnotify_access(struct file *file) static inline void fsnotify_modify(struct file *file) { struct path *path = &file->f_path; - struct inode *inode = file_inode(file); + struct inode *inode = path->dentry->d_inode; __u32 mask = FS_MODIFY; if (S_ISDIR(inode->i_mode)) @@ -209,7 +213,7 @@ static inline void fsnotify_modify(struct file *file) static inline void fsnotify_open(struct file *file) { struct path *path = &file->f_path; - struct inode *inode = file_inode(file); + struct inode *inode = path->dentry->d_inode; __u32 mask = FS_OPEN; if (S_ISDIR(inode->i_mode)) @@ -225,7 +229,7 @@ static inline void fsnotify_open(struct file *file) static inline void fsnotify_close(struct file *file) { struct path *path = &file->f_path; - struct inode *inode = file_inode(file); + struct inode *inode = path->dentry->d_inode; fmode_t mode = file->f_mode; __u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; -- cgit v1.2.3 From c568d68341be7030f5647def68851e469b21ca11 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 16 Sep 2016 12:44:20 +0200 Subject: locks: fix file locking on overlayfs This patch allows flock, posix locks, ofd locks and leases to work correctly on overlayfs. Instead of using the underlying inode for storing lock context use the overlay inode. This allows locks to be persistent across copy-up. This is done by introducing locks_inode() helper and using it instead of file_inode() to get the inode in locking code. 
For non-overlayfs the two are equivalent, except for an extra pointer dereference in locks_inode(). Since lock operations are in "struct file_operations" we must also make sure not to call underlying filesystem's lock operations. Introduce a super block flag MS_NOREMOTELOCK to this effect. Signed-off-by: Miklos Szeredi Acked-by: Jeff Layton Cc: "J. Bruce Fields" --- include/linux/fs.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7db097d673a8..8ee0f011547f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1065,6 +1065,18 @@ struct file_lock_context { extern void send_sigio(struct fown_struct *fown, int fd, int band); +/* + * Return the inode to use for locking + * + * For overlayfs this should be the overlay inode, not the real inode returned + * by file_inode(). For any other fs file_inode(filp) and locks_inode(filp) are + * equal. + */ +static inline struct inode *locks_inode(const struct file *f) +{ + return f->f_path.dentry->d_inode; +} + #ifdef CONFIG_FILE_LOCKING extern int fcntl_getlk(struct file *, unsigned int, struct flock __user *); extern int fcntl_setlk(unsigned int, struct file *, unsigned int, @@ -1252,7 +1264,7 @@ static inline struct dentry *file_dentry(const struct file *file) static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) { - return locks_lock_inode_wait(file_inode(filp), fl); + return locks_lock_inode_wait(locks_inode(filp), fl); } struct fasync_struct { @@ -2155,7 +2167,7 @@ static inline int mandatory_lock(struct inode *ino) static inline int locks_verify_locked(struct file *file) { - if (mandatory_lock(file_inode(file))) + if (mandatory_lock(locks_inode(file))) return locks_mandatory_locked(file); return 0; } -- cgit v1.2.3 From 7b1742eb06ead6d02a6cf3c44587088e5392d1aa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 16 Sep 2016 12:44:20 +0200 Subject: vfs: make argument of
d_real_inode() const d_op->d_real() leaves the dentry alone except if the third argument is non-zero. Unfortunately very difficult to explain to the compiler without a cast. Signed-off-by: Miklos Szeredi Acked-by: Jeff Layton --- include/linux/dcache.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 5ff3e9a4fe5f..5beed7b30561 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -584,9 +584,10 @@ static inline struct dentry *d_real(struct dentry *dentry, * If dentry is on an union/overlay, then return the underlying, real inode. * Otherwise return d_inode(). */ -static inline struct inode *d_real_inode(struct dentry *dentry) +static inline struct inode *d_real_inode(const struct dentry *dentry) { - return d_backing_inode(d_real(dentry, NULL, 0)); + /* This usage of d_real() results in const dentry */ + return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0)); } -- cgit v1.2.3 From 323117ab60156d5ef021eeef260c4e7e0a7f520e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 13 Sep 2016 15:38:25 +0200 Subject: spi: core: Use spi_sync_transfer() in spi_write()/spi_read() Simplify spi_write() and spi_read() using the spi_sync_transfer() helper. This requires moving spi_sync_transfer() up. 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 58 ++++++++++++++++++++++--------------------------- 1 file changed, 26 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 072cb2aa2413..74278c7d0f52 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -979,6 +979,30 @@ extern int spi_sync_locked(struct spi_device *spi, struct spi_message *message); extern int spi_bus_lock(struct spi_master *master); extern int spi_bus_unlock(struct spi_master *master); +/** + * spi_sync_transfer - synchronous SPI data transfer + * @spi: device with which data will be exchanged + * @xfers: An array of spi_transfers + * @num_xfers: Number of items in the xfer array + * Context: can sleep + * + * Does a synchronous SPI data transfer of the given spi_transfer array. + * + * For more specific semantics see spi_sync(). + * + * Return: Return: zero on success, else a negative error code. 
+ */ +static inline int +spi_sync_transfer(struct spi_device *spi, struct spi_transfer *xfers, + unsigned int num_xfers) +{ + struct spi_message msg; + + spi_message_init_with_transfers(&msg, xfers, num_xfers); + + return spi_sync(spi, &msg); +} + /** * spi_write - SPI synchronous write * @spi: device to which data will be written @@ -998,11 +1022,8 @@ spi_write(struct spi_device *spi, const void *buf, size_t len) .tx_buf = buf, .len = len, }; - struct spi_message m; - spi_message_init(&m); - spi_message_add_tail(&t, &m); - return spi_sync(spi, &m); + return spi_sync_transfer(spi, &t, 1); } /** @@ -1024,35 +1045,8 @@ spi_read(struct spi_device *spi, void *buf, size_t len) .rx_buf = buf, .len = len, }; - struct spi_message m; - spi_message_init(&m); - spi_message_add_tail(&t, &m); - return spi_sync(spi, &m); -} - -/** - * spi_sync_transfer - synchronous SPI data transfer - * @spi: device with which data will be exchanged - * @xfers: An array of spi_transfers - * @num_xfers: Number of items in the xfer array - * Context: can sleep - * - * Does a synchronous SPI data transfer of the given spi_transfer array. - * - * For more specific semantics see spi_sync(). - * - * Return: Return: zero on success, else a negative error code. - */ -static inline int -spi_sync_transfer(struct spi_device *spi, struct spi_transfer *xfers, - unsigned int num_xfers) -{ - struct spi_message msg; - - spi_message_init_with_transfers(&msg, xfers, num_xfers); - - return spi_sync(spi, &msg); + return spi_sync_transfer(spi, &t, 1); } /* this copies txbuf and rxbuf data; for small transfers only! */ -- cgit v1.2.3 From f50e38c9966076465bc8d9dd0bc582c268a0031e Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 15 Sep 2016 13:56:10 -0700 Subject: regmap: Allow longer flag masks for read and write We currently only support masking the top bit for read and write flags. Let's make the mask unsigned long and mask the bytes based on the configured register length to make things more generic. 
This allows using regmap for more exotic combinations like SPI devices that need little endian addressing. Signed-off-by: Tony Lindgren Signed-off-by: Mark Brown --- include/linux/regmap.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 2c12cc5af744..9adc7b21903d 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -241,9 +241,9 @@ typedef void (*regmap_unlock)(void *); * register cache support). * @num_reg_defaults: Number of elements in reg_defaults. * - * @read_flag_mask: Mask to be set in the top byte of the register when doing + * @read_flag_mask: Mask to be set in the top bytes of the register when doing * a read. - * @write_flag_mask: Mask to be set in the top byte of the register when doing + * @write_flag_mask: Mask to be set in the top bytes of the register when doing * a write. If both read_flag_mask and write_flag_mask are * empty the regmap_bus default masks are used. * @use_single_rw: If set, converts the bulk read and write operations into @@ -299,8 +299,8 @@ struct regmap_config { const void *reg_defaults_raw; unsigned int num_reg_defaults_raw; - u8 read_flag_mask; - u8 write_flag_mask; + unsigned long read_flag_mask; + unsigned long write_flag_mask; bool use_single_rw; bool can_multi_write; -- cgit v1.2.3 From 235539b48a2357da28f52d66d04bec04f3dcb9dd Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Wed, 7 Sep 2016 14:47:23 -0400 Subject: kvm: add stubs for arch specific debugfs support Two stubs are added: o kvm_arch_has_vcpu_debugfs(): must return true if the arch supports creating debugfs entries in the vcpu debugfs dir (which will be implemented by the next commit) o kvm_arch_create_vcpu_debugfs(): code that creates debugfs entries in the vcpu debugfs dir For x86, this commit introduces a new file to avoid growing arch/x86/kvm/x86.c even more. 
Signed-off-by: Luiz Capitulino Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9c28b4d4c90b..5486ff9aa71e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -749,6 +749,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); +bool kvm_arch_has_vcpu_debugfs(void); +int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu); + int kvm_arch_hardware_enable(void); void kvm_arch_hardware_disable(void); int kvm_arch_hardware_setup(void); -- cgit v1.2.3 From 45b5939e50746b92fd4cb47c02524f79ba8fabe6 Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Fri, 16 Sep 2016 10:27:35 -0400 Subject: kvm: create per-vcpu dirs in debugfs This commit adds the ability for archs to export per-vcpu information via a new per-vcpu dir in the VM's debugfs directory. If kvm_arch_has_vcpu_debugfs() returns true, then KVM will create a vcpu dir for each vCPU in the VM's debugfs directory. Then kvm_arch_create_vcpu_debugfs() is responsible for populating each vcpu directory with arch specific entries. The per-vcpu path in debugfs will look like: /sys/kernel/debug/kvm/29162-10/vcpu0 /sys/kernel/debug/kvm/29162-10/vcpu1 This is all arch specific for now because the only user of this interface (x86) wants to export x86-specific per-vcpu information to user-space. 
Signed-off-by: Luiz Capitulino Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5486ff9aa71e..01c0b9cc3915 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -265,6 +265,7 @@ struct kvm_vcpu { #endif bool preempted; struct kvm_vcpu_arch arch; + struct dentry *debugfs_dentry; }; static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 85023b2e1325826edf5d226a9cb4d809ed1e2024 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 14 Sep 2016 17:32:31 -0500 Subject: arm64: pmu: Hoist pmu platform device name Move the PMU name into a common header file so it may be referenced by other users. Signed-off-by: Jeremy Linton Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index dc1f2f30c961..9ff07d3fc8de 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -160,6 +160,8 @@ int arm_pmu_device_probe(struct platform_device *pdev, const struct of_device_id *of_table, const struct pmu_probe_info *probe_table); +#define ARMV8_PMU_PDEV_NAME "armv8-pmu" + #endif /* CONFIG_ARM_PMU */ #endif /* __ARM_PMU_H__ */ -- cgit v1.2.3 From 73e705bf81ceb84b39ef9cf6ffb8d12ca0c58a23 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Wed, 14 Sep 2016 09:52:08 -0700 Subject: regulator: core: Add set_voltage_time op The new op is analogous to set_voltage_time_sel. It can be used by regulators which don't have a table of discrete voltages. The function returns the time for the regulator output voltage to stabilize after being set to a new value, in microseconds. If the op is not set a default implementation is used to calculate the delay. 
This change also removes the ramp_delay calculation in the PWM regulator, since the driver now uses the core code for the calculation of the delay. Signed-off-by: Matthias Kaehlcke Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index fcfa40a6692c..37b532410528 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -113,10 +113,14 @@ struct regulator_linear_range { * stabilise after being enabled, in microseconds. * @set_ramp_delay: Set the ramp delay for the regulator. The driver should * select ramp delay equal to or less than(closest) ramp_delay. + * @set_voltage_time: Time taken for the regulator voltage output voltage + * to stabilise after being set to a new value, in microseconds. + * The function receives the from and to voltage as input, it + * should return the worst case. * @set_voltage_time_sel: Time taken for the regulator voltage output voltage * to stabilise after being set to a new value, in microseconds. - * The function provides the from and to voltage selector, the - * function should return the worst case. + * The function receives the from and to voltage selector as + * input, it should return the worst case. * @set_soft_start: Enable soft start for the regulator. 
* * @set_suspend_voltage: Set the voltage for the regulator when the system @@ -168,6 +172,8 @@ struct regulator_ops { /* Time taken to enable or set voltage on the regulator */ int (*enable_time) (struct regulator_dev *); int (*set_ramp_delay) (struct regulator_dev *, int ramp_delay); + int (*set_voltage_time) (struct regulator_dev *, int old_uV, + int new_uV); int (*set_voltage_time_sel) (struct regulator_dev *, unsigned int old_selector, unsigned int new_selector); -- cgit v1.2.3 From afd29f9017a271fb048b69275975c5451fd0e674 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 15 Sep 2016 11:07:03 +0300 Subject: PCI: Add pci_find_resource() Add a new helper function pci_find_resource() that can be used to find out whether a given resource (for example from a child device) is contained within given PCI device's standard resources. Signed-off-by: Mika Westerberg Acked-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- include/linux/pci.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0ab835965669..a917d4b20554 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1126,6 +1126,7 @@ void pdev_enable_device(struct pci_dev *); int pci_enable_resources(struct pci_dev *, int mask); void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *), int (*)(const struct pci_dev *, u8, u8)); +struct resource *pci_find_resource(struct pci_dev *dev, struct resource *res); #define HAVE_PCI_REQ_REGIONS 2 int __must_check pci_request_regions(struct pci_dev *, const char *); int __must_check pci_request_regions_exclusive(struct pci_dev *, const char *); @@ -1542,6 +1543,9 @@ static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable) { return 0; } +static inline struct resource *pci_find_resource(struct pci_dev *dev, + struct resource *res) +{ return NULL; } static inline int pci_request_regions(struct pci_dev *dev, const char *res_name) { return -EIO; } static inline 
void pci_release_regions(struct pci_dev *dev) { } -- cgit v1.2.3 From 88459642cba452630326b9cab1c651e09577d4e4 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sat, 17 Sep 2016 08:38:44 -0600 Subject: blk-mq: abstract tag allocation out into sbitmap library This is a generally useful data structure, so make it available to anyone else who might want to use it. It's also a nice cleanup separating the allocation logic from the rest of the tag handling logic. The code is behind a new Kconfig option, CONFIG_SBITMAP, which is only selected by CONFIG_BLOCK for now. This should be a complete noop functionality-wise. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 9 +- include/linux/sbitmap.h | 327 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 329 insertions(+), 7 deletions(-) create mode 100644 include/linux/sbitmap.h (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 60ef14cbcd2d..2575779cf13f 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -2,6 +2,7 @@ #define BLK_MQ_H #include +#include struct blk_mq_tags; struct blk_flush_queue; @@ -12,12 +13,6 @@ struct blk_mq_cpu_notifier { int (*notify)(void *data, unsigned long action, unsigned int cpu); }; -struct blk_mq_ctxmap { - unsigned int size; - unsigned int bits_per_word; - struct blk_align_bitmap *map; -}; - struct blk_mq_hw_ctx { struct { spinlock_t lock; @@ -37,7 +32,7 @@ struct blk_mq_hw_ctx { void *driver_data; - struct blk_mq_ctxmap ctx_map; + struct sbitmap ctx_map; struct blk_mq_ctx **ctxs; unsigned int nr_ctx; diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h new file mode 100644 index 000000000000..1a3b836042e1 --- /dev/null +++ b/include/linux/sbitmap.h @@ -0,0 +1,327 @@ +/* + * Fast and scalable bitmaps. 
+ * + * Copyright (C) 2016 Facebook + * Copyright (C) 2013-2014 Jens Axboe + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __LINUX_SCALE_BITMAP_H +#define __LINUX_SCALE_BITMAP_H + +#include +#include + +/** + * struct sbitmap_word - Word in a &struct sbitmap. + */ +struct sbitmap_word { + /** + * @word: The bitmap word itself. + */ + unsigned long word; + + /** + * @depth: Number of bits being used in @word. + */ + unsigned long depth; +} ____cacheline_aligned_in_smp; + +/** + * struct sbitmap - Scalable bitmap. + * + * A &struct sbitmap is spread over multiple cachelines to avoid ping-pong. This + * trades off higher memory usage for better scalability. + */ +struct sbitmap { + /** + * @depth: Number of bits used in the whole bitmap. + */ + unsigned int depth; + + /** + * @shift: log2(number of bits used per word) + */ + unsigned int shift; + + /** + * @map_nr: Number of words (cachelines) being used for the bitmap. + */ + unsigned int map_nr; + + /** + * @map: Allocated bitmap. + */ + struct sbitmap_word *map; +}; + +#define SBQ_WAIT_QUEUES 8 +#define SBQ_WAKE_BATCH 8 + +/** + * struct sbq_wait_state - Wait queue in a &struct sbitmap_queue. + */ +struct sbq_wait_state { + /** + * @wait_cnt: Number of frees remaining before we wake up. + */ + atomic_t wait_cnt; + + /** + * @wait: Wait queue. 
+ */ + wait_queue_head_t wait; +} ____cacheline_aligned_in_smp; + +/** + * struct sbitmap_queue - Scalable bitmap with the added ability to wait on free + * bits. + * + * A &struct sbitmap_queue uses multiple wait queues and rolling wakeups to + * avoid contention on the wait queue spinlock. This ensures that we don't hit a + * scalability wall when we run out of free bits and have to start putting tasks + * to sleep. + */ +struct sbitmap_queue { + /** + * @sb: Scalable bitmap. + */ + struct sbitmap sb; + + /** + * @wake_batch: Number of bits which must be freed before we wake up any + * waiters. + */ + unsigned int wake_batch; + + /** + * @wake_index: Next wait queue in @ws to wake up. + */ + atomic_t wake_index; + + /** + * @ws: Wait queues. + */ + struct sbq_wait_state *ws; +}; + +/** + * sbitmap_init_node() - Initialize a &struct sbitmap on a specific memory node. + * @sb: Bitmap to initialize. + * @depth: Number of bits to allocate. + * @shift: Use 2^@shift bits per word in the bitmap; if a negative number is + * given, a good default is chosen. + * @flags: Allocation flags. + * @node: Memory node to allocate on. + * + * Return: Zero on success or negative errno on failure. + */ +int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, + gfp_t flags, int node); + +/** + * sbitmap_free() - Free memory used by a &struct sbitmap. + * @sb: Bitmap to free. + */ +static inline void sbitmap_free(struct sbitmap *sb) +{ + kfree(sb->map); + sb->map = NULL; +} + +/** + * sbitmap_resize() - Resize a &struct sbitmap. + * @sb: Bitmap to resize. + * @depth: New number of bits to resize to. + * + * Doesn't reallocate anything. It's up to the caller to ensure that the new + * depth doesn't exceed the depth that the sb was initialized with. + */ +void sbitmap_resize(struct sbitmap *sb, unsigned int depth); + +/** + * sbitmap_get() - Try to allocate a free bit from a &struct sbitmap. + * @sb: Bitmap to allocate from. 
+ * @alloc_hint: Hint for where to start searching for a free bit. + * @round_robin: If true, be stricter about allocation order; always allocate + * starting from the last allocated bit. This is less efficient + * than the default behavior (false). + * + * Return: Non-negative allocated bit number if successful, -1 otherwise. + */ +int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin); + +/** + * sbitmap_any_bit_set() - Check for a set bit in a &struct sbitmap. + * @sb: Bitmap to check. + * + * Return: true if any bit in the bitmap is set, false otherwise. + */ +bool sbitmap_any_bit_set(const struct sbitmap *sb); + +/** + * sbitmap_any_bit_clear() - Check for an unset bit in a &struct + * sbitmap. + * @sb: Bitmap to check. + * + * Return: true if any bit in the bitmap is clear, false otherwise. + */ +bool sbitmap_any_bit_clear(const struct sbitmap *sb); + +typedef bool (*sb_for_each_fn)(struct sbitmap *, unsigned int, void *); + +/** + * sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap. + * @sb: Bitmap to iterate over. + * @fn: Callback. Should return true to continue or false to break early. + * @data: Pointer to pass to callback. + * + * This is inline even though it's non-trivial so that the function calls to the + * callback will hopefully get optimized away. 
+ */ +static inline void sbitmap_for_each_set(struct sbitmap *sb, sb_for_each_fn fn, + void *data) +{ + unsigned int i; + + for (i = 0; i < sb->map_nr; i++) { + struct sbitmap_word *word = &sb->map[i]; + unsigned int off, nr; + + if (!word->word) + continue; + + nr = 0; + off = i << sb->shift; + while (1) { + nr = find_next_bit(&word->word, word->depth, nr); + if (nr >= word->depth) + break; + + if (!fn(sb, off + nr, data)) + return; + + nr++; + } + } +} + +#define SB_NR_TO_INDEX(sb, bitnr) ((bitnr) >> (sb)->shift) +#define SB_NR_TO_BIT(sb, bitnr) ((bitnr) & ((1U << (sb)->shift) - 1U)) + +static inline unsigned long *__sbitmap_word(struct sbitmap *sb, + unsigned int bitnr) +{ + return &sb->map[SB_NR_TO_INDEX(sb, bitnr)].word; +} + +/* Helpers equivalent to the operations in asm/bitops.h and linux/bitmap.h */ + +static inline void sbitmap_set_bit(struct sbitmap *sb, unsigned int bitnr) +{ + set_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); +} + +static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr) +{ + clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); +} + +static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr) +{ + return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); +} + +unsigned int sbitmap_weight(const struct sbitmap *sb); + +/** + * sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific + * memory node. + * @sbq: Bitmap queue to initialize. + * @depth: See sbitmap_init_node(). + * @shift: See sbitmap_init_node(). + * @flags: Allocation flags. + * @node: Memory node to allocate on. + * + * Return: Zero on success or negative errno on failure. + */ +int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, + int shift, gfp_t flags, int node); + +/** + * sbitmap_queue_free() - Free memory used by a &struct sbitmap_queue. + * + * @sbq: Bitmap queue to free. 
+ */ +static inline void sbitmap_queue_free(struct sbitmap_queue *sbq) +{ + kfree(sbq->ws); + sbitmap_free(&sbq->sb); +} + +/** + * sbitmap_queue_resize() - Resize a &struct sbitmap_queue. + * @sbq: Bitmap queue to resize. + * @depth: New number of bits to resize to. + * + * Like sbitmap_resize(), this doesn't reallocate anything. It has to do + * some extra work on the &struct sbitmap_queue, so it's not safe to just + * resize the underlying &struct sbitmap. + */ +void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth); + +/** + * sbitmap_queue_clear() - Free an allocated bit and wake up waiters on a + * &struct sbitmap_queue. + * @sbq: Bitmap to free from. + * @nr: Bit number to free. + */ +void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr); + +static inline int sbq_index_inc(int index) +{ + return (index + 1) & (SBQ_WAIT_QUEUES - 1); +} + +static inline void sbq_index_atomic_inc(atomic_t *index) +{ + int old = atomic_read(index); + int new = sbq_index_inc(old); + atomic_cmpxchg(index, old, new); +} + +/** + * sbq_wait_ptr() - Get the next wait queue to use for a &struct + * sbitmap_queue. + * @sbq: Bitmap queue to wait on. + * @wait_index: A counter per "user" of @sbq. + */ +static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq, + atomic_t *wait_index) +{ + struct sbq_wait_state *ws; + + ws = &sbq->ws[atomic_read(wait_index)]; + sbq_index_atomic_inc(wait_index); + return ws; +} + +/** + * sbitmap_queue_wake_all() - Wake up everything waiting on a &struct + * sbitmap_queue. + * @sbq: Bitmap queue to wake up. + */ +void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); + +#endif /* __LINUX_SCALE_BITMAP_H */ -- cgit v1.2.3 From 40aabb67464d5aad9ca3d2a5fedee56e2ff45aa0 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sat, 17 Sep 2016 01:28:23 -0700 Subject: sbitmap: push per-cpu last_tag into sbitmap_queue Allocating your own per-cpu allocation hint separately makes for an awkward API. 
Instead, allocate the per-cpu hint as part of the struct sbitmap_queue. There's no point for a struct sbitmap_queue without the cache, but you can still use a bare struct sbitmap. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 1a3b836042e1..6745545e0b22 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -99,6 +99,14 @@ struct sbitmap_queue { */ struct sbitmap sb; + /* + * @alloc_hint: Cache of last successfully allocated or freed bit. + * + * This is per-cpu, which allows multiple users to stick to different + * cachelines until the map is exhausted. + */ + unsigned int __percpu *alloc_hint; + /** * @wake_batch: Number of bits which must be freed before we wake up any * waiters. @@ -267,6 +275,7 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, static inline void sbitmap_queue_free(struct sbitmap_queue *sbq) { kfree(sbq->ws); + free_percpu(sbq->alloc_hint); sbitmap_free(&sbq->sb); } @@ -281,13 +290,47 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq) */ void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth); +/** + * __sbitmap_queue_get() - Try to allocate a free bit from a &struct + * sbitmap_queue with preemption already disabled. + * @sbq: Bitmap queue to allocate from. + * @round_robin: See sbitmap_get(). + * + * Return: Non-negative allocated bit number if successful, -1 otherwise. + */ +int __sbitmap_queue_get(struct sbitmap_queue *sbq, bool round_robin); + +/** + * sbitmap_queue_get() - Try to allocate a free bit from a &struct + * sbitmap_queue. + * @sbq: Bitmap queue to allocate from. + * @round_robin: See sbitmap_get(). + * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to + * sbitmap_queue_clear()). 
+ * + * Return: Non-negative allocated bit number if successful, -1 otherwise. + */ +static inline int sbitmap_queue_get(struct sbitmap_queue *sbq, bool round_robin, + unsigned int *cpu) +{ + int nr; + + *cpu = get_cpu(); + nr = __sbitmap_queue_get(sbq, round_robin); + put_cpu(); + return nr; +} + /** * sbitmap_queue_clear() - Free an allocated bit and wake up waiters on a * &struct sbitmap_queue. * @sbq: Bitmap to free from. * @nr: Bit number to free. + * @round_robin: See sbitmap_get(). + * @cpu: CPU the bit was allocated on. */ -void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr); +void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, + bool round_robin, unsigned int cpu); static inline int sbq_index_inc(int index) { -- cgit v1.2.3 From f4a644db86669d938c71f19560aebf69d4720d63 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sat, 17 Sep 2016 01:28:24 -0700 Subject: sbitmap: push alloc policy into sbitmap_queue Again, there's no point in passing this in every time. Make it part of struct sbitmap_queue and clean up the API. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 6745545e0b22..f017fd6e69c4 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -122,6 +122,11 @@ struct sbitmap_queue { * @ws: Wait queues. */ struct sbq_wait_state *ws; + + /** + * @round_robin: Allocate bits in strict round-robin order. + */ + bool round_robin; }; /** @@ -259,13 +264,14 @@ unsigned int sbitmap_weight(const struct sbitmap *sb); * @sbq: Bitmap queue to initialize. * @depth: See sbitmap_init_node(). * @shift: See sbitmap_init_node(). + * @round_robin: See sbitmap_get(). * @flags: Allocation flags. * @node: Memory node to allocate on. * * Return: Zero on success or negative errno on failure. 
*/ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, - int shift, gfp_t flags, int node); + int shift, bool round_robin, gfp_t flags, int node); /** * sbitmap_queue_free() - Free memory used by a &struct sbitmap_queue. @@ -294,29 +300,27 @@ void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth); * __sbitmap_queue_get() - Try to allocate a free bit from a &struct * sbitmap_queue with preemption already disabled. * @sbq: Bitmap queue to allocate from. - * @round_robin: See sbitmap_get(). * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ -int __sbitmap_queue_get(struct sbitmap_queue *sbq, bool round_robin); +int __sbitmap_queue_get(struct sbitmap_queue *sbq); /** * sbitmap_queue_get() - Try to allocate a free bit from a &struct * sbitmap_queue. * @sbq: Bitmap queue to allocate from. - * @round_robin: See sbitmap_get(). * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to * sbitmap_queue_clear()). * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ -static inline int sbitmap_queue_get(struct sbitmap_queue *sbq, bool round_robin, +static inline int sbitmap_queue_get(struct sbitmap_queue *sbq, unsigned int *cpu) { int nr; *cpu = get_cpu(); - nr = __sbitmap_queue_get(sbq, round_robin); + nr = __sbitmap_queue_get(sbq); put_cpu(); return nr; } @@ -326,11 +330,10 @@ static inline int sbitmap_queue_get(struct sbitmap_queue *sbq, bool round_robin, * &struct sbitmap_queue. * @sbq: Bitmap to free from. * @nr: Bit number to free. - * @round_robin: See sbitmap_get(). * @cpu: CPU the bit was allocated on. 
*/ void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, - bool round_robin, unsigned int cpu); + unsigned int cpu); static inline int sbq_index_inc(int index) { -- cgit v1.2.3 From 5f4e5752a8a3a72c79514def2ad9fc7cd410ce2e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Sep 2016 10:12:45 +1000 Subject: fs: add iomap_file_dirty Originally-From: Christoph Hellwig This function uses the iomap infrastructure to re-write all pages in a given range. This is useful for doing a copy-up of COW ranges, and might be useful for scrubbing in the future. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- include/linux/iomap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 3d70ece10313..3a56212a0a8d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -64,6 +64,8 @@ struct iomap_ops { ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, struct iomap_ops *ops); +int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len, + struct iomap_ops *ops); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, struct iomap_ops *ops); int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, -- cgit v1.2.3 From e43c460dcd265431df7a5e481450ad9c0596c10c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 19 Sep 2016 10:13:02 +1000 Subject: iomap: add a flag to report shared extents Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- include/linux/iomap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 3a56212a0a8d..c74226a738a3 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -22,6 +22,7 @@ struct vm_fault; * Flags for iomap mappings: */ #define IOMAP_F_MERGED 0x01 /* contains multiple blocks/extents */ +#define IOMAP_F_SHARED 0x02 /* block shared with another file */ /* * Magic value for blkno: -- cgit v1.2.3 From ecd50729f772f0b982ddbb76c16ee4bc8f495e17 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Sep 2016 11:24:37 +1000 Subject: iomap: add IOMAP_F_NEW flag Signed-off-by: Christoph Hellwig Reviewed-by: Ross Zwisler Signed-off-by: Dave Chinner --- include/linux/iomap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 3d70ece10313..14d7067f8b06 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -22,6 +22,7 @@ struct vm_fault; * Flags for iomap mappings: */ #define IOMAP_F_MERGED 0x01 /* contains multiple blocks/extents */ +#define IOMAP_F_NEW 0x02 /* blocks have been newly allocated */ /* * Magic value for blkno: -- cgit v1.2.3 From a254e568128804fc2f18490af617197a1d36675e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Sep 2016 11:24:49 +1000 Subject: dax: provide an iomap based dax read/write path This is a much simpler implementation of the DAX read/write path that makes use of the iomap infrastructure. It does not try to mirror the direct I/O calling conventions and thus doesn't have to deal with i_dio_count or the end_io handler, but instead leaves locking and filesystem-specific I/O completion to the caller. 
Signed-off-by: Christoph Hellwig Reviewed-by: Ross Zwisler Signed-off-by: Dave Chinner --- include/linux/dax.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 9c6dc7704043..a0595b4ddbd8 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -6,9 +6,13 @@ #include #include +struct iomap_ops; + /* We use lowest available exceptional entry bit for locking */ #define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT) +ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter, + struct iomap_ops *ops); ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, get_block_t, dio_iodone_t, int flags); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); -- cgit v1.2.3 From a7d73fe6c538fdba42635c0b8e73382fcd4bd667 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Sep 2016 11:24:50 +1000 Subject: dax: provide an iomap based fault handler Very similar to the existing dax_fault function, but instead of using the get_block callback we rely on the iomap_ops vector from iomap.c. That also avoids having to do two calls into the file system for write faults. 
Signed-off-by: Christoph Hellwig Reviewed-by: Ross Zwisler Signed-off-by: Dave Chinner --- include/linux/dax.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index a0595b4ddbd8..add6c4bc568f 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -17,6 +17,8 @@ ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, get_block_t, dio_iodone_t, int flags); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); +int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, + struct iomap_ops *ops); int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); void dax_wake_mapping_entry_waiter(struct address_space *mapping, -- cgit v1.2.3 From 2c9d85d4d82d9e0a62aad08bf50650804e68ed30 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Fri, 16 Sep 2016 15:05:36 +0200 Subject: netdevice: Add offload statistics ndo Add a new ndo to return statistics for offloaded operations. Since there can be many different offloaded operations with many stats types, the ndo gets an attribute id by which it knows which stats are wanted. The ndo also gets a void pointer to be cast according to the attribute id. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2095b6ab3661..a10d8d18ce19 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -924,6 +924,14 @@ struct netdev_xdp { * 3. Update dev->stats asynchronously and atomically, and define * neither operation. 
* + * bool (*ndo_has_offload_stats)(int attr_id) + * Return true if this device supports offload stats of this attr_id. + * + * int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev, + * void *attr_data) + * Get statistics for offload operations by attr_id. Write it into the + * attr_data pointer. + * * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid); * If device supports VLAN filtering this function is called when a * VLAN id is registered. @@ -1155,6 +1163,10 @@ struct net_device_ops { struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, struct rtnl_link_stats64 *storage); + bool (*ndo_has_offload_stats)(int attr_id); + int (*ndo_get_offload_stats)(int attr_id, + const struct net_device *dev, + void *attr_data); struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); int (*ndo_vlan_rx_add_vid)(struct net_device *dev, -- cgit v1.2.3 From e8bffe0cf964f0330595bb376b74921cccdaac88 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Fri, 16 Sep 2016 12:59:13 -0700 Subject: net: Add _nf_(un)register_hooks symbols Add _nf_register_hooks() and _nf_unregister_hooks() calls which allow caller to hold RTNL mutex. Signed-off-by: Mahesh Bandewar CC: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netfilter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 9230f9aee896..e82b76781bf6 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -133,6 +133,8 @@ int nf_register_hook(struct nf_hook_ops *reg); void nf_unregister_hook(struct nf_hook_ops *reg); int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n); void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n); +int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n); +void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n); /* Functions to register get/setsockopt ranges (non-inclusive). 
You need to check permissions yourself! */ -- cgit v1.2.3 From 3132e49ecef9dab43d858d8e7066662c6a1efb16 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Aug 2016 15:58:24 -0400 Subject: pnfs: track multiple layout types in fsinfo structure Current NFSv4.1/pNFS client assumes that MDS supports only one layout type. While it's true for most existing servers, nevertheless, this may change in the near future. For now, this patch just plumbs in the ability to track a list of layouts in the fsinfo structure. The existing behavior of the client is preserved, by having it just select the first entry in the list. Signed-off-by: Tigran Mkrtchyan Signed-off-by: Jeff Layton Reviewed-by: J. Bruce Fields Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7cc0deee5bde..f11b26ed001b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -124,6 +124,11 @@ struct nfs_fattr { | NFS_ATTR_FATTR_SPACE_USED \ | NFS_ATTR_FATTR_V4_SECURITY_LABEL) +/* + * Maximal number of supported layout drivers. 
+ */ +#define NFS_MAX_LAYOUT_TYPES 8 + /* * Info on the file system */ @@ -139,7 +144,7 @@ struct nfs_fsinfo { __u64 maxfilesize; struct timespec time_delta; /* server time granularity */ __u32 lease_time; /* in seconds */ - __u32 layouttype; /* supported pnfs layout driver */ + __u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */ __u32 blksize; /* preferred pnfs io block size */ __u32 clone_blksize; /* granularity of a CLONE operation */ }; -- cgit v1.2.3 From 3b58a8a9049d5e191402665c339690a148504358 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 9 Sep 2016 09:22:23 -0400 Subject: SUNRPC rpc_clnt_xprt_switch_put Give the NFS layer access to the xprt_switch_put function Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- include/linux/sunrpc/clnt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 5c02b0691587..c12f86b752cb 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -199,5 +199,7 @@ void rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo); const char *rpc_proc_name(const struct rpc_task *task); + +void rpc_clnt_xprt_switch_put(struct rpc_clnt *); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From dd69171769cf4649a7ff3755e91cbd242a833727 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 9 Sep 2016 09:22:24 -0400 Subject: SUNRPC rpc_clnt_xprt_switch_add_xprt Give the NFS layer access to the rpc_xprt_switch_add_xprt function Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- include/linux/sunrpc/clnt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index c12f86b752cb..b069d6e2c3d6 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -201,5 +201,6 @@ void rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, const char 
*rpc_proc_name(const struct rpc_task *task); void rpc_clnt_xprt_switch_put(struct rpc_clnt *); +void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From 39e5d2df959dd4aea81fa33d765d2a5cc67a0512 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 9 Sep 2016 09:22:25 -0400 Subject: SUNRPC search xprt switch for sockaddr Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- include/linux/sunrpc/clnt.h | 2 ++ include/linux/sunrpc/xprtmultipath.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index b069d6e2c3d6..35cc539e2921 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -202,5 +202,7 @@ const char *rpc_proc_name(const struct rpc_task *task); void rpc_clnt_xprt_switch_put(struct rpc_clnt *); void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *); +bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, + const struct sockaddr *sap); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h index 5a9acffa41be..507418c1c69e 100644 --- a/include/linux/sunrpc/xprtmultipath.h +++ b/include/linux/sunrpc/xprtmultipath.h @@ -66,4 +66,6 @@ extern struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi); extern struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi); extern struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi); +extern bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, + const struct sockaddr *sap); #endif -- cgit v1.2.3 From fda0ab41170ee0a1c7a3781ff8cfb4395c3dd784 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 9 Sep 2016 09:22:26 -0400 Subject: SUNRPC: rpc_clnt_add_xprt setup function for NFS layer Use a setup function to call into the NFS layer to test an rpc_xprt for session trunking so as to 
not leak the rpc_xprt_switch into the nfs layer. Search for the address in the rpc_xprt_switch first so as not to put an unnecessary EXCHANGE_ID on the wire. Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- include/linux/sunrpc/clnt.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 35cc539e2921..85cc819676e8 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -125,6 +125,13 @@ struct rpc_create_args { struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ }; +struct rpc_add_xprt_test { + int (*add_xprt_test)(struct rpc_clnt *, + struct rpc_xprt *, + void *calldata); + void *data; +}; + /* Values for "flags" field */ #define RPC_CLNT_CREATE_HARDRTRY (1UL << 0) #define RPC_CLNT_CREATE_AUTOBIND (1UL << 2) @@ -198,6 +205,11 @@ int rpc_clnt_add_xprt(struct rpc_clnt *, struct xprt_create *, void rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo); +int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *, + struct rpc_xprt_switch *, + struct rpc_xprt *, + void *); + const char *rpc_proc_name(const struct rpc_task *task); void rpc_clnt_xprt_switch_put(struct rpc_clnt *); -- cgit v1.2.3 From b9c5bc03be6aae41990efd09f83cf70a89ac9f4b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Sep 2016 10:55:12 -0400 Subject: SUNRPC: Refactor rpc_xdr_buf_init() Clean up: there is some XDR initialization logic that is common to the forward channel and backchannel. Move it to an XDR header so it can be shared. rpc_rqst::rq_buffer points to a buffer containing big-endian data. Update its annotation as part of the clean up. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 12 ++++++++++++ include/linux/sunrpc/xprt.h | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 70c6b92e15a7..56c48c884a24 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -67,6 +67,18 @@ struct xdr_buf { len; /* Length of XDR encoded message */ }; +static inline void +xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) +{ + buf->head[0].iov_base = start; + buf->head[0].iov_len = len; + buf->tail[0].iov_len = 0; + buf->page_len = 0; + buf->flags = 0; + buf->len = 0; + buf->buflen = len; +} + /* * pre-xdr'ed macros. */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a16070dd03ee..6f1d41b559a3 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -83,7 +83,7 @@ struct rpc_rqst { void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ struct list_head rq_list; - __u32 * rq_buffer; /* XDR encode buffer */ + void *rq_buffer; /* Call XDR encode buffer */ size_t rq_callsize, rq_rcvsize; size_t rq_xmit_bytes_sent; /* total bytes sent */ -- cgit v1.2.3 From 5fe6eaa1f9a00b9a5927e3b791ecad2f3eaab130 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Sep 2016 10:55:20 -0400 Subject: SUNRPC: Generalize the RPC buffer allocation API xprtrdma needs to allocate the Call and Reply buffers separately. TBH, the reliance on using a single buffer for the pair of XDR buffers is transport implementation-specific. Transports that want to allocate separate Call and Reply buffers will ignore the "size" argument anyway. Don't bother passing it. The buf_alloc method can't return two pointers. Instead, make the method's return value an error code, and set the rq_buffer pointer in the method itself. 
This gives call_allocate an opportunity to terminate an RPC instead of looping forever when a permanent problem occurs. If a request is just bogus, or the transport is in a state where it can't allocate resources for any request, there needs to be a way to kill the RPC right there and not loop. This immediately fixes a rare problem in the backchannel send path, which loops if the server happens to send a CB request whose call+reply size is larger than a page (which it shouldn't do yet). One more issue: looks like xprt_inject_disconnect was incorrectly placed in the failure path in call_allocate. It needs to be in the success path, as it is for other call-sites. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/sched.h | 2 +- include/linux/sunrpc/xprt.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 817af0b4385e..38d4c1b378f2 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -239,7 +239,7 @@ struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *, void *); void rpc_wake_up_status(struct rpc_wait_queue *, int); void rpc_delay(struct rpc_task *, unsigned long); -void * rpc_malloc(struct rpc_task *, size_t); +int rpc_malloc(struct rpc_task *); void rpc_free(void *); int rpciod_up(void); void rpciod_down(void); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 6f1d41b559a3..c01f468fb374 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -127,7 +127,7 @@ struct rpc_xprt_ops { void (*rpcbind)(struct rpc_task *task); void (*set_port)(struct rpc_xprt *xprt, unsigned short port); void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); - void * (*buf_alloc)(struct rpc_task *task, size_t size); + int (*buf_alloc)(struct rpc_task *task); void (*buf_free)(void *buffer); int (*send_request)(struct rpc_task *task); void 
(*set_retrans_timeout)(struct rpc_task *task); -- cgit v1.2.3 From 3435c74aed2d7b743ccbf34616c523ebee7be943 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Sep 2016 10:55:29 -0400 Subject: SUNRPC: Generalize the RPC buffer release API xprtrdma needs to allocate the Call and Reply buffers separately. TBH, the reliance on using a single buffer for the pair of XDR buffers is transport implementation-specific. Instead of passing just the rq_buffer into the buf_free method, pass the task structure and let buf_free take care of freeing both XDR buffers at once. There's a micro-optimization here. In the common case, both xprt_release and the transport's buf_free method were checking if rq_buffer was NULL. Now the check is done only once per RPC. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/sched.h | 2 +- include/linux/sunrpc/xprt.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 38d4c1b378f2..7ba040c797ec 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -240,7 +240,7 @@ struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *, void rpc_wake_up_status(struct rpc_wait_queue *, int); void rpc_delay(struct rpc_task *, unsigned long); int rpc_malloc(struct rpc_task *); -void rpc_free(void *); +void rpc_free(struct rpc_task *); int rpciod_up(void); void rpciod_down(void); int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index c01f468fb374..72c2aebc592b 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -128,7 +128,7 @@ struct rpc_xprt_ops { void (*set_port)(struct rpc_xprt *xprt, unsigned short port); void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); int (*buf_alloc)(struct rpc_task *task); - void (*buf_free)(void *buffer); + void 
(*buf_free)(struct rpc_task *task); int (*send_request)(struct rpc_task *task); void (*set_retrans_timeout)(struct rpc_task *task); void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); -- cgit v1.2.3 From 68778945e46f143ed7974b427a8065f69a4ce944 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Sep 2016 10:55:37 -0400 Subject: SUNRPC: Separate buffer pointers for RPC Call and Reply messages For xprtrdma, the RPC Call and Reply buffers are involved in real I/O operations. To start with, the DMA direction of the I/O for a Call is opposite that of a Reply. In the current arrangement, the Reply buffer address is on a four-byte alignment just past the call buffer. Would be friendlier on some platforms if that was at a DMA cache alignment instead. Because the current arrangement allocates a single memory region which contains both buffers, the RPC Reply buffer often contains a page boundary in it when the Call buffer is large enough (which is frequent). It would be a little nicer for setting up DMA operations (and possible registration of the Reply buffer) if the two buffers were separated, well-aligned, and contained as few page boundaries as possible. Now, I could just pad out the single memory region used for the pair of buffers. But frequently that would mean a lot of unused space to ensure the Reply buffer did not have a page boundary. Add a separate pointer to rpc_rqst that points right to the RPC Reply buffer. This makes no difference to xprtsock, but it will help xprtrdma in subsequent patches. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprt.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 72c2aebc592b..46f069efa056 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -84,8 +84,9 @@ struct rpc_rqst { struct list_head rq_list; void *rq_buffer; /* Call XDR encode buffer */ - size_t rq_callsize, - rq_rcvsize; + size_t rq_callsize; + void *rq_rbuffer; /* Reply XDR decode buffer */ + size_t rq_rcvsize; size_t rq_xmit_bytes_sent; /* total bytes sent */ size_t rq_reply_bytes_recvd; /* total reply bytes */ /* received */ -- cgit v1.2.3 From 5a6d1db4556940533f1a5b6521e522f3e46508ed Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Sep 2016 10:55:45 -0400 Subject: SUNRPC: Add a transport-specific private field in rpc_rqst Currently there's a hidden and indirect mechanism for finding the rpcrdma_req that goes with an rpc_rqst. It depends on getting from the rq_buffer pointer in struct rpc_rqst to the struct rpcrdma_regbuf that controls that buffer, and then to the struct rpcrdma_req it goes with. This was done back in the day to avoid the need to add a per-rqst pointer or to alter the buf_free API when support for RPC-over-RDMA was introduced. I'm about to change the way regbuf's work to support larger inline thresholds. Now is a good time to replace this indirect mechanism with something that is more straightforward. I guess this should be considered a clean up. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 46f069efa056..a5da60b24d83 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -83,6 +83,7 @@ struct rpc_rqst { void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ struct list_head rq_list; + void *rq_xprtdata; /* Per-xprt private data */ void *rq_buffer; /* Call XDR encode buffer */ size_t rq_callsize; void *rq_rbuffer; /* Reply XDR decode buffer */ -- cgit v1.2.3 From ff06bd191e722393d9abf7d6f9767f195274e909 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Sep 2016 10:56:59 -0400 Subject: rpcrdma: RDMA/CM private message data structure Introduce data structure used by both client and server to exchange implementation details during RDMA/CM connection establishment. This is an experimental out-of-band exchange between Linux RPC-over-RDMA Version One implementations, replacing the deprecated CCP (see RFC 5666bis). The purpose of this extension is to enable prototyping of features that might be introduced in a subsequent version of RPC-over-RDMA. Suggested by Christoph Hellwig and Devesh Sharma. 
Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- include/linux/sunrpc/rpc_rdma.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index 3b1ff38f0c37..a7da6bf56610 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -41,6 +41,7 @@ #define _LINUX_SUNRPC_RPC_RDMA_H #include +#include #define RPCRDMA_VERSION 1 #define rpcrdma_version cpu_to_be32(RPCRDMA_VERSION) @@ -129,4 +130,38 @@ enum rpcrdma_proc { #define rdma_done cpu_to_be32(RDMA_DONE) #define rdma_error cpu_to_be32(RDMA_ERROR) +/* + * Private extension to RPC-over-RDMA Version One. + * Message passed during RDMA-CM connection set-up. + * + * Add new fields at the end, and don't permute existing + * fields. + */ +struct rpcrdma_connect_private { + __be32 cp_magic; + u8 cp_version; + u8 cp_flags; + u8 cp_send_size; + u8 cp_recv_size; +} __packed; + +#define rpcrdma_cmp_magic __cpu_to_be32(0xf6ab0e18) + +enum { + RPCRDMA_CMP_VERSION = 1, + RPCRDMA_CMP_F_SND_W_INV_OK = BIT(0), +}; + +static inline u8 +rpcrdma_encode_buffer_size(unsigned int size) +{ + return (size >> 10) - 1; +} + +static inline unsigned int +rpcrdma_decode_buffer_size(u8 val) +{ + return ((unsigned int)val + 1) << 10; +} + #endif /* _LINUX_SUNRPC_RPC_RDMA_H */ -- cgit v1.2.3 From 87cfb9a0c85ce4a0c96a4f3d692a85519b933ade Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Sep 2016 10:57:07 -0400 Subject: xprtrdma: Client-side support for rpcrdma_connect_private Send an RDMA-CM private message on connect, and look for one during a connection-established event. Both sides can communicate their various implementation limits. Implementations that don't support this sideband protocol ignore it. Once the client knows the server's inline threshold maxima, it can adjust the use of Reply chunks, and eliminate most use of Position Zero Read chunks. 
Moderately-sized I/O can be done using a pure inline RDMA Send instead of RDMA operations that require memory registration. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/rpc_rdma.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index a7da6bf56610..cfda6adcf33c 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -46,6 +46,10 @@ #define RPCRDMA_VERSION 1 #define rpcrdma_version cpu_to_be32(RPCRDMA_VERSION) +enum { + RPCRDMA_V1_DEF_INLINE_SIZE = 1024, +}; + struct rpcrdma_segment { __be32 rs_handle; /* Registered memory handle */ __be32 rs_length; /* Length of the chunk in bytes */ -- cgit v1.2.3 From 44829d02d2d7a7064842ecf36239ea24df1cdf58 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Sep 2016 10:57:32 -0400 Subject: xprtrdma: Support larger inline thresholds The Version One default inline threshold is still 1KB. But allow testing with thresholds up to 64KB. This maximum is somewhat arbitrary. There's no fundamental architectural limit I'm aware of, but it's good to keep the size of Receive buffers reasonable. Now that Send can use a s/g list, a Send buffer is only as large as each RPC requires. Receive buffers are always the size of the inline threshold, however. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprtrdma.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 39267dc3486a..221b7a2e5406 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -53,8 +53,8 @@ #define RPCRDMA_MAX_SLOT_TABLE (256U) #define RPCRDMA_MIN_INLINE (1024) /* min inline thresh */ -#define RPCRDMA_DEF_INLINE (1024) /* default inline thresh */ -#define RPCRDMA_MAX_INLINE (3068) /* max inline thresh */ +#define RPCRDMA_DEF_INLINE (4096) /* default inline thresh */ +#define RPCRDMA_MAX_INLINE (65536) /* max inline thresh */ /* Memory registration strategies, by number. * This is part of a kernel / user space API. Do not remove. */ -- cgit v1.2.3 From ca440c383a588091cae9fbce610b86a6e9d961ad Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 15 Sep 2016 14:40:49 -0400 Subject: pnfs: add a new mechanism to select a layout driver according to an ordered list Currently, the layout driver selection code always chooses the first one from the list. That's not really ideal however, as the server can send the list of layout types in any order that it likes. It's up to the client to select the best one for its needs. This patch adds an ordered list of preferred driver types and has the selection code sort the list of available layout drivers according to it. Any unrecognized layout type is sorted to the end of the list. For now, the order of preference is hardcoded, but it should be possible to make this configurable in the future. Signed-off-by: Jeff Layton Reviewed-by: J. 
Bruce Fields Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index f11b26ed001b..beb1e10f446e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -144,6 +144,7 @@ struct nfs_fsinfo { __u64 maxfilesize; struct timespec time_delta; /* server time granularity */ __u32 lease_time; /* in seconds */ + __u32 nlayouttypes; /* number of layouttypes */ __u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */ __u32 blksize; /* preferred pnfs io block size */ __u32 clone_blksize; /* granularity of a CLONE operation */ -- cgit v1.2.3 From 43af5de74288a7cdc3684902c5259346ae67adf8 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 9 Sep 2016 11:37:49 -0400 Subject: lsm,audit,selinux: Introduce a new audit data type LSM_AUDIT_DATA_FILE Right now LSM_AUDIT_DATA_PATH type contains "struct path" in union "u" of common_audit_data. This information is used to print the path of the file; at the same time, it is also used to get to the dentry and inode. And this inode information is used to get to superblock and device and print device information. This does not work well for layered filesystems like overlay where dentry contained in path is overlay dentry and not the real dentry of underlying file system. That means inode retrieved from dentry is also overlay inode and not the real inode. SELinux helpers like file_path_has_perm() are doing checks on inode retrieved from file_inode(). This returns the real inode and not the overlay inode. That means we are doing check on real inode but for audit purposes we are printing details of overlay inode and that can be confusing while debugging. Hence, introduce a new type LSM_AUDIT_DATA_FILE which carries file information and inode retrieved is real inode using file_inode(). That way, the right avc denied information is given to the user.
For example, following is one example avc before the patch. type=AVC msg=audit(1473360868.399:214): avc: denied { read open } for pid=1765 comm="cat" path="/root/.../overlay/container1/merged/readfile" dev="overlay" ino=21443 scontext=unconfined_u:unconfined_r:test_overlay_client_t:s0:c10,c20 tcontext=unconfined_u:object_r:test_overlay_files_ro_t:s0 tclass=file permissive=0 It looks as follows after the patch. type=AVC msg=audit(1473360017.388:282): avc: denied { read open } for pid=2530 comm="cat" path="/root/.../overlay/container1/merged/readfile" dev="dm-0" ino=2377915 scontext=unconfined_u:unconfined_r:test_overlay_client_t:s0:c10,c20 tcontext=unconfined_u:object_r:test_overlay_files_ro_t:s0 tclass=file permissive=0 Notice that now dev information points to "dm-0" device instead of "overlay" device. This makes it clear that check failed on underlying inode and not on the overlay inode. Signed-off-by: Vivek Goyal [PM: slight tweaks to the description to make checkpatch.pl happy] Signed-off-by: Paul Moore --- include/linux/lsm_audit.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index ffb9c9da4f39..e58e577117b6 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -59,6 +59,7 @@ struct common_audit_data { #define LSM_AUDIT_DATA_INODE 9 #define LSM_AUDIT_DATA_DENTRY 10 #define LSM_AUDIT_DATA_IOCTL_OP 11 +#define LSM_AUDIT_DATA_FILE 12 union { struct path path; struct dentry *dentry; @@ -75,6 +76,7 @@ struct common_audit_data { #endif char *kmod_name; struct lsm_ioctlop_audit *op; + struct file *file; } u; /* this union contains LSM specific data */ union { -- cgit v1.2.3 From c23a7266e6599e74305cc5b790f93398bb212380 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 6 Sep 2016 19:04:37 +0200 Subject: arm64/FP/SIMD: Convert to hotplug state machine Install the callbacks via the state machine. 
Signed-off-by: Sebastian Andrzej Siewior Acked-by: Will Deacon Cc: Peter Zijlstra Cc: Catalin Marinas Cc: rt@linutronix.de Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20160906170457.32393-2-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index afd59e2ca4b3..0da071ff36d2 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -20,6 +20,7 @@ enum cpuhp_state { CPUHP_SOFTIRQ_DEAD, CPUHP_NET_MVNETA_DEAD, CPUHP_CPUIDLE_DEAD, + CPUHP_ARM64_FPSIMD_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, -- cgit v1.2.3 From 657ebf7a2354f39cc7d3f4e64ee49dbf1c3cae4f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 6 Sep 2016 19:04:38 +0200 Subject: ARM/shmobile: Convert to hotplug state machine Install the callbacks via the state machine so the old notifier based cpuhotplug infrastructure can be removed. 
Signed-off-by: Sebastian Andrzej Siewior Cc: linux-sh@vger.kernel.org Cc: Peter Zijlstra Cc: Magnus Damm Cc: Simon Horman Cc: rt@linutronix.de Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20160906170457.32393-3-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 0da071ff36d2..008eed0c0787 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -35,6 +35,7 @@ enum cpuhp_state { CPUHP_POWERPC_PMAC_PREPARE, CPUHP_POWERPC_MMU_CTX_PREPARE, CPUHP_NOTIFY_PREPARE, + CPUHP_ARM_SHMOBILE_SCU_PREPARE, CPUHP_TIMERS_DEAD, CPUHP_BRINGUP_CPU, CPUHP_AP_IDLE_DEAD, -- cgit v1.2.3 From a4fa9cc220fef29162d38a0ada71f5569a116087 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 6 Sep 2016 19:04:39 +0200 Subject: ARM/OMAP/wakeupgen: Convert to hotplug state machine Install the callbacks via the state machine. 
Signed-off-by: Sebastian Andrzej Siewior Acked-by: Tony Lindgren Cc: Peter Zijlstra Cc: rt@linutronix.de Cc: linux-omap@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20160906170457.32393-4-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 008eed0c0787..35859aafbbfc 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -21,6 +21,7 @@ enum cpuhp_state { CPUHP_NET_MVNETA_DEAD, CPUHP_CPUIDLE_DEAD, CPUHP_ARM64_FPSIMD_DEAD, + CPUHP_ARM_OMAP_WAKE_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, -- cgit v1.2.3 From 515332336be71d014bca1d29369c5d72baa38f71 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 6 Sep 2016 19:04:41 +0200 Subject: sh/SH-X3 SMP: Convert to hotplug state machine Install the callbacks via the state machine. Signed-off-by: Sebastian Andrzej Siewior Cc: Peter Zijlstra Cc: linux-sh@vger.kernel.org Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160906170457.32393-6-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 35859aafbbfc..8dec2a236af3 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -37,6 +37,7 @@ enum cpuhp_state { CPUHP_POWERPC_MMU_CTX_PREPARE, CPUHP_NOTIFY_PREPARE, CPUHP_ARM_SHMOBILE_SCU_PREPARE, + CPUHP_SH_SH3X_PREPARE, CPUHP_TIMERS_DEAD, CPUHP_BRINGUP_CPU, CPUHP_AP_IDLE_DEAD, -- cgit v1.2.3 From 75e12ed65312a56401f3b286ac7e12994301371c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 6 Sep 2016 19:04:43 +0200 Subject: lib/irq_poll: Convert to hotplug state machine Install the callbacks via the state machine. 
Signed-off-by: Sebastian Andrzej Siewior Cc: Peter Zijlstra Cc: Jens Axboe Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160906170457.32393-8-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 8dec2a236af3..2ca7b34871e0 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -22,6 +22,7 @@ enum cpuhp_state { CPUHP_CPUIDLE_DEAD, CPUHP_ARM64_FPSIMD_DEAD, CPUHP_ARM_OMAP_WAKE_DEAD, + CPUHP_IRQ_POLL_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, -- cgit v1.2.3 From 9a659f43dfea27cca423d4e80809be447f4c9ce7 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 6 Sep 2016 19:04:44 +0200 Subject: block/softirq: Convert to hotplug state machine Install the callbacks via the state machine. Signed-off-by: Sebastian Andrzej Siewior Cc: Peter Zijlstra Cc: Jens Axboe Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160906170457.32393-9-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 2ca7b34871e0..d4274d51fe27 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -23,6 +23,7 @@ enum cpuhp_state { CPUHP_ARM64_FPSIMD_DEAD, CPUHP_ARM_OMAP_WAKE_DEAD, CPUHP_IRQ_POLL_DEAD, + CPUHP_BLOCK_SOFTIRQ_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, -- cgit v1.2.3 From 8904f5a5afc4dd74e8fe2ab3eeb98018ef02f3e6 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 6 Sep 2016 19:04:46 +0200 Subject: virtio scsi: Convert to hotplug state machine Install the callbacks via the state machine. It uses the multi instance infrastructure of the hotplug code to handle each interface. 
virtscsi_set_affinity() is removed from virtscsi_init() because virtscsi_cpu_notif_add() (the function which registers the instance) is invoked right after it and the cpuhp_state_add_instance() function invokes the startup callback on all online CPUs. The same thing cannot be applied to virtscsi_cpu_notif_remove() because virtscsi_remove_vqs() invokes virtscsi_set_affinity() with affinity = false as argument but the old CPU_DEAD state invoked the function with affinity = true (which does not match the DEAD callback). Signed-off-by: Sebastian Andrzej Siewior Cc: "James E.J. Bottomley" Cc: linux-scsi@vger.kernel.org Cc: "Martin K. Petersen" Cc: "Michael S. Tsirkin" Cc: Peter Zijlstra Cc: virtualization@lists.linux-foundation.org Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160906170457.32393-11-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index d4274d51fe27..e7146ee88ea4 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -24,6 +24,7 @@ enum cpuhp_state { CPUHP_ARM_OMAP_WAKE_DEAD, CPUHP_IRQ_POLL_DEAD, CPUHP_BLOCK_SOFTIRQ_DEAD, + CPUHP_VIRT_SCSI_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, -- cgit v1.2.3 From 64f3bf2f85c5690228200d6b94eb6847049af70d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 6 Sep 2016 19:04:47 +0200 Subject: ACPI/processor: Convert to hotplug state machine Install the callbacks via the state machine. Signed-off-by: Sebastian Andrzej Siewior Acked-by: "Rafael J.
Wysocki" Cc: Peter Zijlstra Cc: linux-acpi@vger.kernel.org Cc: rt@linutronix.de Cc: Len Brown Link: http://lkml.kernel.org/r/20160906170457.32393-12-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index e7146ee88ea4..7706987c7827 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -25,6 +25,7 @@ enum cpuhp_state { CPUHP_IRQ_POLL_DEAD, CPUHP_BLOCK_SOFTIRQ_DEAD, CPUHP_VIRT_SCSI_DEAD, + CPUHP_ACPI_CPUDRV_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, -- cgit v1.2.3 From 30e92153b4e6f1cd01e30c34d9ef6f0986f96b0e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 6 Sep 2016 19:04:49 +0200 Subject: padata: Convert to hotplug state machine Install the callbacks via the state machine. CPU-hotplug multi-instance support is used with the nocalls() version. Maybe parts of padata_alloc() could be moved into the online callback so that we could invoke the ->startup callback for the instance and drop get_online_cpus(). Signed-off-by: Sebastian Andrzej Siewior Cc: Steffen Klassert Cc: Peter Zijlstra Cc: linux-crypto@vger.kernel.org Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160906170457.32393-14-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/padata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/padata.h b/include/linux/padata.h index 113ee626a4dc..0f9e567d5e15 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -151,7 +151,7 @@ struct parallel_data { * @flags: padata flags.
*/ struct padata_instance { - struct notifier_block cpu_notifier; + struct hlist_node node; struct workqueue_struct *wq; struct parallel_data *pd; struct padata_cpumask cpumask; -- cgit v1.2.3 From 8c58898b3ecb213ad7c52aa0c7c9d3201e559be1 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 6 Sep 2016 19:04:50 +0200 Subject: fault-injection/cpu: Convert to hotplug state machine Install the callbacks via the state machine. This is just a temporary vehicle to keep the interface working for now. It'll be replaced by the sysfs interface, which allows stepping through the hotplug state machine step by step. Signed-off-by: Sebastian Andrzej Siewior Cc: Peter Zijlstra Cc: Akinobu Mita Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160906170457.32393-15-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 7706987c7827..bb6231d13d3a 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -43,6 +43,7 @@ enum cpuhp_state { CPUHP_ARM_SHMOBILE_SCU_PREPARE, CPUHP_SH_SH3X_PREPARE, CPUHP_TIMERS_DEAD, + CPUHP_NOTF_ERR_INJ_PREPARE, CPUHP_BRINGUP_CPU, CPUHP_AP_IDLE_DEAD, CPUHP_AP_OFFLINE, -- cgit v1.2.3 From dd6d7c6f3dc136c1bec6def840f7fa53f84d1fe6 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 6 Sep 2016 19:04:51 +0200 Subject: mips/octeon/smp: Convert to hotplug state machine Install the callbacks via the state machine.
[ tglx: Renamed the state to MIPS_SOC_PREPARE so it can be reused by other SOCs ] Signed-off-by: Sebastian Andrzej Siewior Acked-by: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: Peter Zijlstra Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160906170457.32393-16-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index bb6231d13d3a..8f8a48bbe86d 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -44,6 +44,7 @@ enum cpuhp_state { CPUHP_SH_SH3X_PREPARE, CPUHP_TIMERS_DEAD, CPUHP_NOTF_ERR_INJ_PREPARE, + CPUHP_MIPS_SOC_PREPARE, CPUHP_BRINGUP_CPU, CPUHP_AP_IDLE_DEAD, CPUHP_AP_OFFLINE, -- cgit v1.2.3 From 84c9ceefecb8fe51c4bfa2a5424dd73bc024e41d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 6 Sep 2016 19:04:53 +0200 Subject: s390/mm/pfault: Convert to hotplug state machine Install the callbacks via the state machine. 
Signed-off-by: Sebastian Andrzej Siewior Cc: linux-s390@vger.kernel.org Cc: Peter Zijlstra Cc: Heiko Carstens Cc: rt@linutronix.de Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20160906170457.32393-18-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 8f8a48bbe86d..dea6696c673a 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -26,6 +26,7 @@ enum cpuhp_state { CPUHP_BLOCK_SOFTIRQ_DEAD, CPUHP_VIRT_SCSI_DEAD, CPUHP_ACPI_CPUDRV_DEAD, + CPUHP_S390_PFAULT_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, -- cgit v1.2.3 From ca26893f05e86497a86732768ec53cd38c0819ca Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 19 Sep 2016 19:00:09 +0800 Subject: rhashtable: Add rhlist interface The insecure_elasticity setting is an ugly wart brought out by users who need to insert duplicate objects (that is, distinct objects with identical keys) into the same table. In fact, those users have a much bigger problem. Once those duplicate objects are inserted, they don't have an interface to find them (unless you count the walker interface which walks over the entire table). Some users have resorted to doing a manual walk over the hash table which is of course broken because they don't handle the potential existence of multiple hash tables. The result is that they will break sporadically when they encounter a hash table resize/rehash. This patch provides a way out for those users, at the expense of an extra pointer per object. Essentially each object is now a list of objects carrying the same key. The hash table will only see the lists so nothing changes as far as rhashtable is concerned. To use this new interface, you need to insert a struct rhlist_head into your objects instead of struct rhash_head. 
While the hash table is unchanged, for type-safety you'll need to use struct rhltable instead of struct rhashtable. All the existing interfaces have been duplicated for rhlist, including the hash table walker. One missing feature is nulls marking because AFAIK the only potential user of it does not need duplicate objects. Should anyone need this it shouldn't be too hard to add. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 491 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 377 insertions(+), 114 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index fd82584acd48..5c132d3188be 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -1,7 +1,7 @@ /* * Resizable, Scalable, Concurrent Hash Table * - * Copyright (c) 2015 Herbert Xu + * Copyright (c) 2015-2016 Herbert Xu * Copyright (c) 2014-2015 Thomas Graf * Copyright (c) 2008-2014 Patrick McHardy * @@ -53,6 +53,11 @@ struct rhash_head { struct rhash_head __rcu *next; }; +struct rhlist_head { + struct rhash_head rhead; + struct rhlist_head __rcu *next; +}; + /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets @@ -137,6 +142,7 @@ struct rhashtable_params { * @key_len: Key length for hashfn * @elasticity: Maximum chain length before rehash * @p: Configuration parameters + * @rhlist: True if this is an rhltable * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping * @lock: Spin lock to protect walker list @@ -147,11 +153,20 @@ struct rhashtable { unsigned int key_len; unsigned int elasticity; struct rhashtable_params p; + bool rhlist; struct work_struct run_work; struct mutex mutex; spinlock_t lock; }; +/** + * struct rhltable - Hash table with duplicate objects in a list + * @ht: Underlying rhtable + */ +struct rhltable { + struct rhashtable ht; +}; + /** * struct 
rhashtable_walker - Hash table walker * @list: List entry on list of walkers @@ -163,9 +178,10 @@ struct rhashtable_walker { }; /** - * struct rhashtable_iter - Hash table iterator, fits into netlink cb + * struct rhashtable_iter - Hash table iterator * @ht: Table to iterate through * @p: Current pointer + * @list: Current hash list pointer * @walker: Associated rhashtable walker * @slot: Current slot * @skip: Number of entries to skip in slot @@ -173,6 +189,7 @@ struct rhashtable_walker { struct rhashtable_iter { struct rhashtable *ht; struct rhash_head *p; + struct rhlist_head *list; struct rhashtable_walker walker; unsigned int slot; unsigned int skip; @@ -339,13 +356,11 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, int rhashtable_init(struct rhashtable *ht, const struct rhashtable_params *params); +int rhltable_init(struct rhltable *hlt, + const struct rhashtable_params *params); -struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, - const void *key, - struct rhash_head *obj, - struct bucket_table *old_tbl, - void **data); -int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl); +void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, + struct rhash_head *obj); void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter); @@ -507,6 +522,31 @@ void rhashtable_destroy(struct rhashtable *ht); rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ tbl, hash, member) +/** + * rhl_for_each_rcu - iterate over rcu hash table list + * @pos: the &struct rlist_head to use as a loop cursor. + * @list: the head of the list + * + * This hash chain list-traversal primitive should be used on the + * list returned by rhltable_lookup. + */ +#define rhl_for_each_rcu(pos, list) \ + for (pos = list; pos; pos = rcu_dereference_raw(pos->next)) + +/** + * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type + * @tpos: the type * to use as a loop cursor. 
+ * @pos: the &struct rlist_head to use as a loop cursor. + * @list: the head of the list + * @member: name of the &struct rlist_head within the hashable struct. + * + * This hash chain list-traversal primitive should be used on the + * list returned by rhltable_lookup. + */ +#define rhl_for_each_entry_rcu(tpos, pos, list, member) \ + for (pos = list; pos && rht_entry(tpos, pos, member); \ + pos = rcu_dereference_raw(pos->next)) + static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, const void *obj) { @@ -516,18 +556,8 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len); } -/** - * rhashtable_lookup_fast - search hash table, inlined version - * @ht: hash table - * @key: the pointer to the key - * @params: hash table parameters - * - * Computes the hash value for the key and traverses the bucket chain looking - * for a entry with an identical key. The first matching entry is returned. - * - * Returns the first entry on which the compare function returned true. - */ -static inline void *rhashtable_lookup_fast( +/* Internal function, do not use. */ +static inline struct rhash_head *__rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { @@ -539,8 +569,6 @@ static inline void *rhashtable_lookup_fast( struct rhash_head *he; unsigned int hash; - rcu_read_lock(); - tbl = rht_dereference_rcu(ht->tbl, ht); restart: hash = rht_key_hashfn(ht, tbl, key, params); @@ -549,8 +577,7 @@ restart: params.obj_cmpfn(&arg, rht_obj(ht, he)) : rhashtable_compare(&arg, rht_obj(ht, he))) continue; - rcu_read_unlock(); - return rht_obj(ht, he); + return he; } /* Ensure we see any new tables. 
*/ @@ -559,96 +586,165 @@ restart: tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (unlikely(tbl)) goto restart; - rcu_read_unlock(); return NULL; } +/** + * rhashtable_lookup - search hash table + * @ht: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. The first matching entry is returned. + * + * This must only be called under the RCU read lock. + * + * Returns the first entry on which the compare function returned true. + */ +static inline void *rhashtable_lookup( + struct rhashtable *ht, const void *key, + const struct rhashtable_params params) +{ + struct rhash_head *he = __rhashtable_lookup(ht, key, params); + + return he ? rht_obj(ht, he) : NULL; +} + +/** + * rhashtable_lookup_fast - search hash table, without RCU read lock + * @ht: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. The first matching entry is returned. + * + * Only use this function when you have other mechanisms guaranteeing + * that the object won't go away after the RCU read lock is released. + * + * Returns the first entry on which the compare function returned true. + */ +static inline void *rhashtable_lookup_fast( + struct rhashtable *ht, const void *key, + const struct rhashtable_params params) +{ + void *obj; + + rcu_read_lock(); + obj = rhashtable_lookup(ht, key, params); + rcu_read_unlock(); + + return obj; +} + +/** + * rhltable_lookup - search hash list table + * @hlt: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. All matching entries are returned + * in a list. + * + * This must only be called under the RCU read lock. 
+ * + * Returns the list of entries that match the given key. + */ +static inline struct rhlist_head *rhltable_lookup( + struct rhltable *hlt, const void *key, + const struct rhashtable_params params) +{ + struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params); + + return he ? container_of(he, struct rhlist_head, rhead) : NULL; +} + /* Internal function, please use rhashtable_insert_fast() instead. This * function returns the existing element already in hashes in there is a clash, * otherwise it returns an error via ERR_PTR(). */ static inline void *__rhashtable_insert_fast( struct rhashtable *ht, const void *key, struct rhash_head *obj, - const struct rhashtable_params params) + const struct rhashtable_params params, bool rhlist) { struct rhashtable_compare_arg arg = { .ht = ht, .key = key, }; - struct bucket_table *tbl, *new_tbl; + struct rhash_head __rcu **pprev; + struct bucket_table *tbl; struct rhash_head *head; spinlock_t *lock; - unsigned int elasticity; unsigned int hash; - void *data = NULL; - int err; + int elasticity; + void *data; -restart: rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); + hash = rht_head_hashfn(ht, tbl, obj, params); + lock = rht_bucket_lock(tbl, hash); + spin_lock_bh(lock); - /* All insertions must grab the oldest table containing - * the hashed bucket that is yet to be rehashed. 
- */ - for (;;) { - hash = rht_head_hashfn(ht, tbl, obj, params); - lock = rht_bucket_lock(tbl, hash); - spin_lock_bh(lock); - - if (tbl->rehash <= hash) - break; - + if (unlikely(rht_dereference_bucket(tbl->future_tbl, tbl, hash))) { +slow_path: spin_unlock_bh(lock); - tbl = rht_dereference_rcu(tbl->future_tbl, ht); + rcu_read_unlock(); + return rhashtable_insert_slow(ht, key, obj); } - new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); - if (unlikely(new_tbl)) { - tbl = rhashtable_insert_slow(ht, key, obj, new_tbl, &data); - if (!IS_ERR_OR_NULL(tbl)) - goto slow_path; + elasticity = ht->elasticity; + pprev = &tbl->buckets[hash]; + rht_for_each(head, tbl, hash) { + struct rhlist_head *plist; + struct rhlist_head *list; + + elasticity--; + if (!key || + (params.obj_cmpfn ? + params.obj_cmpfn(&arg, rht_obj(ht, head)) : + rhashtable_compare(&arg, rht_obj(ht, head)))) + continue; + + data = rht_obj(ht, head); - err = PTR_ERR(tbl); - if (err == -EEXIST) - err = 0; + if (!rhlist) + goto out; - goto out; - } - err = -E2BIG; - if (unlikely(rht_grow_above_max(ht, tbl))) - goto out; + list = container_of(obj, struct rhlist_head, rhead); + plist = container_of(head, struct rhlist_head, rhead); - if (unlikely(rht_grow_above_100(ht, tbl))) { -slow_path: - spin_unlock_bh(lock); - err = rhashtable_insert_rehash(ht, tbl); - rcu_read_unlock(); - if (err) - return ERR_PTR(err); + RCU_INIT_POINTER(list->next, plist); + head = rht_dereference_bucket(head->next, tbl, hash); + RCU_INIT_POINTER(list->rhead.next, head); + rcu_assign_pointer(*pprev, obj); - goto restart; + goto good; } - err = 0; - elasticity = ht->elasticity; - rht_for_each(head, tbl, hash) { - if (key && - unlikely(!(params.obj_cmpfn ? 
- params.obj_cmpfn(&arg, rht_obj(ht, head)) : - rhashtable_compare(&arg, rht_obj(ht, head))))) { - data = rht_obj(ht, head); - goto out; - } - if (!--elasticity) - goto slow_path; - } + if (elasticity <= 0) + goto slow_path; + + data = ERR_PTR(-E2BIG); + if (unlikely(rht_grow_above_max(ht, tbl))) + goto out; + + if (unlikely(rht_grow_above_100(ht, tbl))) + goto slow_path; head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); RCU_INIT_POINTER(obj->next, head); + if (rhlist) { + struct rhlist_head *list; + + list = container_of(obj, struct rhlist_head, rhead); + RCU_INIT_POINTER(list->next, NULL); + } rcu_assign_pointer(tbl->buckets[hash], obj); @@ -656,11 +752,14 @@ slow_path: if (rht_grow_above_75(ht, tbl)) schedule_work(&ht->run_work); +good: + data = NULL; + out: spin_unlock_bh(lock); rcu_read_unlock(); - return err ? ERR_PTR(err) : data; + return data; } /** @@ -685,13 +784,65 @@ static inline int rhashtable_insert_fast( { void *ret; - ret = __rhashtable_insert_fast(ht, NULL, obj, params); + ret = __rhashtable_insert_fast(ht, NULL, obj, params, false); if (IS_ERR(ret)) return PTR_ERR(ret); return ret == NULL ? 0 : -EEXIST; } +/** + * rhltable_insert_key - insert object into hash list table + * @hlt: hash list table + * @key: the pointer to the key + * @list: pointer to hash list head inside object + * @params: hash table parameters + * + * Will take a per bucket spinlock to protect against mutual mutations + * on the same bucket. Multiple insertions may occur in parallel unless + * they map to the same bucket lock. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). 
+ */ +static inline int rhltable_insert_key( + struct rhltable *hlt, const void *key, struct rhlist_head *list, + const struct rhashtable_params params) +{ + return PTR_ERR(__rhashtable_insert_fast(&hlt->ht, key, &list->rhead, + params, true)); +} + +/** + * rhltable_insert - insert object into hash list table + * @hlt: hash list table + * @list: pointer to hash list head inside object + * @params: hash table parameters + * + * Will take a per bucket spinlock to protect against mutual mutations + * on the same bucket. Multiple insertions may occur in parallel unless + * they map to the same bucket lock. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + */ +static inline int rhltable_insert( + struct rhltable *hlt, struct rhlist_head *list, + const struct rhashtable_params params) +{ + const char *key = rht_obj(&hlt->ht, &list->rhead); + + key += params.key_offset; + + return rhltable_insert_key(hlt, key, list, params); +} + /** * rhashtable_lookup_insert_fast - lookup and insert object into hash table * @ht: hash table @@ -722,7 +873,8 @@ static inline int rhashtable_lookup_insert_fast( BUG_ON(ht->p.obj_hashfn); - ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params); + ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params, + false); if (IS_ERR(ret)) return PTR_ERR(ret); @@ -759,7 +911,7 @@ static inline int rhashtable_lookup_insert_key( BUG_ON(!ht->p.obj_hashfn || !key); - ret = __rhashtable_insert_fast(ht, key, obj, params); + ret = __rhashtable_insert_fast(ht, key, obj, params, false); if (IS_ERR(ret)) return PTR_ERR(ret); @@ -783,13 +935,14 @@ static inline void *rhashtable_lookup_get_insert_key( { BUG_ON(!ht->p.obj_hashfn || !key); - return __rhashtable_insert_fast(ht, key, obj, params); + return __rhashtable_insert_fast(ht, key, obj, 
params, false); } /* Internal function, please use rhashtable_remove_fast() instead */ -static inline int __rhashtable_remove_fast( +static inline int __rhashtable_remove_fast_one( struct rhashtable *ht, struct bucket_table *tbl, - struct rhash_head *obj, const struct rhashtable_params params) + struct rhash_head *obj, const struct rhashtable_params params, + bool rhlist) { struct rhash_head __rcu **pprev; struct rhash_head *he; @@ -804,39 +957,66 @@ static inline int __rhashtable_remove_fast( pprev = &tbl->buckets[hash]; rht_for_each(he, tbl, hash) { + struct rhlist_head *list; + + list = container_of(he, struct rhlist_head, rhead); + if (he != obj) { + struct rhlist_head __rcu **lpprev; + pprev = &he->next; - continue; + + if (!rhlist) + continue; + + do { + lpprev = &list->next; + list = rht_dereference_bucket(list->next, + tbl, hash); + } while (list && obj != &list->rhead); + + if (!list) + continue; + + list = rht_dereference_bucket(list->next, tbl, hash); + RCU_INIT_POINTER(*lpprev, list); + err = 0; + break; } - rcu_assign_pointer(*pprev, obj->next); - err = 0; + obj = rht_dereference_bucket(obj->next, tbl, hash); + err = 1; + + if (rhlist) { + list = rht_dereference_bucket(list->next, tbl, hash); + if (list) { + RCU_INIT_POINTER(list->rhead.next, obj); + obj = &list->rhead; + err = 0; + } + } + + rcu_assign_pointer(*pprev, obj); break; } spin_unlock_bh(lock); + if (err > 0) { + atomic_dec(&ht->nelems); + if (unlikely(ht->p.automatic_shrinking && + rht_shrink_below_30(ht, tbl))) + schedule_work(&ht->run_work); + err = 0; + } + return err; } -/** - * rhashtable_remove_fast - remove object from hash table - * @ht: hash table - * @obj: pointer to hash head inside object - * @params: hash table parameters - * - * Since the hash chain is single linked, the removal operation needs to - * walk the bucket chain upon removal. The removal operation is thus - * considerable slow if the hash table is not correctly sized. 
- * - * Will automatically shrink the table via rhashtable_expand() if the - * shrink_decision function specified at rhashtable_init() returns true. - * - * Returns zero on success, -ENOENT if the entry could not be found. - */ -static inline int rhashtable_remove_fast( +/* Internal function, please use rhashtable_remove_fast() instead */ +static inline int __rhashtable_remove_fast( struct rhashtable *ht, struct rhash_head *obj, - const struct rhashtable_params params) + const struct rhashtable_params params, bool rhlist) { struct bucket_table *tbl; int err; @@ -850,24 +1030,60 @@ static inline int rhashtable_remove_fast( * visible then that guarantees the entry to still be in * the old tbl if it exists. */ - while ((err = __rhashtable_remove_fast(ht, tbl, obj, params)) && + while ((err = __rhashtable_remove_fast_one(ht, tbl, obj, params, + rhlist)) && (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) ; - if (err) - goto out; - - atomic_dec(&ht->nelems); - if (unlikely(ht->p.automatic_shrinking && - rht_shrink_below_30(ht, tbl))) - schedule_work(&ht->run_work); - -out: rcu_read_unlock(); return err; } +/** + * rhashtable_remove_fast - remove object from hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Since the hash chain is single linked, the removal operation needs to + * walk the bucket chain upon removal. The removal operation is thus + * considerable slow if the hash table is not correctly sized. + * + * Will automatically shrink the table via rhashtable_expand() if the + * shrink_decision function specified at rhashtable_init() returns true. + * + * Returns zero on success, -ENOENT if the entry could not be found. 
+ */ +static inline int rhashtable_remove_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + return __rhashtable_remove_fast(ht, obj, params, false); +} + +/** + * rhltable_remove - remove object from hash list table + * @hlt: hash list table + * @list: pointer to hash list head inside object + * @params: hash table parameters + * + * Since the hash chain is single linked, the removal operation needs to + * walk the bucket chain upon removal. The removal operation is thus + * considerable slow if the hash table is not correctly sized. + * + * Will automatically shrink the table via rhashtable_expand() if the + * shrink_decision function specified at rhashtable_init() returns true. + * + * Returns zero on success, -ENOENT if the entry could not be found. + */ +static inline int rhltable_remove( + struct rhltable *hlt, struct rhlist_head *list, + const struct rhashtable_params params) +{ + return __rhashtable_remove_fast(&hlt->ht, &list->rhead, params, true); +} + /* Internal function, please use rhashtable_replace_fast() instead */ static inline int __rhashtable_replace_fast( struct rhashtable *ht, struct bucket_table *tbl, @@ -958,4 +1174,51 @@ static inline int rhashtable_walk_init(struct rhashtable *ht, return 0; } +/** + * rhltable_walk_enter - Initialise an iterator + * @hlt: Table to walk over + * @iter: Hash table Iterator + * + * This function prepares a hash table walk. + * + * Note that if you restart a walk after rhashtable_walk_stop you + * may see the same object twice. Also, you may miss objects if + * there are removals in between rhashtable_walk_stop and the next + * call to rhashtable_walk_start. + * + * For a completely stable walk you should construct your own data + * structure outside the hash table. + * + * This function may sleep so you must not call it from interrupt + * context or with spin locks held. + * + * You must call rhashtable_walk_exit after this function returns. 
+ */ +static inline void rhltable_walk_enter(struct rhltable *hlt, + struct rhashtable_iter *iter) +{ + return rhashtable_walk_enter(&hlt->ht, iter); +} + +/** + * rhltable_free_and_destroy - free elements and destroy hash list table + * @hlt: the hash list table to destroy + * @free_fn: callback to release resources of element + * @arg: pointer passed to free_fn + * + * See documentation for rhashtable_free_and_destroy. + */ +static inline void rhltable_free_and_destroy(struct rhltable *hlt, + void (*free_fn)(void *ptr, + void *arg), + void *arg) +{ + return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg); +} + +static inline void rhltable_destroy(struct rhltable *hlt) +{ + return rhltable_free_and_destroy(hlt, NULL, NULL); +} + #endif /* _LINUX_RHASHTABLE_H */ -- cgit v1.2.3 From e2a738f7a88f32622684d972d654a9fed026555f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 6 Sep 2016 19:04:55 +0200 Subject: blk/mq: Reserve hotplug states for block multiqueue This patch only reserves two CPU hotplug states for block/mq so the block tree can apply the conversion patches. 
Signed-off-by: Sebastian Andrzej Siewior Cc: Peter Zijlstra Cc: Jens Axboe Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160906170457.32393-20-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index dcfe619171b4..2ac07d01bdb5 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -14,6 +14,7 @@ enum cpuhp_state { CPUHP_PERF_SUPERH, CPUHP_X86_HPET_DEAD, CPUHP_X86_APB_DEAD, + CPUHP_BLK_MQ_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, @@ -22,6 +23,7 @@ enum cpuhp_state { CPUHP_SMPCFD_PREPARE, CPUHP_RCUTREE_PREP, CPUHP_NOTIFY_PREPARE, + CPUHP_BLK_MQ_PREPARE, CPUHP_TIMERS_DEAD, CPUHP_BRINGUP_CPU, CPUHP_AP_IDLE_DEAD, -- cgit v1.2.3 From b5d5cf2b8a68618a8ec646cab5746e2f539dc244 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 20 Sep 2016 17:20:10 +0200 Subject: parisc: Drop BROKEN_RODATA config option PARISC was the only architecture which selected the BROKEN_RODATA config option. Drop it and remove the special handling from init.h as well. Signed-off-by: Helge Deller --- include/linux/init.h | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 6935d02474aa..5a3321a7909b 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -41,21 +41,10 @@ discard it in modules) */ #define __init __section(.init.text) __cold notrace #define __initdata __section(.init.data) -#define __initconst __constsection(.init.rodata) +#define __initconst __section(.init.rodata) #define __exitdata __section(.exit.data) #define __exit_call __used __section(.exitcall.exit) -/* - * Some architecture have tool chains which do not handle rodata attributes - * correctly. 
For those disable special sections for const, so that other - * architectures can annotate correctly. - */ -#ifdef CONFIG_BROKEN_RODATA -#define __constsection(x) -#else -#define __constsection(x) __section(x) -#endif - /* * modpost check for section mismatches during the kernel build. * A section mismatch happens when there are references from a @@ -75,7 +64,7 @@ */ #define __ref __section(.ref.text) noinline #define __refdata __section(.ref.data) -#define __refconst __constsection(.ref.rodata) +#define __refconst __section(.ref.rodata) #ifdef MODULE #define __exitused @@ -88,10 +77,10 @@ /* Used for MEMORY_HOTPLUG */ #define __meminit __section(.meminit.text) __cold notrace #define __meminitdata __section(.meminit.data) -#define __meminitconst __constsection(.meminit.rodata) +#define __meminitconst __section(.meminit.rodata) #define __memexit __section(.memexit.text) __exitused __cold notrace #define __memexitdata __section(.memexit.data) -#define __memexitconst __constsection(.memexit.rodata) +#define __memexitconst __section(.memexit.rodata) /* For assembly routines */ #define __HEAD .section ".head.text","ax" -- cgit v1.2.3 From 95c35491f663962e476179076d24d0d2c45a8fb5 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 14 Sep 2016 15:14:45 -0600 Subject: PCI/AER: Remove duplicate AER severity translation Currently the AER severity is being translated twice in the code flow for PCIe errors. It is first translated in ghes_do_proc() before calling into the AER driver. Then it is translated again when the AER driver calls cper_print_aer(). This causes the severity that is used in cper_print_aer() to be incorrect. Remove the second translation that is in cper_print_aer() since this function is already receiving the correct AER severity. 
Signed-off-by: Tyler Baicar Signed-off-by: Bjorn Helgaas Reviewed-by: Borislav Petkov --- include/linux/aer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/aer.h b/include/linux/aer.h index 164049357e5c..04602cbe85dc 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -63,7 +63,7 @@ static inline int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) } #endif -void cper_print_aer(struct pci_dev *dev, int cper_severity, +void cper_print_aer(struct pci_dev *dev, int aer_severity, struct aer_capability_regs *aer); int cper_severity_to_aer(int cper_severity); void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, -- cgit v1.2.3 From 389958bb6be8b08c9f6d350dcaa9fc127123eada Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Tue, 20 Sep 2016 09:01:12 +0800 Subject: power: supply: sbs-battery: Cleanup removal of chip->pdata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There were still a few lingering references to pdata after commit power: supply: sbs-battery: simplify DT parsing. Remove pdata from struct sbs_info and conditional checks to see if this was set from the i2c read / write functions. Instead of calling max in each function for incrementing poll_retry_count do it once in the probe function. Fixup null pointer dereference of pdata in sbs_external_power_changed. Change retry counts to u32 to avoid need for max. 
Signed-off-by: Phil Reid Signed-off-by: Sebastian Reichel --- include/linux/power/sbs-battery.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/power/sbs-battery.h b/include/linux/power/sbs-battery.h index 811f1a0c00cb..519b8b43239a 100644 --- a/include/linux/power/sbs-battery.h +++ b/include/linux/power/sbs-battery.h @@ -31,8 +31,8 @@ * external change notification */ struct sbs_platform_data { - int i2c_retry_count; - int poll_retry_count; + u32 i2c_retry_count; + u32 poll_retry_count; }; #endif -- cgit v1.2.3 From 1d72706f0485b58e151b5a7584c4c65d66670587 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Mon, 19 Sep 2016 20:43:02 -0700 Subject: power: supply: bq27xxx_battery: allow kernel poll_interval parameter runtime update Fix issue with poll_interval being not updated till the previous interval expired. Cc: Tony Lindgren Cc: Liam Breck Signed-off-by: Matt Ranostay Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index b50c0492629d..e30deb046156 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -58,6 +58,7 @@ struct bq27xxx_device_info { unsigned long last_update; struct delayed_work work; struct power_supply *bat; + struct list_head list; struct mutex lock; u8 *regs; }; -- cgit v1.2.3 From 36bbef52c7eb646ed6247055a2acd3851e317857 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 20 Sep 2016 00:26:13 +0200 Subject: bpf: direct packet write and access for helpers for clsact progs This work implements direct packet access for helpers and direct packet write in a similar fashion as already available for XDP types via commits 4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and 6841de8b0d03 ("bpf: allow helpers access the packet directly"), and 
as a complementary feature to the already available direct packet read for tc (cls/act) programs. For enabling this, we need to introduce two helpers, bpf_skb_pull_data() and bpf_csum_update(). The first is generally needed for both, read and write, because they would otherwise only be limited to the current linear skb head. Usually, when the data_end test fails, programs just bail out, or, in the direct read case, use bpf_skb_load_bytes() as an alternative to overcome this limitation. If such data sits in non-linear parts, we can just pull them in once with the new helper, retest and eventually access them. At the same time, this also makes sure the skb is uncloned, which is, of course, a necessary condition for direct write. As this needs to be an invariant for the write part only, the verifier detects writes and adds a prologue that is calling bpf_skb_pull_data() to effectively unclone the skb from the very beginning in case it is indeed cloned. The heuristic makes use of a similar trick that was done in 233577a22089 ("net: filter: constify detection of pkt_type_offset"). This comes at zero cost for other programs that do not use the direct write feature. Should a program use this feature only sparsely and has read access for the most parts with, for example, drop return codes, then such write action can be delegated to a tail called program for mitigating this cost of potential uncloning to a late point in time where it would have been paid similarly with the bpf_skb_store_bytes() as well. Advantage of direct write is that the writes are inlined whereas the helper cannot make any length assumptions and thus needs to generate a call to memcpy() also for small sizes, as well as cost of helper call itself with sanity checks are avoided. 
Plus, when direct read is already used, we don't need to cache or perform rechecks on the data boundaries (due to verifier invalidating previous checks for helpers that change skb->data), so more complex programs using rewrites can benefit from switching to direct read plus write. For direct packet access to helpers, we save the otherwise needed copy into a temp struct sitting on stack memory when use-case allows. Both facilities are enabled via may_access_direct_pkt_data() in verifier. For now, we limit this to map helpers and csum_diff, and can successively enable other helpers where we find it makes sense. Helpers that definitely cannot be allowed for this are those part of bpf_helper_changes_skb_data() since they can change underlying data, and those that write into memory as this could happen for packet typed args when still cloned. bpf_csum_update() helper accommodates for the fact that we need to fixup checksum_complete when using direct write instead of bpf_skb_store_bytes(), meaning the programs can use available helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(), csum_block_add(), csum_block_sub() equivalents in eBPF together with the new helper. A usage example will be provided for iproute2's examples/bpf/ directory. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 4 +++- include/linux/skbuff.h | 14 ++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9a904f63f8c1..5691fdc83819 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -96,6 +96,7 @@ enum bpf_return_type { struct bpf_func_proto { u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); bool gpl_only; + bool pkt_access; enum bpf_return_type ret_type; enum bpf_arg_type arg1_type; enum bpf_arg_type arg2_type; @@ -151,7 +152,8 @@ struct bpf_verifier_ops { */ bool (*is_valid_access)(int off, int size, enum bpf_access_type type, enum bpf_reg_type *reg_type); - + int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, + const struct bpf_prog *prog); u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, struct bpf_insn *insn, struct bpf_prog *prog); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4c5662f05bda..c6dab3f7457c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -676,13 +676,23 @@ struct sk_buff { */ kmemcheck_bitfield_begin(flags1); __u16 queue_mapping; + +/* if you move cloned around you also must adapt those constants */ +#ifdef __BIG_ENDIAN_BITFIELD +#define CLONED_MASK (1 << 7) +#else +#define CLONED_MASK 1 +#endif +#define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset) + + __u8 __cloned_offset[0]; __u8 cloned:1, nohdr:1, fclone:2, peeked:1, head_frag:1, - xmit_more:1; - /* one bit hole */ + xmit_more:1, + __unused:1; /* one bit hole */ kmemcheck_bitfield_end(flags1); /* fields enclosed in headers_start/headers_end are copied -- cgit v1.2.3 From a4f1f9ac8153e22869b6408832b5a9bb9c762bf6 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:09 -0400 Subject: lib/win_minmax: windowed min or max estimator This commit introduces a generic library to estimate either the min or max value of a time-varying variable 
over a recent time window. This is code originally from Kathleen Nichols. The current form of the code is from Van Jacobson. A single struct minmax_sample will track the estimated windowed-max value of the series if you call minmax_running_max() or the estimated windowed-min value of the series if you call minmax_running_min(). Nearly equivalent code is already in place for minimum RTT estimation in the TCP stack. This commit extracts that code and generalizes it to handle both min and max. Moving the code here reduces the footprint and complexity of the TCP code base and makes the filter generally available for other parts of the codebase, including an upcoming TCP congestion control module. This library works well for time series where the measurements are smoothly increasing or decreasing. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/win_minmax.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 include/linux/win_minmax.h (limited to 'include/linux') diff --git a/include/linux/win_minmax.h b/include/linux/win_minmax.h new file mode 100644 index 000000000000..56569604278f --- /dev/null +++ b/include/linux/win_minmax.h @@ -0,0 +1,37 @@ +/** + * lib/minmax.c: windowed min/max tracker by Kathleen Nichols. 
+ * + */ +#ifndef MINMAX_H +#define MINMAX_H + +#include <linux/types.h> + +/* A single data point for our parameterized min-max tracker */ +struct minmax_sample { + u32 t; /* time measurement was taken */ + u32 v; /* value measured */ +}; + +/* State for the parameterized min-max tracker */ +struct minmax { + struct minmax_sample s[3]; +}; + +static inline u32 minmax_get(const struct minmax *m) +{ + return m->s[0].v; +} + +static inline u32 minmax_reset(struct minmax *m, u32 t, u32 meas) +{ + struct minmax_sample val = { .t = t, .v = meas }; + + m->s[2] = m->s[1] = m->s[0] = val; + return m->s[0].v; +} + +u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas); +u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas); + +#endif -- cgit v1.2.3 From 6403389211e1f4d40ed963fe47a96fce1a3ba7a9 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:10 -0400 Subject: tcp: use windowed min filter library for TCP min_rtt estimation Refactor the TCP min_rtt code to reuse the new win_minmax library in lib/win_minmax.c to simplify the TCP code. This is a pure refactor: the functionality is exactly the same. We just moved the windowed min code to make TCP easier to read and maintain, and to allow other parts of the kernel to use the windowed min/max filter code. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c723a465125d..6433cc8b4667 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -19,6 +19,7 @@ #include +#include #include #include #include @@ -234,9 +235,7 @@ struct tcp_sock { u32 mdev_max_us; /* maximal mdev for the last rtt period */ u32 rttvar_us; /* smoothed mdev_max */ u32 rtt_seq; /* sequence number to update rttvar */ - struct rtt_meas { - u32 rtt, ts; /* RTT in usec and sampling time in jiffies. */ - } rtt_min[3]; + struct minmax rtt_min; u32 packets_out; /* Packets which are "in flight" */ u32 retrans_out; /* Retransmitted packets out */ -- cgit v1.2.3 From 0682e6902a52aca7caf6ad42551b16ea0f87bc31 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:13 -0400 Subject: tcp: count packets marked lost for a TCP connection Count the number of packets that a TCP connection marks lost. Congestion control modules can use this loss rate information for more intelligent decisions about how fast to send. Specifically, this is used in TCP BBR policer detection. BBR uses a high packet loss rate as one signal in its policer detection and policer bandwidth estimation algorithm. The BBR policer detection algorithm cannot simply track retransmits, because a retransmit can be (and often is) an indicator of packets lost long, long ago. This is particularly true in a long CA_Loss period that repairs the initial massive losses when a policer kicks in. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 6433cc8b4667..38590fbc0ac5 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -267,6 +267,7 @@ struct tcp_sock { * receiver in Recovery. */ u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. rexmits */ + u32 lost; /* Total data packets lost incl. rexmits */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ -- cgit v1.2.3 From b9f64820fb226a4e8ab10591f46cecd91ca56b30 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 19 Sep 2016 23:39:14 -0400 Subject: tcp: track data delivery rate for a TCP connection This patch generates data delivery rate (throughput) samples on a per-ACK basis. These rate samples can be used by congestion control modules, and specifically will be used by TCP BBR in later patches in this series. Key state: tp->delivered: Tracks the total number of data packets (original or not) delivered so far. This is an already-existing field. tp->delivered_mstamp: the last time tp->delivered was updated. Algorithm: A rate sample is calculated as (d1 - d0)/(t1 - t0) on a per-ACK basis: d1: the current tp->delivered after processing the ACK t1: the current time after processing the ACK d0: the prior tp->delivered when the acked skb was transmitted t0: the prior tp->delivered_mstamp when the acked skb was transmitted When an skb is transmitted, we snapshot d0 and t0 in its control block in tcp_rate_skb_sent(). When an ACK arrives, it may SACK and ACK some skbs. For each SACKed or ACKed skb, tcp_rate_skb_delivered() updates the rate_sample struct to reflect the latest (d0, t0). Finally, tcp_rate_gen() generates a rate sample by storing (d1 - d0) in rs->delivered and (t1 - t0) in rs->interval_us. 
One caveat: if an skb was sent with no packets in flight, then tp->delivered_mstamp may be either invalid (if the connection is starting) or outdated (if the connection was idle). In that case, we'll re-stamp tp->delivered_mstamp. At first glance it seems t0 should always be the time when an skb was transmitted, but actually this could over-estimate the rate due to phase mismatch between transmit and ACK events. To track the delivery rate, we ensure that if packets are in flight then t0 and t1 are times at which packets were marked delivered. If the initial and final RTTs are different then one may be corrupted by some sort of noise. The noise we see most often is sending gaps caused by delayed, compressed, or stretched acks. This either affects both RTTs equally or artificially reduces the final RTT. We approach this by recording the info we need to compute the initial RTT (duration of the "send phase" of the window) when we recorded the associated inflight. Then, for a filter to avoid bandwidth overestimates, we generalize the per-sample bandwidth computation from: bw = delivered / ack_phase_rtt to the following: bw = delivered / max(send_phase_rtt, ack_phase_rtt) In large-scale experiments, this filtering approach incorporating send_phase_rtt is effective at avoiding bandwidth overestimates due to ACK compression or stretched ACKs. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 38590fbc0ac5..c50e6aec005a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -268,6 +268,8 @@ struct tcp_sock { u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. 
rexmits */ u32 lost; /* Total data packets lost incl. rexmits */ + struct skb_mstamp first_tx_mstamp; /* start of window send phase */ + struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ -- cgit v1.2.3 From d7722e8570fc0f1e003cee7cf37694041828918b Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Mon, 19 Sep 2016 23:39:15 -0400 Subject: tcp: track application-limited rate samples This commit adds code to track whether the delivery rate represented by each rate_sample was limited by the application. Upon each transmit, we store in the is_app_limited field in the skb a boolean bit indicating whether there is a known "bubble in the pipe": a point in the rate sample interval where the sender was application-limited, and did not transmit even though the cwnd and pacing rate allowed it. This logic marks the flow app-limited on a write if *all* of the following are true: 1) There is less than 1 MSS of unsent data in the write queue available to transmit. 2) There is no packet in the sender's queues (e.g. in fq or the NIC tx queue). 3) The connection is not limited by cwnd. 4) There are no lost packets to retransmit. The tcp_rate_check_app_limited() code in tcp_rate.c determines whether the connection is application-limited at the moment. If the flow is application-limited, it sets the tp->app_limited field. If the flow is application-limited then that means there is effectively a "bubble" of silence in the pipe now, and this silence will be reflected in a lower bandwidth sample for any rate samples from now until we get an ACK indicating this bubble has exited the pipe: specifically, until we get an ACK for the next packet we transmit. When we send every skb we record in scb->tx.is_app_limited whether the resulting rate sample will be application-limited. 
The code in tcp_rate_gen() checks to see when it is safe to mark all known application-limited bubbles of silence as having exited the pipe. It does this by checking to see when the delivered count moves past the tp->app_limited marker. At this point it zeroes the tp->app_limited marker, as all known bubbles are out of the pipe. We make room for the tx.is_app_limited bit in the skb by borrowing a bit from the in_flight field used by NV to record the number of bytes in flight. The receive window in the TCP header is 16 bits, and the max receive window scaling shift factor is 14 (RFC 1323). So the max receive window offered by the TCP protocol is 2^(16+14) = 2^30. So we only need 30 bits for the tx.in_flight used by NV. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c50e6aec005a..fdcd00ffcb66 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -268,6 +268,7 @@ struct tcp_sock { u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. rexmits */ u32 lost; /* Total data packets lost incl. rexmits */ + u32 app_limited; /* limited until "delivered" reaches this val */ struct skb_mstamp first_tx_mstamp; /* start of window send phase */ struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */ -- cgit v1.2.3 From eb8329e0a04db0061f714f033b4454326ba147f4 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 19 Sep 2016 23:39:16 -0400 Subject: tcp: export data delivery rate This commit exports two new fields in struct tcp_info: tcpi_delivery_rate: The most recent goodput, as measured by tcp_rate_gen(). 
If the socket is limited by the sending application (e.g., no data to send), it reports the highest measurement instead of the most recent. The unit is bytes per second (like other rate fields in tcp_info). tcpi_delivery_rate_app_limited: A boolean indicating if the goodput was measured when the socket's throughput was limited by the sending application. This delivery rate information can be useful for applications that want to know the current throughput the TCP connection is seeing, e.g. adaptive bitrate video streaming. It can also be very useful for debugging or troubleshooting. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/tcp.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fdcd00ffcb66..a17ae7b85218 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -213,7 +213,8 @@ struct tcp_sock { u8 reord; /* reordering detected */ } rack; u16 advmss; /* Advertised MSS */ - u8 unused; + u8 rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ + unused:7; u8 nonagle : 4,/* Disable Nagle algorithm? 
*/ thin_lto : 1,/* Use linear timeouts for thin streams */ thin_dupack : 1,/* Fast retransmit on first dupack */ @@ -271,6 +272,8 @@ struct tcp_sock { u32 app_limited; /* limited until "delivered" reaches this val */ struct skb_mstamp first_tx_mstamp; /* start of window send phase */ struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */ + u32 rate_delivered; /* saved rate sample: packets delivered */ + u32 rate_interval_us; /* saved rate sample: time elapsed */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ -- cgit v1.2.3 From 86f0e06767dda7863d6d2a8f0b3b857e6ea876a0 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sat, 17 Sep 2016 21:43:01 +0200 Subject: debugfs: introduce a public file_operations accessor This patch introduces an accessor which can be used by the users of debugfs (drivers, fs, ...) to get the original file_operations struct. It also removes the REAL_FOPS_DEREF macro in file.c and converts the code to use the public version. Previously, REAL_FOPS_DEREF was only available within the file.c of debugfs. But having a public getter available for debugfs users is important as some drivers (carl9170 and b43) use the pointer of the original file_operations in conjunction with container_of() within their debugfs implementations. 
Reviewed-by: Nicolai Stange Signed-off-by: Christian Lamparter Cc: stable # 4.7+ Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 1438e2322d5c..4d3f0d1aec73 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -45,6 +45,23 @@ extern struct dentry *arch_debugfs_dir; extern struct srcu_struct debugfs_srcu; +/** + * debugfs_real_fops - getter for the real file operation + * @filp: a pointer to a struct file + * + * Must only be called under the protection established by + * debugfs_use_file_start(). + */ +static inline const struct file_operations *debugfs_real_fops(struct file *filp) + __must_hold(&debugfs_srcu) +{ + /* + * Neither the pointer to the struct file_operations, nor its + * contents ever change -- srcu_dereference() is not needed here. + */ + return filp->f_path.dentry->d_fsdata; +} + #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_create_file(const char *name, umode_t mode, -- cgit v1.2.3 From b21d5b301794ae332eaa6e177d71fe8b77d3664c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 16 Sep 2016 14:25:06 +0200 Subject: blk-mq: register device instead of disk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable devices without a gendisk instance to register itself with blk-mq and expose the associated multi-queue sysfs entries. 
Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2575779cf13f..fbcfdf323243 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -175,8 +175,8 @@ enum { struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); -int blk_mq_register_disk(struct gendisk *); -void blk_mq_unregister_disk(struct gendisk *); +int blk_mq_register_dev(struct device *, struct request_queue *); +void blk_mq_unregister_dev(struct device *, struct request_queue *); int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); void blk_mq_free_tag_set(struct blk_mq_tag_set *set); -- cgit v1.2.3 From b0b4e09c1ae71c4ec33df0616b830ae050006e9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 16 Sep 2016 14:25:07 +0200 Subject: lightnvm: control life of nvm_dev in driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LightNVM compatible device drivers do not have a method to expose LightNVM specific sysfs entries. To enable LightNVM sysfs entries to be exposed, lightnvm device drivers require a struct device to attach it to. To allow both the actual device driver and lightnvm sysfs entries to coexist, the device driver tracks the lifetime of the nvm_dev structure. This patch refactors NVMe and null_blk to handle the lifetime of struct nvm_dev, which eliminates the need for struct gendisk when a lightnvm compatible device is provided. 
Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index ba78b8306674..5afc2634f332 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -524,9 +524,9 @@ extern struct nvm_block *nvm_get_blk(struct nvm_dev *, struct nvm_lun *, unsigned long); extern void nvm_put_blk(struct nvm_dev *, struct nvm_block *); -extern int nvm_register(struct request_queue *, char *, - struct nvm_dev_ops *); -extern void nvm_unregister(char *); +extern struct nvm_dev *nvm_alloc_dev(int); +extern int nvm_register(struct nvm_dev *); +extern void nvm_unregister(struct nvm_dev *); void nvm_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type); @@ -575,11 +575,14 @@ extern int nvm_dev_factory(struct nvm_dev *, int flags); #else /* CONFIG_NVM */ struct nvm_dev_ops; -static inline int nvm_register(struct request_queue *q, char *disk_name, - struct nvm_dev_ops *ops) +static inline struct nvm_dev *nvm_alloc_dev(int node) +{ + return ERR_PTR(-EINVAL); +} +static inline int nvm_register(struct nvm_dev *dev) { return -EINVAL; } -static inline void nvm_unregister(char *disk_name) {} +static inline void nvm_unregister(struct nvm_dev *dev) {} #endif /* CONFIG_NVM */ #endif /* LIGHTNVM.H */ -- cgit v1.2.3 From 40267efddc296190d50c61d96daf277151447cf6 Mon Sep 17 00:00:00 2001 From: "Simon A. F. Lund" Date: Fri, 16 Sep 2016 14:25:08 +0200 Subject: lightnvm: expose device geometry through sysfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For a host to access an Open-Channel SSD, it has to know its geometry, so that it writes and reads at the appropriate device bounds. Currently, the geometry information is kept within the kernel, and not exported to user-space for consumption. 
This patch exposes the configuration through sysfs and enables user-space libraries, such as liblightnvm, to use the sysfs implementation to get the geometry of an Open-Channel SSD. The sysfs entries are stored within the device hierarchy, and can be found using the "lightnvm" device type. An example configuration looks like this: /sys/class/nvme/ └── nvme0n1 ├── capabilities: 3 ├── device_mode: 1 ├── erase_max: 1000000 ├── erase_typ: 1000000 ├── flash_media_type: 0 ├── media_capabilities: 0x00000001 ├── media_type: 0 ├── multiplane: 0x00010101 ├── num_blocks: 1022 ├── num_channels: 1 ├── num_luns: 4 ├── num_pages: 64 ├── num_planes: 1 ├── page_size: 4096 ├── prog_max: 100000 ├── prog_typ: 100000 ├── read_max: 10000 ├── read_typ: 10000 ├── sector_oob_size: 0 ├── sector_size: 4096 ├── media_manager: gennvm ├── ppa_format: 0x380830082808001010102008 ├── vendor_opcode: 0 ├── max_phys_secs: 64 └── version: 1 Signed-off-by: Simon A. F. Lund Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 5afc2634f332..d190786e4ad8 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -352,7 +352,10 @@ struct nvm_dev { /* Backend device */ struct request_queue *q; + struct device dev; + struct device *parent_dev; char name[DISK_NAME_LEN]; + void *private_data; struct mutex mlock; spinlock_t lock; -- cgit v1.2.3 From e0a491c1296874a1aca51cc68452f12a4d950029 Mon Sep 17 00:00:00 2001 From: Gayatri Kammela Date: Fri, 12 Aug 2016 18:03:19 -0700 Subject: lib/raid6: Add AVX512 optimized gen_syndrome functions Optimize RAID6 gen_syndrome functions to take advantage of the 512-bit ZMM integer instructions introduced in AVX512. The AVX512 optimized gen_syndrome functions are simply based on avx2.c written by Yuanhan Liu and sse2.c written by hpa. 
The patch was tested and benchmarked before submission on a hardware that has AVX512 flags to support such instructions Cc: H. Peter Anvin Cc: Jim Kukunas Cc: Fenghua Yu Signed-off-by: Megha Dey Signed-off-by: Gayatri Kammela Reviewed-by: Fenghua Yu Signed-off-by: Shaohua Li --- include/linux/raid/pq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index a0118d5929a9..0c529a55b52e 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -102,6 +102,9 @@ extern const struct raid6_calls raid6_altivec8; extern const struct raid6_calls raid6_avx2x1; extern const struct raid6_calls raid6_avx2x2; extern const struct raid6_calls raid6_avx2x4; +extern const struct raid6_calls raid6_avx512x1; +extern const struct raid6_calls raid6_avx512x2; +extern const struct raid6_calls raid6_avx512x4; extern const struct raid6_calls raid6_tilegx8; struct raid6_recov_calls { -- cgit v1.2.3 From 13c520b2993c9faae6770264d33ff1e1ea4c2ceb Mon Sep 17 00:00:00 2001 From: Gayatri Kammela Date: Fri, 12 Aug 2016 18:03:20 -0700 Subject: lib/raid6: Add AVX512 optimized recovery functions Optimize RAID6 recovery functions to take advantage of the 512-bit ZMM integer instructions introduced in AVX512. AVX512 optimized recovery functions, which is simply based on recov_avx2.c written by Jim Kukunas This patch was tested and benchmarked before submission on a hardware that has AVX512 flags to support such instructions Cc: Jim Kukunas Cc: H. 
Peter Anvin Cc: Fenghua Yu Signed-off-by: Megha Dey Signed-off-by: Gayatri Kammela Reviewed-by: Fenghua Yu Signed-off-by: Shaohua Li --- include/linux/raid/pq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 0c529a55b52e..1abd89584568 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -118,6 +118,7 @@ struct raid6_recov_calls { extern const struct raid6_recov_calls raid6_recov_intx1; extern const struct raid6_recov_calls raid6_recov_ssse3; extern const struct raid6_recov_calls raid6_recov_avx2; +extern const struct raid6_recov_calls raid6_recov_avx512; extern const struct raid6_calls raid6_neonx1; extern const struct raid6_calls raid6_neonx2; -- cgit v1.2.3 From dc6db24d2476cd09c0ecf2b8d80313539f737a89 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Thu, 25 Aug 2016 16:35:18 +0800 Subject: x86/acpi: Set persistent cpuid <-> nodeid mapping when booting The whole patch-set aims at making cpuid <-> nodeid mapping persistent. So that, when node online/offline happens, cache based on cpuid <-> nodeid mapping such as wq_numa_possible_cpumask will not cause any problem. It contains 4 steps: 1. Enable apic registration flow to handle both enabled and disabled cpus. 2. Introduce a new array storing all possible cpuid <-> apicid mapping. 3. Enable _MAT and MADT related apis to return non-present or disabled cpus' apicid. 4. Establish all possible cpuid <-> nodeid mapping. This patch finishes step 4. This patch sets the persistent cpuid <-> nodeid mapping for all enabled/disabled processors at boot time via an additional acpi namespace walk for processors. 
[ tglx: Remove the unneeded exports ] Signed-off-by: Gu Zheng Signed-off-by: Tang Chen Signed-off-by: Zhu Guihua Signed-off-by: Dou Liyang Acked-by: Ingo Molnar Cc: mika.j.penttila@gmail.com Cc: len.brown@intel.com Cc: rafael@kernel.org Cc: rjw@rjwysocki.net Cc: yasu.isimatu@gmail.com Cc: linux-mm@kvack.org Cc: linux-acpi@vger.kernel.org Cc: isimatu.yasuaki@jp.fujitsu.com Cc: gongzhaogang@inspur.com Cc: tj@kernel.org Cc: izumi.taku@jp.fujitsu.com Cc: cl@linux.com Cc: chen.tang@easystack.cn Cc: akpm@linux-foundation.org Cc: kamezawa.hiroyu@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1472114120-3281-6-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- include/linux/acpi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c9a596b9535c..5b4f9accf96b 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -271,8 +271,11 @@ static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id) /* Arch dependent functions for cpu hotplug support */ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu); int acpi_unmap_cpu(int cpu); +int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ +void acpi_set_processor_mapping(void); + #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr); #endif -- cgit v1.2.3 From fd74da217df7d4bd25e95411da64e0b92762842e Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Thu, 25 Aug 2016 16:35:20 +0800 Subject: acpi: Validate processor id when mapping the processor When we want to identify whether the proc_id is unreasonable or not, we can call the "acpi_processor_validate_proc_id" function. It will search in the duplicate IDs. If we find the proc_id in the IDs, we return true to the call function. Conversely, the false represents available. 
When we establish all possible cpuid <-> nodeid mapping to handle the cpu hotplugs, we will use the proc_id from ACPI table. We do validation when we get the proc_id. If the result is true, we will stop the mapping. [ tglx: Mark the new function __init ] Signed-off-by: Dou Liyang Acked-by: Ingo Molnar Cc: mika.j.penttila@gmail.com Cc: len.brown@intel.com Cc: rafael@kernel.org Cc: rjw@rjwysocki.net Cc: yasu.isimatu@gmail.com Cc: linux-mm@kvack.org Cc: linux-acpi@vger.kernel.org Cc: isimatu.yasuaki@jp.fujitsu.com Cc: gongzhaogang@inspur.com Cc: tj@kernel.org Cc: izumi.taku@jp.fujitsu.com Cc: cl@linux.com Cc: chen.tang@easystack.cn Cc: akpm@linux-foundation.org Cc: kamezawa.hiroyu@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1472114120-3281-8-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- include/linux/acpi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 5b4f9accf96b..7f307f3bd12c 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -267,6 +267,9 @@ static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id) return phys_id == PHYS_CPUID_INVALID; } +/* Validate the processor object's proc_id */ +bool acpi_processor_validate_proc_id(int proc_id); + #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu); -- cgit v1.2.3 From 332ae8e2f6ecda5e50c5c62ed62894963e3a83f5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:53 +0100 Subject: net: cls_bpf: add hardware offload This patch adds hardware offload capability to cls_bpf classifier, similar to what has been done with U32 and flower. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a10d8d18ce19..69f242c71865 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -789,6 +789,7 @@ enum { TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, TC_SETUP_MATCHALL, + TC_SETUP_CLSBPF, }; struct tc_cls_u32_offload; @@ -800,6 +801,7 @@ struct tc_to_netdev { struct tc_cls_u32_offload *cls_u32; struct tc_cls_flower_offload *cls_flower; struct tc_cls_matchall_offload *cls_mall; + struct tc_cls_bpf_offload *cls_bpf; }; }; -- cgit v1.2.3 From 58e2af8b3a6b587e4ac8414343581da4349d3c0f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:57 +0100 Subject: bpf: expose internal verfier structures Move verifier's internal structures to a header file and prefix their names with bpf_ to avoid potential namespace conflicts. Those structures will soon be used by external analyzers. Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 79 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 include/linux/bpf_verifier.h (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h new file mode 100644 index 000000000000..9457a22fc6e0 --- /dev/null +++ b/include/linux/bpf_verifier.h @@ -0,0 +1,79 @@ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ */ +#ifndef _LINUX_BPF_VERIFIER_H +#define _LINUX_BPF_VERIFIER_H 1 + +#include /* for enum bpf_reg_type */ +#include /* for MAX_BPF_STACK */ + +struct bpf_reg_state { + enum bpf_reg_type type; + union { + /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ + s64 imm; + + /* valid when type == PTR_TO_PACKET* */ + struct { + u32 id; + u16 off; + u16 range; + }; + + /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | + * PTR_TO_MAP_VALUE_OR_NULL + */ + struct bpf_map *map_ptr; + }; +}; + +enum bpf_stack_slot_type { + STACK_INVALID, /* nothing was stored in this stack slot */ + STACK_SPILL, /* register spilled into stack */ + STACK_MISC /* BPF program wrote some data into this slot */ +}; + +#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ + +/* state of the program: + * type of all registers and stack info + */ +struct bpf_verifier_state { + struct bpf_reg_state regs[MAX_BPF_REG]; + u8 stack_slot_type[MAX_BPF_STACK]; + struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; +}; + +/* linked list of verifier states used to prune search */ +struct bpf_verifier_state_list { + struct bpf_verifier_state state; + struct bpf_verifier_state_list *next; +}; + +struct bpf_insn_aux_data { + enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ +}; + +#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ + +/* single container for all structs + * one verifier_env per bpf_check() call + */ +struct bpf_verifier_env { + struct bpf_prog *prog; /* eBPF program being verified */ + struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ + int stack_size; /* number of states to be processed */ + struct bpf_verifier_state cur_state; /* current verifier state */ + struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ + struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ + u32 used_map_cnt; /* number of used maps */ + 
+ u32 id_gen; /* used to generate unique reg IDs */ + bool allow_ptr_leaks; + bool seen_direct_write; + struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ +}; + +#endif /* _LINUX_BPF_VERIFIER_H */ -- cgit v1.2.3 From 13a27dfc669724564aafa2699976ee756029fed2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:58 +0100 Subject: bpf: enable non-core use of the verfier Advanced JIT compilers and translators may want to use eBPF verifier as a base for parsers or to perform custom checks and validations. Add ability for external users to invoke the verifier and provide callbacks to be invoked for every instruction checked. For now, only the most basic callback for per-instruction pre-interpretation checks is added. More advanced users may also like to have per-instruction post callback and state comparison callback. Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 9457a22fc6e0..c5cb661712c9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -59,6 +59,12 @@ struct bpf_insn_aux_data { #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ +struct bpf_verifier_env; +struct bpf_ext_analyzer_ops { + int (*insn_hook)(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx); +}; + /* single container for all structs * one verifier_env per bpf_check() call */ @@ -68,6 +74,8 @@ struct bpf_verifier_env { int stack_size; /* number of states to be processed */ struct bpf_verifier_state cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ + const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */ + void *analyzer_priv; /* pointer to external analyzer's private data */ 
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ u32 used_map_cnt; /* number of used maps */ u32 id_gen; /* used to generate unique reg IDs */ @@ -76,4 +84,7 @@ struct bpf_verifier_env { struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ }; +int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, + void *priv); + #endif /* _LINUX_BPF_VERIFIER_H */ -- cgit v1.2.3 From bfca4c520f7ea78138ddccea2de18dc062b0fefd Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Mon, 19 Sep 2016 19:11:09 +0300 Subject: net: skbuff: Export __skb_vlan_pop This exports the functionality of extracting the tag from the payload, without moving next vlan tag into hw accel tag. Signed-off-by: Shmulik Ladkani Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c6dab3f7457c..9bf60b556bd2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3085,6 +3085,7 @@ bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, int write_len); +int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy, -- cgit v1.2.3 From efee95f42b5dddedcaff0a0eaa44e170fc7522e8 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 20 Sep 2016 19:25:58 -0400 Subject: ptp_clock: future-proofing drivers against PTP subsystem becoming optional Drivers must be ready to accept NULL from ptp_clock_register() if the PTP clock subsystem is configured out. 
This patch documents that and ensures that all drivers cope well with a NULL return. Signed-off-by: Nicolas Pitre Reviewed-by: Eugenia Emantayev Acked-by: Richard Cochran Acked-by: Edward Cree Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 6b15e168148a..5ad54fc66cf0 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -127,6 +127,11 @@ struct ptp_clock; * * @info: Structure describing the new clock. * @parent: Pointer to the parent device of the new clock. + * + * Returns a valid pointer on success or PTR_ERR on failure. If PHC + * support is missing at the configuration level, this function + * returns NULL, and drivers are expected to gracefully handle that + * case separately. */ extern struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, -- cgit v1.2.3 From 77f2efcbdd7133466060198e02c6e8a170c3cd14 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Sep 2016 00:29:01 +0100 Subject: rxrpc: Add ktime_sub_ms() Add a ktime_sub_ms() to go with ktime_add_ms() and co. for use in AF_RXRPC RTT determination. 
Signed-off-by: David Howells --- include/linux/ktime.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 2b6a204bd8d4..aa118bad1407 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -231,6 +231,11 @@ static inline ktime_t ktime_sub_us(const ktime_t kt, const u64 usec) return ktime_sub_ns(kt, usec * NSEC_PER_USEC); } +static inline ktime_t ktime_sub_ms(const ktime_t kt, const u64 msec) +{ + return ktime_sub_ns(kt, msec * NSEC_PER_MSEC); +} + extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs); /** -- cgit v1.2.3 From 9abefcb1aaa58b9d5aa40a8bb12c87d02415e4c8 Mon Sep 17 00:00:00 2001 From: Sergei Miroshnichenko Date: Wed, 7 Sep 2016 16:51:12 +0300 Subject: can: dev: fix deadlock reported after bus-off A timer was used to restart after the bus-off state, leading to a relatively large can_restart() executed in an interrupt context, which in turn sets up pinctrl. When this happens during system boot, there is a high probability of grabbing the pinctrl_list_mutex, which is locked already by the probe() of other device, making the kernel suspect a deadlock condition [1]. To resolve this issue, the restart_timer is replaced by a delayed work. 
[1] https://github.com/victronenergy/venus/issues/24 Signed-off-by: Sergei Miroshnichenko Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 5261751f6bd4..5f5270941ba0 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -32,6 +32,7 @@ enum can_mode { * CAN common private data */ struct can_priv { + struct net_device *dev; struct can_device_stats can_stats; struct can_bittiming bittiming, data_bittiming; @@ -47,7 +48,7 @@ struct can_priv { u32 ctrlmode_static; /* static enabled options for driver/hardware */ int restart_ms; - struct timer_list restart_timer; + struct delayed_work restart_work; int (*do_set_bittiming)(struct net_device *dev); int (*do_set_data_bittiming)(struct net_device *dev); -- cgit v1.2.3 From 073931017b49d9458aa351605b43a7e34598caef Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 19 Sep 2016 17:39:09 +0200 Subject: posix_acl: Clear SGID bit when setting file permissions When file permissions are modified via chmod(2) and the user is not in the owning group or capable of CAP_FSETID, the setgid bit is cleared in inode_change_ok(). Setting a POSIX ACL via setxattr(2) sets the file permissions as well as the new ACL, but doesn't clear the setgid bit in a similar way; this allows to bypass the check in chmod(2). Fix that. 
References: CVE-2016-7097 Reviewed-by: Christoph Hellwig Reviewed-by: Jeff Layton Signed-off-by: Jan Kara Signed-off-by: Andreas Gruenbacher --- include/linux/posix_acl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index d5d3d741f028..bf1046d0397b 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -93,6 +93,7 @@ extern int set_posix_acl(struct inode *, int, struct posix_acl *); extern int posix_acl_chmod(struct inode *, umode_t); extern int posix_acl_create(struct inode *, umode_t *, struct posix_acl **, struct posix_acl **); +extern int posix_acl_update_mode(struct inode *, umode_t *, struct posix_acl **); extern int simple_set_acl(struct inode *, struct posix_acl *, int); extern int simple_acl_create(struct inode *, struct inode *); -- cgit v1.2.3 From 31051c85b5e2aaaf6315f74c72a732673632a905 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 26 May 2016 16:55:18 +0200 Subject: fs: Give dentry to inode_change_ok() instead of inode inode_change_ok() will be responsible for clearing capabilities and IMA extended attributes and as such will need dentry. Give it as an argument to inode_change_ok() instead of an inode. Also rename inode_change_ok() to setattr_prepare() to better reflect that it also does some modifications in addition to checks.
Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3523bf62f328..943a21cbb469 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2994,7 +2994,7 @@ extern int buffer_migrate_page(struct address_space *, #define buffer_migrate_page NULL #endif -extern int inode_change_ok(const struct inode *, struct iattr *); +extern int setattr_prepare(struct dentry *, struct iattr *); extern int inode_newsize_ok(const struct inode *, loff_t offset); extern void setattr_copy(struct inode *inode, const struct iattr *attr); -- cgit v1.2.3 From 572de608e36279f249c9a6350f142e69f23dacab Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 22 Sep 2016 10:33:54 +0800 Subject: net: ethernet: mediatek: add extension of phy-mode for TRGMII adds PHY-mode "trgmii" as an extension for the operation mode of the PHY interface for PHY_INTERFACE_MODE_TRGMII. and adds a variable trgmii inside mtk_mac as the indication to make the difference between the MAC connected to internal switch or connected to external PHY by the given configuration on the board and then to perform the corresponding setup on TRGMII hardware module. Signed-off-by: Sean Wang Cc: Florian Fainelli Signed-off-by: David S. 
Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 2d24b283aa2d..e25f1830fbcf 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -80,6 +80,7 @@ typedef enum { PHY_INTERFACE_MODE_XGMII, PHY_INTERFACE_MODE_MOCA, PHY_INTERFACE_MODE_QSGMII, + PHY_INTERFACE_MODE_TRGMII, PHY_INTERFACE_MODE_MAX, } phy_interface_t; @@ -123,6 +124,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "moca"; case PHY_INTERFACE_MODE_QSGMII: return "qsgmii"; + case PHY_INTERFACE_MODE_TRGMII: + return "trgmii"; default: return "unknown"; } -- cgit v1.2.3 From 9af6528ee9b682df7f29dbee86fbba0b67eab944 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 13 Sep 2016 18:37:29 +0200 Subject: sched/core: Optimize __schedule() Oleg noted that by making do_exit() use __schedule() for the TASK_DEAD context switch, we can avoid the TASK_DEAD special case currently in __schedule() because that avoids the extra preempt_disable() from schedule(). In order to facilitate this, create a do_task_dead() helper which we place in the scheduler code, such that it can access __schedule(). Also add some __noreturn annotations to the functions, there's no coming back from do_exit(). 
Suggested-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Cheng Chao Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: chris@chris-wilson.co.uk Cc: tj@kernel.org Link: http://lkml.kernel.org/r/20160913163729.GB5012@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 9 +++------ include/linux/sched.h | 2 ++ 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d96a6118d26a..74fd6f05bc5b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -259,17 +259,14 @@ static inline void might_fault(void) { } extern struct atomic_notifier_head panic_notifier_list; extern long (*panic_blink)(int state); __printf(1, 2) -void panic(const char *fmt, ...) - __noreturn __cold; +void panic(const char *fmt, ...) __noreturn __cold; void nmi_panic(struct pt_regs *regs, const char *msg); extern void oops_enter(void); extern void oops_exit(void); void print_oops_end_marker(void); extern int oops_may_print(void); -void do_exit(long error_code) - __noreturn; -void complete_and_exit(struct completion *, long) - __noreturn; +void do_exit(long error_code) __noreturn; +void complete_and_exit(struct completion *, long) __noreturn; /* Internal, do not use. 
*/ int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); diff --git a/include/linux/sched.h b/include/linux/sched.h index d75024053e9b..f00ee8e90a29 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -448,6 +448,8 @@ static inline void io_schedule(void) io_schedule_timeout(MAX_SCHEDULE_TIMEOUT); } +void __noreturn do_task_dead(void); + struct nsproxy; struct user_namespace; -- cgit v1.2.3 From 35a773a07926a22bf19d77ee00024522279c4e68 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 19 Sep 2016 12:57:53 +0200 Subject: sched/core: Avoid _cond_resched() for PREEMPT=y On fully preemptible kernels _cond_resched() is pointless, so avoid emitting any code for it. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mikulas Patocka Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f00ee8e90a29..b99fcd1b341e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3209,7 +3209,11 @@ static inline int signal_pending_state(long state, struct task_struct *p) * cond_resched_lock() will drop the spinlock before scheduling, * cond_resched_softirq() will enable bhs before scheduling. */ +#ifndef CONFIG_PREEMPT extern int _cond_resched(void); +#else +static inline int _cond_resched(void) { return 0; } +#endif #define cond_resched() ({ \ ___might_sleep(__FILE__, __LINE__, 0); \ -- cgit v1.2.3 From 11d9684ca638aad99f740ef3abcba2aa4c9290bf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 22 Jun 2015 14:16:31 +0200 Subject: locking/percpu-rwsem: Add DEFINE_STATIC_PERCPU_RWSEMand percpu_rwsem_assert_held() Provide a static init and a standard locking assertion method. 
Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Cc: der.herr@hofr.at Cc: oleg@redhat.com Cc: paulmck@linux.vnet.ibm.com Cc: riel@redhat.com Cc: tj@kernel.org Cc: viro@ZenIV.linux.org.uk Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/percpu-rwsem.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 146efefde2a1..d402d3924a91 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -16,6 +16,15 @@ struct percpu_rw_semaphore { int readers_block; }; +#define DEFINE_STATIC_PERCPU_RWSEM(name) \ +static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \ +static struct percpu_rw_semaphore name = { \ + .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC), \ + .read_count = &__percpu_rwsem_rc_##name, \ + .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \ + .writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer), \ +} + extern int __percpu_down_read(struct percpu_rw_semaphore *, int); extern void __percpu_up_read(struct percpu_rw_semaphore *); @@ -102,6 +111,9 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *); #define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem) +#define percpu_rwsem_assert_held(sem) \ + lockdep_assert_held(&(sem)->rw_sem) + static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem, bool read, unsigned long ip) { -- cgit v1.2.3 From 259d69b7f056bc9a543c7d184e791ef6c2775081 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Nov 2015 15:23:55 +0100 Subject: locking/percpu-rwsem: Add down_read_preempt_disable() Provide a down_read()/up_read() variant that keeps preemption disabled over the whole thing, when possible. This avoids a needless preemption point for constructs such as: percpu_down_read(&global_rwsem); spin_lock(&lock); ... 
spin_unlock(&lock); percpu_up_read(&global_rwsem); Which perturbs timings. In particular it was found to cure a performance regression in a follow up patch in fs/locks.c Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/percpu-rwsem.h | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index d402d3924a91..5b2e6159b744 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -28,7 +28,7 @@ static struct percpu_rw_semaphore name = { \ extern int __percpu_down_read(struct percpu_rw_semaphore *, int); extern void __percpu_up_read(struct percpu_rw_semaphore *); -static inline void percpu_down_read(struct percpu_rw_semaphore *sem) +static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore *sem) { might_sleep(); @@ -46,13 +46,19 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem) __this_cpu_inc(*sem->read_count); if (unlikely(!rcu_sync_is_idle(&sem->rss))) __percpu_down_read(sem, false); /* Unconditional memory barrier */ - preempt_enable(); + barrier(); /* - * The barrier() from preempt_enable() prevents the compiler from + * The barrier() prevents the compiler from * bleeding the critical section out. 
*/ } +static inline void percpu_down_read(struct percpu_rw_semaphore *sem) +{ + percpu_down_read_preempt_disable(sem); + preempt_enable(); +} + static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem) { int ret = 1; @@ -76,13 +82,13 @@ static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem) return ret; } -static inline void percpu_up_read(struct percpu_rw_semaphore *sem) +static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem) { /* - * The barrier() in preempt_disable() prevents the compiler from + * The barrier() prevents the compiler from * bleeding the critical section out. */ - preempt_disable(); + barrier(); /* * Same as in percpu_down_read(). */ @@ -95,6 +101,12 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem) rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_); } +static inline void percpu_up_read(struct percpu_rw_semaphore *sem) +{ + preempt_disable(); + percpu_up_read_preempt_enable(sem); +} + extern void percpu_down_write(struct percpu_rw_semaphore *); extern void percpu_up_write(struct percpu_rw_semaphore *); -- cgit v1.2.3 From e6253970413d99f416f7de8bd516e5f1834d8216 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 21 Nov 2015 19:11:48 +0100 Subject: stop_machine: Remove stop_cpus_lock and lg_double_lock/unlock() stop_two_cpus() and stop_cpus() use stop_cpus_lock to avoid the deadlock, we need to ensure that the stopper functions can't be queued "backwards" from one another. This doesn't look nice; if we use lglock then we do not really need stopper->lock, cpu_stop_queue_work() could use lg_local_lock() under local_irq_save(). OTOH it would be even better to avoid lglock in stop_machine.c and remove lg_double_lock(). This patch adds "bool stop_cpus_in_progress" set/cleared by queue_stop_cpus_work(), and changes cpu_stop_queue_two_works() to busy wait until it is cleared. 
queue_stop_cpus_work() sets stop_cpus_in_progress = T lockless, but after it queues a work on CPU1 it must be visible to stop_two_cpus(CPU1, CPU2) which checks it under the same lock. And since stop_two_cpus() holds the 2nd lock too, queue_stop_cpus_work() can not clear stop_cpus_in_progress if it is also going to queue a work on CPU2, it needs to take that 2nd lock to do this. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Tejun Heo Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20151121181148.GA433@redhat.com Signed-off-by: Ingo Molnar --- include/linux/lglock.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lglock.h b/include/linux/lglock.h index c92ebd100d9b..0081f000e34b 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -52,15 +52,10 @@ struct lglock { static struct lglock name = { .lock = &name ## _lock } void lg_lock_init(struct lglock *lg, char *name); - void lg_local_lock(struct lglock *lg); void lg_local_unlock(struct lglock *lg); void lg_local_lock_cpu(struct lglock *lg, int cpu); void lg_local_unlock_cpu(struct lglock *lg, int cpu); - -void lg_double_lock(struct lglock *lg, int cpu1, int cpu2); -void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2); - void lg_global_lock(struct lglock *lg); void lg_global_unlock(struct lglock *lg); -- cgit v1.2.3 From d32cdbfb0ba319e44f75437afde868f7cafdc467 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Nov 2015 18:36:16 +0100 Subject: locking/lglock: Remove lglock implementation It is now unused, remove it before someone else thinks its a good idea to use this. 
Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/lglock.h | 76 -------------------------------------------------- 1 file changed, 76 deletions(-) delete mode 100644 include/linux/lglock.h (limited to 'include/linux') diff --git a/include/linux/lglock.h b/include/linux/lglock.h deleted file mode 100644 index 0081f000e34b..000000000000 --- a/include/linux/lglock.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Specialised local-global spinlock. Can only be declared as global variables - * to avoid overhead and keep things simple (and we don't want to start using - * these inside dynamically allocated structures). - * - * "local/global locks" (lglocks) can be used to: - * - * - Provide fast exclusive access to per-CPU data, with exclusive access to - * another CPU's data allowed but possibly subject to contention, and to - * provide very slow exclusive access to all per-CPU data. - * - Or to provide very fast and scalable read serialisation, and to provide - * very slow exclusive serialisation of data (not necessarily per-CPU data). - * - * Brlocks are also implemented as a short-hand notation for the latter use - * case. - * - * Copyright 2009, 2010, Nick Piggin, Novell Inc. 
- */ -#ifndef __LINUX_LGLOCK_H -#define __LINUX_LGLOCK_H - -#include -#include -#include -#include -#include - -#ifdef CONFIG_SMP - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -#define LOCKDEP_INIT_MAP lockdep_init_map -#else -#define LOCKDEP_INIT_MAP(a, b, c, d) -#endif - -struct lglock { - arch_spinlock_t __percpu *lock; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lock_class_key lock_key; - struct lockdep_map lock_dep_map; -#endif -}; - -#define DEFINE_LGLOCK(name) \ - static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \ - = __ARCH_SPIN_LOCK_UNLOCKED; \ - struct lglock name = { .lock = &name ## _lock } - -#define DEFINE_STATIC_LGLOCK(name) \ - static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \ - = __ARCH_SPIN_LOCK_UNLOCKED; \ - static struct lglock name = { .lock = &name ## _lock } - -void lg_lock_init(struct lglock *lg, char *name); -void lg_local_lock(struct lglock *lg); -void lg_local_unlock(struct lglock *lg); -void lg_local_lock_cpu(struct lglock *lg, int cpu); -void lg_local_unlock_cpu(struct lglock *lg, int cpu); -void lg_global_lock(struct lglock *lg); -void lg_global_unlock(struct lglock *lg); - -#else -/* When !CONFIG_SMP, map lglock to spinlock */ -#define lglock spinlock -#define DEFINE_LGLOCK(name) DEFINE_SPINLOCK(name) -#define DEFINE_STATIC_LGLOCK(name) static DEFINE_SPINLOCK(name) -#define lg_lock_init(lg, name) spin_lock_init(lg) -#define lg_local_lock spin_lock -#define lg_local_unlock spin_unlock -#define lg_local_lock_cpu(lg, cpu) spin_lock(lg) -#define lg_local_unlock_cpu(lg, cpu) spin_unlock(lg) -#define lg_global_lock spin_lock -#define lg_global_unlock spin_unlock -#endif - -#endif -- cgit v1.2.3 From 491221f88d00651e449c9caf7415b6453c8a77b7 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 22 Sep 2016 03:10:01 -0400 Subject: block: export bio_free_pages to other modules bio_free_pages is introduced in commit 1dfa0f68c040 ("block: add a helper to free bio bounce buffer pages"), we can reuse the func in other modules after it was imported. 
Cc: Christoph Hellwig Cc: Jens Axboe Cc: Mike Snitzer Cc: Shaohua Li Signed-off-by: Guoqing Jiang Acked-by: Kent Overstreet Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index e00721a2dce1..97cb48f03dc7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -459,6 +459,7 @@ static inline void bio_flush_dcache_pages(struct bio *bi) extern void bio_copy_data(struct bio *dst, struct bio *src); extern int bio_alloc_pages(struct bio *bio, gfp_t gfp); +extern void bio_free_pages(struct bio *bio); extern struct bio *bio_copy_user_iov(struct request_queue *, struct rq_map_data *, -- cgit v1.2.3 From 9467f85960a31d56f95371516e55e210e1e3d51c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 22 Sep 2016 08:05:17 -0600 Subject: blk-mq/cpu-notif: Convert to new hotplug state machine Replace the block-mq notifier list management with the multi instance facility in the cpu hotplug state machine. 
Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: linux-block@vger.kernel.org Cc: rt@linutronix.de Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index fbcfdf323243..b3d1a7f4b5f2 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -7,12 +7,6 @@ struct blk_mq_tags; struct blk_flush_queue; -struct blk_mq_cpu_notifier { - struct list_head list; - void *data; - int (*notify)(void *data, unsigned long action, unsigned int cpu); -}; - struct blk_mq_hw_ctx { struct { spinlock_t lock; @@ -53,7 +47,7 @@ struct blk_mq_hw_ctx { struct delayed_work delay_work; - struct blk_mq_cpu_notifier cpu_notifier; + struct hlist_node cpuhp_dead; struct kobject kobj; unsigned long poll_considered; -- cgit v1.2.3 From b536fd587044af02183b3c02690431b93154f0fa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 22 Sep 2016 07:48:17 -0700 Subject: timekeeping: Include the correct header for errno definitions asm-generic headers are only defaults for architectures. We need to get the proper definition, which goes through and .
Signed-off-by: Christoph Hellwig Cc: john.stultz@linaro.org Link: http://lkml.kernel.org/r/1474555697-8206-1-git-send-email-hch@lst.de Signed-off-by: Thomas Gleixner --- include/linux/timekeeping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 816b7543f81b..09168c52ab64 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -1,7 +1,7 @@ #ifndef _LINUX_TIMEKEEPING_H #define _LINUX_TIMEKEEPING_H -#include +#include /* Included from linux/ktime.h */ -- cgit v1.2.3 From dcd3ea81f572fba57d18e91743a525a9787c1f6e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 22 Sep 2016 07:49:50 -0700 Subject: libata: remove asm-generic is only intended for architecture defaults, and we can simply kill it off by moving the two defintions directly to . Signed-off-by: Christoph Hellwig Signed-off-by: Tejun Heo --- include/linux/libata.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index e37d4f99f510..616eef4d81ea 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -46,7 +46,8 @@ #ifdef CONFIG_ATA_NONSTANDARD #include #else -#include +#define ATA_PRIMARY_IRQ(dev) 14 +#define ATA_SECONDARY_IRQ(dev) 15 #endif /* -- cgit v1.2.3 From a1d617d8f134679741b0b35e8e1436b015ac5538 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 17 Sep 2016 18:17:39 -0400 Subject: nfs: allow blocking locks to be awoken by lock callbacks Add a waitqueue head to the client structure. Have clients set a wait on that queue prior to requesting a lock from the server. If the lock is blocked, then we can use that to wait for wakeups. Note that we do need to do this "manually" since we need to set the wait on the waitqueue prior to requesting the lock, but requesting a lock can involve activities that can block. 
However, only do that for NFSv4.1 locks, either by compiling out all of the waitqueue handling when CONFIG_NFS_V4_1 is disabled, or skipping all of it at runtime if we're dealing with v4.0, or v4.1 servers that don't send lock callbacks. Note too that even when we expect to get a lock callback, RFC5661 section 20.11.4 is pretty clear that we still need to poll for them, so we do still sleep on a timeout. We do however always poll at the longest interval in that case. Signed-off-by: Jeff Layton [Anna: nfs4_retry_setlk() "status" should default to -ERESTARTSYS] Signed-off-by: Anna Schumaker --- include/linux/nfs_fs_sb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 14a762d2734d..b34097c67848 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -103,6 +103,9 @@ struct nfs_client { #define NFS_SP4_MACH_CRED_WRITE 5 /* WRITE */ #define NFS_SP4_MACH_CRED_COMMIT 6 /* COMMIT */ #define NFS_SP4_MACH_CRED_PNFS_CLEANUP 7 /* LAYOUTRETURN */ +#if IS_ENABLED(CONFIG_NFS_V4_1) + wait_queue_head_t cl_lock_waitq; +#endif /* CONFIG_NFS_V4_1 */ #endif /* CONFIG_NFS_V4 */ /* Our own IP address, as a null-terminated string. -- cgit v1.2.3 From 1b792f2f92784c00db2e6431496e437855d6f12a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 21 Sep 2016 10:12:13 -0600 Subject: blk-mq: add flag for drivers wanting blocking ->queue_rq() If a driver sets BLK_MQ_F_BLOCKING, it is allowed to block in its ->queue_rq() handler. For that case, blk-mq ensures that we always calls it from a safe context. 
Signed-off-by: Jens Axboe Tested-by: Josef Bacik --- include/linux/blk-mq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index fbcfdf323243..5daa0ef756dd 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -155,6 +155,7 @@ enum { BLK_MQ_F_TAG_SHARED = 1 << 1, BLK_MQ_F_SG_MERGE = 1 << 2, BLK_MQ_F_DEFER_ISSUE = 1 << 4, + BLK_MQ_F_BLOCKING = 1 << 5, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, BLK_MQ_F_ALLOC_POLICY_BITS = 1, -- cgit v1.2.3 From 576243b3f9eaa47ab568ac49574b3a095c2365f1 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 13 Sep 2016 10:31:59 -0600 Subject: PCI: pciehp: Allow exclusive userspace control of indicators PCIe hotplug supports optional Attention and Power Indicators, which are used internally by pciehp. Users can't control the Power Indicator, but they can control the Attention Indicator by writing to a sysfs "attention" file. The Slot Control register has two bits for each indicator, and the PCIe spec defines the encodings for each as (Reserved/On/Blinking/Off). For sysfs "attention" writes, pciehp_set_attention_status() maps into these encodings, so the only useful write values are 0 (Off), 1 (On), and 2 (Blinking). However, some platforms use all four bits for platform-specific indicators, and they need to allow direct user control of them while preventing pciehp from using them at all. Add a "hotplug_user_indicators" flag to the pci_dev structure. When set, pciehp does not use either the Attention Indicator or the Power Indicator, and the low four bits (values 0x0 - 0xf) of sysfs "attention" write values are written directly to the Attention Indicator Control and Power Indicator Control fields. 
[bhelgaas: changelog, rename flag and accessors to s/attention/indicator/] Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 2599a980340f..c81fbf7d5e9e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -308,6 +308,9 @@ struct pci_dev { powered on/off by the corresponding bridge */ unsigned int ignore_hotplug:1; /* Ignore hotplug events */ + unsigned int hotplug_user_indicators:1; /* SlotCtl indicators + controlled exclusively by + user sysfs */ unsigned int d3_delay; /* D3->D0 transition time in ms */ unsigned int d3cold_delay; /* D3cold->D0 transition time in ms */ -- cgit v1.2.3 From bcac25a58bfc6bd79191ac5d7afb49bea96da8c9 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 6 Sep 2016 00:47:13 -0700 Subject: kernel: add a helper to get an owning user namespace for a namespace Return -EPERM if an owning user namespace is outside of a process current user namespace. v2: In a first version ns_get_owner returned ENOENT for init_user_ns. This special cases was removed from this version. There is nothing outside of init_user_ns, so we can return EPERM. v3: rename ns->get_owner() to ns->owner(). get_* usually means that it grabs a reference. Acked-by: Serge Hallyn Signed-off-by: Andrei Vagin Signed-off-by: Eric W. 
Biederman --- include/linux/proc_ns.h | 1 + include/linux/user_namespace.h | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index de0e7719d4c5..ca85a4348ffc 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -18,6 +18,7 @@ struct proc_ns_operations { struct ns_common *(*get)(struct task_struct *task); void (*put)(struct ns_common *ns); int (*install)(struct nsproxy *nsproxy, struct ns_common *ns); + struct user_namespace *(*owner)(struct ns_common *ns); }; extern const struct proc_ns_operations netns_operations; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 9217169c64cb..190cf0760815 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -73,6 +73,8 @@ extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, extern int proc_setgroups_show(struct seq_file *m, void *v); extern bool userns_may_setgroups(const struct user_namespace *ns); extern bool current_in_userns(const struct user_namespace *target_ns); + +struct ns_common *ns_get_owner(struct ns_common *ns); #else static inline struct user_namespace *get_user_ns(struct user_namespace *ns) @@ -106,6 +108,11 @@ static inline bool current_in_userns(const struct user_namespace *target_ns) { return true; } + +static inline struct ns_common *ns_get_owner(struct ns_common *ns) +{ + return ERR_PTR(-EPERM); +} #endif #endif /* _LINUX_USER_H */ -- cgit v1.2.3 From a7306ed8d94af729ecef8b6e37506a1c6fc14788 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Tue, 6 Sep 2016 00:47:15 -0700 Subject: nsfs: add ioctl to get a parent namespace Pid and user namespaces are hierarchical. There is no way to discover parent-child relationships. In the future we will use this interface to dump and restore nested namespaces. Acked-by: Serge Hallyn Signed-off-by: Andrei Vagin Signed-off-by: Eric W.
Biederman --- include/linux/proc_ns.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index ca85a4348ffc..12cb8bd81d2d 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -19,6 +19,7 @@ struct proc_ns_operations { void (*put)(struct ns_common *ns); int (*install)(struct nsproxy *nsproxy, struct ns_common *ns); struct user_namespace *(*owner)(struct ns_common *ns); + struct ns_common *(*get_parent)(struct ns_common *ns); }; extern const struct proc_ns_operations netns_operations; -- cgit v1.2.3 From d45bc58dd3bdcaabc1d7d8d9b0b8dee826635cc6 Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Wed, 27 Jul 2016 11:23:52 +0200 Subject: mtd: nand: import nand_hw_control_init() The code to initialize a struct nand_hw_control is duplicated across several drivers. Factorize it using an inline function. Signed-off-by: Marc Gonzalez Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 8dd6e01f45c0..f6a2d5e7313c 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -460,6 +460,13 @@ struct nand_hw_control { wait_queue_head_t wq; }; +static inline void nand_hw_control_init(struct nand_hw_control *nfc) +{ + nfc->active = NULL; + spin_lock_init(&nfc->lock); + init_waitqueue_head(&nfc->wq); +} + /** * struct nand_ecc_ctrl - Control structure for ECC * @mode: ECC mode -- cgit v1.2.3 From 79022591839f110f465cac0223e117b91d47d5db Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 7 Sep 2016 14:21:42 +0200 Subject: mtd: nand: remove unnecessary 'extern' from function declarations 'extern' is not necessary for function declarations. To prevent people from adding the keyword to new declarations remove the existing ones. 
Signed-off-by: Sascha Hauer Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index f6a2d5e7313c..28c1833ad708 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -29,26 +29,26 @@ struct nand_flash_dev; struct device_node; /* Scan and identify a NAND device */ -extern int nand_scan(struct mtd_info *mtd, int max_chips); +int nand_scan(struct mtd_info *mtd, int max_chips); /* * Separate phases of nand_scan(), allowing board driver to intervene * and override command or ECC setup according to flash type. */ -extern int nand_scan_ident(struct mtd_info *mtd, int max_chips, +int nand_scan_ident(struct mtd_info *mtd, int max_chips, struct nand_flash_dev *table); -extern int nand_scan_tail(struct mtd_info *mtd); +int nand_scan_tail(struct mtd_info *mtd); /* Free resources held by the NAND device */ -extern void nand_release(struct mtd_info *mtd); +void nand_release(struct mtd_info *mtd); /* Internal helper for board drivers which need to override command function */ -extern void nand_wait_ready(struct mtd_info *mtd); +void nand_wait_ready(struct mtd_info *mtd); /* locks all blocks present in the device */ -extern int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len); +int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len); /* unlocks specified locked blocks */ -extern int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len); +int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len); /* The maximum number of NAND chips in an array */ #define NAND_MAX_CHIPS 8 @@ -900,14 +900,14 @@ struct nand_manufacturers { extern struct nand_flash_dev nand_flash_ids[]; extern struct nand_manufacturers nand_manuf_ids[]; -extern int nand_default_bbt(struct mtd_info *mtd); -extern int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs); -extern int 
nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs); -extern int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt); -extern int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr, - int allowbbt); -extern int nand_do_read(struct mtd_info *mtd, loff_t from, size_t len, - size_t *retlen, uint8_t *buf); +int nand_default_bbt(struct mtd_info *mtd); +int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs); +int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs); +int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt); +int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr, + int allowbbt); +int nand_do_read(struct mtd_info *mtd, loff_t from, size_t len, + size_t *retlen, uint8_t *buf); /** * struct platform_nand_chip - chip level device structure -- cgit v1.2.3 From 2f94abfe35b210e7711af9202a3dcfc9e779219a Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 15 Sep 2016 10:32:45 +0200 Subject: mtd: nand: Create a NAND reset function When NAND devices are resetted some initialization may have to be done, like for example they have to be configured for the timing mode that shall be used. To get a common place where this initialization can be implemented create a nand_reset() function. This currently only issues a NAND_CMD_RESET to the NAND device. The places issuing this command manually are replaced with a call to nand_reset(). 
Signed-off-by: Sascha Hauer Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 28c1833ad708..73ccbf6e057c 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1100,4 +1100,8 @@ int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page); /* Default read_oob syndrome implementation */ int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, int page); + +/* Reset and initialize a NAND device */ +int nand_reset(struct nand_chip *chip); + #endif /* __LINUX_MTD_NAND_H */ -- cgit v1.2.3 From eee64b700e26b9bcc6fce024681c31f5e12271fc Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 15 Sep 2016 10:32:46 +0200 Subject: mtd: nand: Introduce nand_data_interface Currently we have no data structure to fully describe a NAND timing. We only have struct nand_sdr_timings for NAND timings in SDR mode, but nothing for DDR mode and also no container to store both types of timing. This patch adds struct nand_data_interface which stores the timing type and a union of different timings. This can be used to pass to drivers in order to configure the timing. Add kerneldoc for struct nand_sdr_timings while touching it anyway. Signed-off-by: Sascha Hauer Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 166 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 117 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 73ccbf6e057c..a625e960c0c3 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -572,6 +572,123 @@ struct nand_buffers { uint8_t *databuf; }; +/** + * struct nand_sdr_timings - SDR NAND chip timings + * + * This struct defines the timing requirements of a SDR NAND chip. 
+ * These information can be found in every NAND datasheets and the timings + * meaning are described in the ONFI specifications: + * www.onfi.org/~/media/ONFI/specs/onfi_3_1_spec.pdf (chapter 4.15 Timing + * Parameters) + * + * All these timings are expressed in picoseconds. + * + * @tALH_min: ALE hold time + * @tADL_min: ALE to data loading time + * @tALS_min: ALE setup time + * @tAR_min: ALE to RE# delay + * @tCEA_max: CE# access time + * @tCEH_min: + * @tCH_min: CE# hold time + * @tCHZ_max: CE# high to output hi-Z + * @tCLH_min: CLE hold time + * @tCLR_min: CLE to RE# delay + * @tCLS_min: CLE setup time + * @tCOH_min: CE# high to output hold + * @tCS_min: CE# setup time + * @tDH_min: Data hold time + * @tDS_min: Data setup time + * @tFEAT_max: Busy time for Set Features and Get Features + * @tIR_min: Output hi-Z to RE# low + * @tITC_max: Interface and Timing Mode Change time + * @tRC_min: RE# cycle time + * @tREA_max: RE# access time + * @tREH_min: RE# high hold time + * @tRHOH_min: RE# high to output hold + * @tRHW_min: RE# high to WE# low + * @tRHZ_max: RE# high to output hi-Z + * @tRLOH_min: RE# low to output hold + * @tRP_min: RE# pulse width + * @tRR_min: Ready to RE# low (data only) + * @tRST_max: Device reset time, measured from the falling edge of R/B# to the + * rising edge of R/B#. 
+ * @tWB_max: WE# high to SR[6] low + * @tWC_min: WE# cycle time + * @tWH_min: WE# high hold time + * @tWHR_min: WE# high to RE# low + * @tWP_min: WE# pulse width + * @tWW_min: WP# transition to WE# low + */ +struct nand_sdr_timings { + u32 tALH_min; + u32 tADL_min; + u32 tALS_min; + u32 tAR_min; + u32 tCEA_max; + u32 tCEH_min; + u32 tCH_min; + u32 tCHZ_max; + u32 tCLH_min; + u32 tCLR_min; + u32 tCLS_min; + u32 tCOH_min; + u32 tCS_min; + u32 tDH_min; + u32 tDS_min; + u32 tFEAT_max; + u32 tIR_min; + u32 tITC_max; + u32 tRC_min; + u32 tREA_max; + u32 tREH_min; + u32 tRHOH_min; + u32 tRHW_min; + u32 tRHZ_max; + u32 tRLOH_min; + u32 tRP_min; + u32 tRR_min; + u64 tRST_max; + u32 tWB_max; + u32 tWC_min; + u32 tWH_min; + u32 tWHR_min; + u32 tWP_min; + u32 tWW_min; +}; + +/** + * enum nand_data_interface_type - NAND interface timing type + * @NAND_SDR_IFACE: Single Data Rate interface + */ +enum nand_data_interface_type { + NAND_SDR_IFACE, +}; + +/** + * struct nand_data_interface - NAND interface timing + * @type: type of the timing + * @timings: The timing, type according to @type + */ +struct nand_data_interface { + enum nand_data_interface_type type; + union { + struct nand_sdr_timings sdr; + } timings; +}; + +/** + * nand_get_sdr_timings - get SDR timing from data interface + * @conf: The data interface + */ +static inline const struct nand_sdr_timings * +nand_get_sdr_timings(const struct nand_data_interface *conf) +{ + if (conf->type != NAND_SDR_IFACE) + return ERR_PTR(-EINVAL); + + return &conf->timings.sdr; +} + /** * struct nand_chip - NAND Private Flash Chip Data * @mtd: MTD device registered to the MTD framework @@ -1030,55 +1147,6 @@ static inline int jedec_feature(struct nand_chip *chip) : 0; } -/* - * struct nand_sdr_timings - SDR NAND chip timings - * - * This struct defines the timing requirements of a SDR NAND chip. 
- * These informations can be found in every NAND datasheets and the timings - * meaning are described in the ONFI specifications: - * www.onfi.org/~/media/ONFI/specs/onfi_3_1_spec.pdf (chapter 4.15 Timing - * Parameters) - * - * All these timings are expressed in picoseconds. - */ - -struct nand_sdr_timings { - u32 tALH_min; - u32 tADL_min; - u32 tALS_min; - u32 tAR_min; - u32 tCEA_max; - u32 tCEH_min; - u32 tCH_min; - u32 tCHZ_max; - u32 tCLH_min; - u32 tCLR_min; - u32 tCLS_min; - u32 tCOH_min; - u32 tCS_min; - u32 tDH_min; - u32 tDS_min; - u32 tFEAT_max; - u32 tIR_min; - u32 tITC_max; - u32 tRC_min; - u32 tREA_max; - u32 tREH_min; - u32 tRHOH_min; - u32 tRHW_min; - u32 tRHZ_max; - u32 tRLOH_min; - u32 tRP_min; - u32 tRR_min; - u64 tRST_max; - u32 tWB_max; - u32 tWC_min; - u32 tWH_min; - u32 tWHR_min; - u32 tWP_min; - u32 tWW_min; -}; - /* get timing characteristics from ONFI timing mode. */ const struct nand_sdr_timings *onfi_async_timing_mode_to_sdr_timings(int mode); -- cgit v1.2.3 From b88730ada99bfe243862add360720a3550b0edbf Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 15 Sep 2016 10:32:48 +0200 Subject: mtd: nand: Add function to convert ONFI mode to data_interface onfi_init_data_interface() initializes a data interface with values from a given ONFI mode. Signed-off-by: Sascha Hauer Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index a625e960c0c3..1f34c04fe16c 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1112,6 +1112,11 @@ static inline int onfi_get_sync_timing_mode(struct nand_chip *chip) return le16_to_cpu(chip->onfi_params.src_sync_timing_mode); } +int onfi_init_data_interface(struct nand_chip *chip, + struct nand_data_interface *iface, + enum nand_data_interface_type type, + int timing_mode); + /* * Check if it is a SLC nand. 
* The !nand_is_slc() can be used to check the MLC/TLC nand chips. -- cgit v1.2.3 From 6e1f9708dbf3c50a8da93c1952a01a7a2acb5e66 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 15 Sep 2016 10:32:49 +0200 Subject: mtd: nand: Expose data interface for ONFI mode 0 The nand layer will need ONFI mode 0 to use it as timing mode before and right after reset. Signed-off-by: Sascha Hauer Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 1f34c04fe16c..0c9412c2d80b 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1154,6 +1154,8 @@ static inline int jedec_feature(struct nand_chip *chip) /* get timing characteristics from ONFI timing mode. */ const struct nand_sdr_timings *onfi_async_timing_mode_to_sdr_timings(int mode); +/* get data interface from ONFI timing mode 0, used after reset. */ +const struct nand_data_interface *nand_get_default_data_interface(void); int nand_check_erased_ecc_chunk(void *data, int datalen, void *ecc, int ecclen, -- cgit v1.2.3 From d8e725dd831186a3595036b2b1df9f68cbc6efa3 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 15 Sep 2016 10:32:50 +0200 Subject: mtd: nand: automate NAND timings selection The NAND framework provides several helpers to query timing modes supported by a NAND chip, but this implies that all NAND controller drivers have to implement the same timings selection dance. Also currently NAND devices can be resetted at arbitrary places which also resets the timing for ONFI chips to timing mode 0. Provide a common logic to select the best timings based on ONFI or ->onfi_timing_mode_default information. Hook this into nand_reset() to make sure the new timing is applied each time during a reset. NAND controller willing to support timings adjustment should just implement the ->setup_data_interface() method. 
Signed-off-by: Boris Brezillon Signed-off-by: Sascha Hauer --- include/linux/mtd/nand.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 0c9412c2d80b..d3e3f8d03336 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -751,10 +751,9 @@ nand_get_sdr_timings(const struct nand_data_interface *conf) * also from the datasheet. It is the recommended ECC step * size, if known; if unknown, set to zero. * @onfi_timing_mode_default: [INTERN] default ONFI timing mode. This field is - * either deduced from the datasheet if the NAND - * chip is not ONFI compliant or set to 0 if it is - * (an ONFI chip is always configured in mode 0 - * after a NAND reset) + * set to the actually used ONFI mode if the chip is + * ONFI compliant or deduced from the datasheet if + * the NAND chip is not ONFI compliant. * @numchips: [INTERN] number of physical chips * @chipsize: [INTERN] the size of one chip for multichip arrays * @pagemask: [INTERN] page number mask = number of (pages / chip) - 1 @@ -774,6 +773,7 @@ nand_get_sdr_timings(const struct nand_data_interface *conf) * @read_retries: [INTERN] the number of read retry modes supported * @onfi_set_features: [REPLACEABLE] set the features for ONFI nand * @onfi_get_features: [REPLACEABLE] get the features for ONFI nand + * @setup_data_interface: [OPTIONAL] setup the data interface and timing * @bbt: [INTERN] bad block table pointer * @bbt_td: [REPLACEABLE] bad block table descriptor for flash * lookup. 
@@ -820,6 +820,10 @@ struct nand_chip { int (*onfi_get_features)(struct mtd_info *mtd, struct nand_chip *chip, int feature_addr, uint8_t *subfeature_para); int (*setup_read_retry)(struct mtd_info *mtd, int retry_mode); + int (*setup_data_interface)(struct mtd_info *mtd, + const struct nand_data_interface *conf, + bool check_only); + int chip_delay; unsigned int options; @@ -849,6 +853,8 @@ struct nand_chip { struct nand_jedec_params jedec_params; }; + struct nand_data_interface *data_interface; + int read_retries; flstate_t state; -- cgit v1.2.3 From ba78ee00e1ff84de9b3ad33edbd3ec599099ee82 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 8 Jun 2016 17:04:22 +0200 Subject: mtd: nand: Add an option to maximize the ECC strength The generic NAND DT bindings allows one to tweak the ECC strength and step size to their need. It can be used to lower the ECC strength to match a bootloader/firmware config, but might also be used to get a better reliability. In the latter case, the user might want to use the maximum ECC strength without having to explicitly calculate the exact value (this value not only depends on the OOB size, but also on the NAND controller, and can be tricky to extract). Add a generic 'nand-ecc-maximize' DT property and the associated NAND_ECC_MAXIMIZE flag, to let ECC controller drivers select the best ECC strength and step-size on their own. Signed-off-by: Boris Brezillon Acked-by: Rob Herring --- include/linux/mtd/nand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index d3e3f8d03336..331caf987b16 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -141,6 +141,7 @@ enum nand_ecc_algo { * pages and you want to rely on the default implementation. 
*/ #define NAND_ECC_GENERIC_ERASED_CHECK BIT(0) +#define NAND_ECC_MAXIMIZE BIT(1) /* Bit mask for flags passed to do_nand_read_ecc */ #define NAND_GET_DEVICE 0x80 -- cgit v1.2.3 From d44154f969a44269a9288c274c1c2fd9e85df8a5 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 21 Sep 2016 11:44:41 +0200 Subject: mtd: nand: Provide nand_cleanup() function to free NAND related resources Provide a nand_cleanup() function to free all nand related resources without unregistering the mtd device. This should allow drivers to call mtd_device_unregister() and handle its return value and still being able to cleanup all nand related resources. Signed-off-by: Richard Weinberger Signed-off-by: Daniel Walter Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 331caf987b16..c5d3d5024fc8 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -38,7 +38,7 @@ int nand_scan_ident(struct mtd_info *mtd, int max_chips, struct nand_flash_dev *table); int nand_scan_tail(struct mtd_info *mtd); -/* Free resources held by the NAND device */ +/* Unregister the MTD device and free resources held by the NAND device */ void nand_release(struct mtd_info *mtd); /* Internal helper for board drivers which need to override command function */ @@ -1186,4 +1186,7 @@ int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, /* Reset and initialize a NAND device */ int nand_reset(struct nand_chip *chip); +/* Free resources held by the NAND device */ +void nand_cleanup(struct nand_chip *chip); + #endif /* __LINUX_MTD_NAND_H */ -- cgit v1.2.3 From 79b804cb6af4f128b2c53f0887c02537a7eb5824 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 20 Sep 2016 15:15:21 +0300 Subject: gpiolib: Make it possible to exclude GPIOs from IRQ domain When using GPIO irqchip helpers to setup irqchip for a gpiolib based 
driver, it is not possible to select which GPIOs to add to the IRQ domain. Instead it just adds all GPIOs which is not always desired. For example there might be GPIOs that for some reason cannot generate normal interrupts at all. To support this we add a flag irq_need_valid_mask to struct gpio_chip. When this flag is set the core allocates irq_valid_mask that holds one bit for each GPIO the chip has. By default all bits are set but drivers can manipulate this using set_bit() and clear_bit() accordingly. Then when gpiochip_irqchip_add() is called, this mask is checked and all GPIOs whose bit is set are added to the IRQ domain created for the GPIO chip. Suggested-by: Linus Walleij Signed-off-by: Mika Westerberg Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 50882e09289b..420b837f2aa7 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -112,6 +112,10 @@ enum single_ended_mode { * initialization, provided by GPIO driver * @irq_parent: GPIO IRQ chip parent/bank linux irq number, * provided by GPIO driver + * @irq_need_valid_mask: If set core allocates @irq_valid_mask with all + * bits set to one + * @irq_valid_mask: If not %NULL holds bitmask of GPIOs which are valid to + * be included in IRQ domain of the chip * @lock_key: per GPIO IRQ chip lockdep class * * A gpio_chip can help platforms abstract various sources of GPIOs so @@ -190,6 +194,8 @@ struct gpio_chip { irq_flow_handler_t irq_handler; unsigned int irq_default_type; int irq_parent; + bool irq_need_valid_mask; + unsigned long *irq_valid_mask; struct lock_class_key *lock_key; #endif -- cgit v1.2.3 From cace564f8b6260e806f5e28d7f192fd0e0c603ed Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 13 Sep 2016 10:52:50 -0400 Subject: svcrdma: Tail iovec leaves an orphaned DMA mapping The ctxt's count field is overloaded to mean
the number of pages in the ctxt->page array and the number of SGEs in the ctxt->sge array. Typically these two numbers are the same. However, when an inline RPC reply is constructed from an xdr_buf with a tail iovec, the head and tail often occupy the same page, but each are DMA mapped independently. In that case, ->count equals the number of pages, but it does not equal the number of SGEs. There's one more SGE, for the tail iovec. Hence there is one more DMA mapping than there are pages in the ctxt->page array. This isn't a real problem until the server's iommu is enabled. Then each RPC reply that has content in that iovec orphans a DMA mapping that consists of real resources. krb5i and krb5p always populate that tail iovec. After a couple million sent krb5i/p RPC replies, the NFS server starts behaving erratically. Reboot is needed to clear the problem. Fixes: 9d11b51ce7c1 ("svcrdma: Fix send_reply() scatter/gather set-up") Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index d6917b896d3a..3584bc8864c4 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -86,6 +86,7 @@ struct svc_rdma_op_ctxt { unsigned long flags; enum dma_data_direction direction; int count; + unsigned int mapped_sges; struct ib_sge sge[RPCSVC_MAXPAGES]; struct page *pages[RPCSVC_MAXPAGES]; }; @@ -193,6 +194,14 @@ struct svcxprt_rdma { #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD +/* Track DMA maps for this transport and context */ +static inline void svc_rdma_count_mappings(struct svcxprt_rdma *rdma, + struct svc_rdma_op_ctxt *ctxt) +{ + ctxt->mapped_sges++; + atomic_inc(&rdma->sc_dma_used); +} + /* svc_rdma_backchannel.c */ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp, -- cgit v1.2.3 From 
5d48709656584420f31b361c4b1a3ebf1d68b225 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 13 Sep 2016 10:53:07 -0400 Subject: rpcrdma: RDMA/CM private message data structure Introduce data structure used by both client and server to exchange implementation details during RDMA/CM connection establishment. This is an experimental out-of-band exchange between Linux RPC-over-RDMA Version One implementations, replacing the deprecated CCP (see RFC 5666bis). The purpose of this extension is to enable prototyping of features that might be introduced in a subsequent version of RPC-over-RDMA. Suggested by Christoph Hellwig and Devesh Sharma. Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/rpc_rdma.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index 3b1ff38f0c37..a7da6bf56610 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -41,6 +41,7 @@ #define _LINUX_SUNRPC_RPC_RDMA_H #include +#include #define RPCRDMA_VERSION 1 #define rpcrdma_version cpu_to_be32(RPCRDMA_VERSION) @@ -129,4 +130,38 @@ enum rpcrdma_proc { #define rdma_done cpu_to_be32(RDMA_DONE) #define rdma_error cpu_to_be32(RDMA_ERROR) +/* + * Private extension to RPC-over-RDMA Version One. + * Message passed during RDMA-CM connection set-up. + * + * Add new fields at the end, and don't permute existing + * fields. 
+ */ +struct rpcrdma_connect_private { + __be32 cp_magic; + u8 cp_version; + u8 cp_flags; + u8 cp_send_size; + u8 cp_recv_size; +} __packed; + +#define rpcrdma_cmp_magic __cpu_to_be32(0xf6ab0e18) + +enum { + RPCRDMA_CMP_VERSION = 1, + RPCRDMA_CMP_F_SND_W_INV_OK = BIT(0), +}; + +static inline u8 +rpcrdma_encode_buffer_size(unsigned int size) +{ + return (size >> 10) - 1; +} + +static inline unsigned int +rpcrdma_decode_buffer_size(u8 val) +{ + return ((unsigned int)val + 1) << 10; +} + #endif /* _LINUX_SUNRPC_RPC_RDMA_H */ -- cgit v1.2.3 From 25d55296dd3eac23adb2ae46b67b65bf73b22fb2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 13 Sep 2016 10:53:23 -0400 Subject: svcrdma: support Remote Invalidation Support Remote Invalidation. A private message is exchanged with the client upon RDMA transport connect that indicates whether Send With Invalidation may be used by the server to send RPC replies. The invalidate_rkey is arbitrarily chosen from among rkeys present in the RPC-over-RDMA header's chunk lists. Send With Invalidate improves performance only when clients can recognize, while processing an RPC reply, that an rkey has already been invalidated. That has been submitted as a separate change. In the future, the RPC-over-RDMA protocol might support Remote Invalidation properly. The protocol needs to enable signaling between peers to indicate when Remote Invalidation can be used for each individual RPC. Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 3584bc8864c4..cc3ae16eac68 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -137,6 +137,7 @@ struct svcxprt_rdma { int sc_ord; /* RDMA read limit */ int sc_max_sge; int sc_max_sge_rd; /* max sge for read target */ + bool sc_snd_w_inv; /* OK to use Send With Invalidate */ atomic_t sc_sq_count; /* Number of SQ WR on queue */ unsigned int sc_sq_depth; /* Depth of SQ */ -- cgit v1.2.3 From 55679c8d23d191c24ad133abc5647e3054ca8de1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 23 Sep 2016 09:07:56 -0700 Subject: blkcg: Annotate blkg_hint correctly Avoid that sparse complains about blkg_hint manipulations. Fixes: a637120e4902 ("blkcg: use radix tree to index blkgs from blkcg") Signed-off-by: Bart Van Assche Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 10648e300c93..cbdbf34de5b6 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -45,7 +45,7 @@ struct blkcg { spinlock_t lock; struct radix_tree_root blkg_tree; - struct blkcg_gq *blkg_hint; + struct blkcg_gq __rcu *blkg_hint; struct hlist_head blkg_list; struct blkcg_policy_data *cpd[BLKCG_MAX_POLS]; -- cgit v1.2.3 From 5963f19ca2b7e46cafc9647c8390bb20563b91cc Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 23 Sep 2016 21:29:36 +0900 Subject: clk: change the type of clk_hw_onecell_data.num to unsigned int The "num" is the number of clk_hw entries in the structure, so "unsigned int" would be a better fit. (size_t looks like data size we count by byte.) Besides, struct clk_onecell_data already uses unsigned int for "clk_num". 
Signed-off-by: Masahiro Yamada Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 37b8fdce0e49..af596381fa0f 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -772,7 +772,7 @@ struct clk_onecell_data { }; struct clk_hw_onecell_data { - size_t num; + unsigned int num; struct clk_hw *hws[]; }; -- cgit v1.2.3 From 058dfc7670086edda8d34f0dbe93c596db5d4a6b Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 20 Sep 2016 15:30:51 +0300 Subject: ACPI / watchdog: Add support for WDAT hardware watchdog Starting from Intel Skylake the iTCO watchdog timer registers were moved to reside in the same register space with SMBus host controller. Not all needed registers are available though and we need to unhide P2SB (Primary to Sideband) device briefly to be able to read status of required NO_REBOOT bit. The i2c-i801.c SMBus driver used to handle this and creation of the iTCO watchdog platform device. Windows, on the other hand, does not use the iTCO watchdog hardware directly even if it is available. Instead it relies on ACPI Watchdog Action Table (WDAT) table to describe the watchdog hardware to the OS. This table contains necessary information about the hardware and also set of actions which are executed by a driver as needed. This patch implements a new watchdog driver that takes advantage of the ACPI WDAT table. We split the functionality into two parts: first part enumerates the WDAT table and if found, populates resources and creates platform device for the actual driver. The second part is the driver itself. The reason for the split is that this way we can make the driver itself to be a module and loaded automatically if the WDAT table is found. Otherwise the module is not loaded. Signed-off-by: Mika Westerberg Reviewed-by: Guenter Roeck Signed-off-by: Rafael J.
Wysocki --- include/linux/acpi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c5eaf2f80a4c..8ff6ca4a2639 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1074,4 +1074,10 @@ void acpi_table_upgrade(void); static inline void acpi_table_upgrade(void) { } #endif +#if defined(CONFIG_ACPI) && defined(CONFIG_ACPI_WATCHDOG) +extern bool acpi_has_watchdog(void); +#else +static inline bool acpi_has_watchdog(void) { return false; } +#endif + #endif /*_LINUX_ACPI_H*/ -- cgit v1.2.3 From df044e02206230c7d79a9aef96a6c087476f5533 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 31 Aug 2016 14:52:41 +0300 Subject: watchdog: add pretimeout support to the core Since the watchdog framework centralizes the IOCTL interfaces of device drivers now, SETPRETIMEOUT and GETPRETIMEOUT need to be added in the common code. Signed-off-by: Robin Gong Signed-off-by: Wolfram Sang [vzapolskiy: added conditional pretimeout sysfs attribute visibility] Signed-off-by: Vladimir Zapolskiy Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- include/linux/watchdog.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 7047bc7f8106..4035df7ec023 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -28,6 +28,7 @@ struct watchdog_core_data; * @ping: The routine that sends a keepalive ping to the watchdog device. * @status: The routine that shows the status of the watchdog device. * @set_timeout:The routine for setting the watchdog devices timeout value (in seconds). + * @set_pretimeout:The routine for setting the watchdog devices pretimeout. * @get_timeleft:The routine that gets the time left before a reset (in seconds). * @restart: The routine for restarting the machine. * @ioctl: The routines that handles extra ioctl calls.
@@ -46,6 +47,7 @@ struct watchdog_ops { int (*ping)(struct watchdog_device *); unsigned int (*status)(struct watchdog_device *); int (*set_timeout)(struct watchdog_device *, unsigned int); + int (*set_pretimeout)(struct watchdog_device *, unsigned int); unsigned int (*get_timeleft)(struct watchdog_device *); int (*restart)(struct watchdog_device *, unsigned long, void *); long (*ioctl)(struct watchdog_device *, unsigned int, unsigned long); @@ -61,6 +63,7 @@ struct watchdog_ops { * @ops: Pointer to the list of watchdog operations. * @bootstatus: Status of the watchdog device at boot. * @timeout: The watchdog devices timeout value (in seconds). + * @pretimeout: The watchdog devices pre_timeout value. * @min_timeout:The watchdog devices minimum timeout value (in seconds). * @max_timeout:The watchdog devices maximum timeout value (in seconds) * as configurable from user space. Only relevant if @@ -96,6 +99,7 @@ struct watchdog_device { const struct watchdog_ops *ops; unsigned int bootstatus; unsigned int timeout; + unsigned int pretimeout; unsigned int min_timeout; unsigned int max_timeout; unsigned int min_hw_heartbeat_ms; @@ -163,6 +167,13 @@ static inline bool watchdog_timeout_invalid(struct watchdog_device *wdd, unsigne t > wdd->max_timeout); } +/* Use the following function to check if a pretimeout value is invalid */ +static inline bool watchdog_pretimeout_invalid(struct watchdog_device *wdd, + unsigned int t) +{ + return t && wdd->timeout && t >= wdd->timeout; +} + /* Use the following functions to manipulate watchdog driver specific data */ static inline void watchdog_set_drvdata(struct watchdog_device *wdd, void *data) { -- cgit v1.2.3 From 907241dccb4ce5d9413cf3c030b32b0cfc184914 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 23 Sep 2016 18:24:07 +0100 Subject: thread_info: Use unsigned long for flags The generic THREAD_INFO_IN_TASK definition of thread_info::flags is a u32, matching x86 prior to the introduction of THREAD_INFO_IN_TASK. 
However, common helpers like test_ti_thread_flag() implicitly assume that thread_info::flags has at least the size and alignment of unsigned long, and relying on padding and alignment provided by other elements of task_struct is somewhat fragile. Additionally, some architectures use more than 32 bits for thread_info::flags, and others may need to in future. With THREAD_INFO_IN_TASK, task struct follows thread_info with a long field, and thus we no longer save any space as we did back in commit: affa219b60a11b32 ("x86: change thread_info's flag field back to 32 bits") Given all this, it makes more sense for the generic thread_info::flags to be an unsigned long. In fact given <linux/thread_info.h> contains/uses the helpers mentioned above, BE arches *must* use unsigned long (or something of the same size) today, or they wouldn't work. Make it so. Signed-off-by: Mark Rutland Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1474651447-30447-1-git-send-email-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/thread_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index e2d0fd81b1ba..45f004e9cc59 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -15,7 +15,7 @@ struct compat_timespec; #ifdef CONFIG_THREAD_INFO_IN_TASK struct thread_info { - u32 flags; /* low level flags */ + unsigned long flags; /* low level flags */ }; #define INIT_THREAD_INFO(tsk) \ -- cgit v1.2.3 From 8dd1fe1594a712eb326e1607c694fbd1baf85f4b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 16 Sep 2016 18:02:42 +0200 Subject: i2c: export i2c_adapter_depth() For crazy setups in which an i2c gpio expander is behind an i2c gpio multiplexer controlled by a gpio provided a second 
expander using the same device driver we need to explicitly tell lockdep how to handle nested locking. Export i2c_adapter_depth() as public API to be reused outside of i2c core code. Signed-off-by: Bartosz Golaszewski Acked-by: Peter Rosin Acked-by: Peter Zijlstra (Intel) Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 4a4099d3a4b9..6422eef428c4 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -698,6 +698,7 @@ extern void i2c_clients_command(struct i2c_adapter *adap, extern struct i2c_adapter *i2c_get_adapter(int nr); extern void i2c_put_adapter(struct i2c_adapter *adap); +extern unsigned int i2c_adapter_depth(struct i2c_adapter *adapter); void i2c_parse_fw_timings(struct device *dev, struct i2c_timings *t, bool use_defaults); -- cgit v1.2.3 From a5ecddfe0bf82c92cc2e47385e287b21361a8d70 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 16 Sep 2016 18:02:43 +0200 Subject: lockdep: make MAX_LOCKDEP_SUBCLASSES unconditionally visible This define is needed by i2c_adapter_depth() to detect if we don't exceed the maximum number of lock subclasses. Make it visible even if lockdep is disabled. Signed-off-by: Bartosz Golaszewski Acked-by: Peter Rosin Acked-by: Peter Zijlstra (Intel) Signed-off-by: Wolfram Sang --- include/linux/lockdep.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index eabe0138eb06..c1458fede1f9 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -16,6 +16,8 @@ struct lockdep_map; extern int prove_locking; extern int lock_stat; +#define MAX_LOCKDEP_SUBCLASSES 8UL + #ifdef CONFIG_LOCKDEP #include @@ -29,8 +31,6 @@ extern int lock_stat; */ #define XXX_LOCK_USAGE_STATES (1+3*4) -#define MAX_LOCKDEP_SUBCLASSES 8UL - /* * NR_LOCKDEP_CACHING_CLASSES ... 
Number of classes * cached in the instance of lockdep_map -- cgit v1.2.3 From 7c3d21c8153c6bfb5690e35e086b0522c42442d9 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 22 Sep 2016 12:11:13 +0300 Subject: net/mlx4_core: Preparation for VF vlan protocol 802.1ad Check device capability to support VF vlan protocol 802.1ad mode. Add vport attribute vlan protocol. Init vport vlan protocol by default to 802.1Q. Add update QP support for VF vlan protocol 802.1ad. Add func capability vlan_offload_disable to disable all vlan HW acceleration on VF while the VF is set to VF vlan protocol 802.1ad mode. No change in VF vlan protocol 802.1Q (VST) mode. Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 3 +++ include/linux/mlx4/qp.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 42da3552f7cb..59b50d3eedb4 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -221,6 +221,7 @@ enum { MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33, MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER = 1ULL << 34, MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT = 1ULL << 35, + MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP = 1ULL << 36, }; enum { @@ -1371,6 +1372,8 @@ int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable); int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val); int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv); +int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, + bool *vlan_offload_disabled); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index deaa2217214d..b4ee8f62ce8d 100644 --- 
a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -160,6 +160,7 @@ struct mlx4_qp_path { enum { /* fl */ MLX4_FL_CV = 1 << 6, + MLX4_FL_SV = 1 << 5, MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 2, MLX4_FL_ETH_SRC_CHECK_MC_LB = 1 << 1, MLX4_FL_ETH_SRC_CHECK_UC_LB = 1 << 0, @@ -267,6 +268,7 @@ enum { MLX4_UPD_QP_PATH_MASK_FVL_RX = 16 + 32, MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_UC_LB = 18 + 32, MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB = 19 + 32, + MLX4_UPD_QP_PATH_MASK_SV = 22 + 32, }; enum { /* param3 */ -- cgit v1.2.3 From 79aab093a0b5370d7fc4e99df75996f4744dc03f Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 22 Sep 2016 12:11:15 +0300 Subject: net: Update API for VF vlan protocol 802.1ad support Introduce new rtnl UAPI that exposes a list of vlans per VF, giving the ability for user-space application to specify it for the VF, as an option to support 802.1ad. We adjusted IP Link tool to support this option. For future use cases, the new UAPI supports multiple vlans. For now we limit the list size to a single vlan in kernel. Add IFLA_VF_VLAN_LIST in addition to IFLA_VF_VLAN to keep backward compatibility with older versions of IP Link tool. Add a vlan protocol parameter to the ndo_set_vf_vlan callback. We kept 802.1Q as the drivers' default vlan protocol. Suitable ip link tool command examples: Set vf vlan protocol 802.1ad: ip link set eth0 vf 1 vlan 100 proto 802.1ad Set vf to VST (802.1Q) mode: ip link set eth0 vf 1 vlan 100 proto 802.1Q Or by omitting the new parameter ip link set eth0 vf 1 vlan 100 Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- include/linux/if_link.h | 1 + include/linux/netdevice.h | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index f923d15b432c..0b17c585b5cd 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -25,5 +25,6 @@ struct ifla_vf_info { __u32 max_tx_rate; __u32 rss_query_en; __u32 trusted; + __be16 vlan_proto; }; #endif /* _LINUX_IF_LINK_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 69f242c71865..1e8a5c734d72 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -946,7 +946,8 @@ struct netdev_xdp { * * SR-IOV management functions. * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac); - * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos); + * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, + * u8 qos, __be16 proto); * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, * int max_tx_rate); * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); @@ -1187,7 +1188,8 @@ struct net_device_ops { int (*ndo_set_vf_mac)(struct net_device *dev, int queue, u8 *mac); int (*ndo_set_vf_vlan)(struct net_device *dev, - int queue, u16 vlan, u8 qos); + int queue, u16 vlan, + u8 qos, __be16 proto); int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate); -- cgit v1.2.3 From b42959dc35a533a531dd698b581193a65a5da831 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 22 Sep 2016 12:11:16 +0300 Subject: net/mlx4: Add VF vlan protocol 802.1ad support Move the vf to VST 802.1ad mode (mlx4 VST QinQ mode) by setting vf vlan protocol to 802.1ad. VST 802.1ad mode in mlx4, is used for STAG strip/insertion by PF, while the CTAG is set by the VF. Read current vlan protocol as part of the vf configuration state. 
Upon setting vf vlan protocol to 802.1ad, we use a mechanism of handshake to verify that both the vf and the pf driver version support it. The handshake uses the command QUERY_FUNC_CAP: - The vf sets a pre-defined support bit in input modifier. - A pf that supports the feature sends the request to the vf through a pre-defined field in the output mailbox. - In case vf does not support the feature, the pf will fail the control command (in this case, IP link tool command to set the vf vlan protocol to 802.1ad). No change in VST 802.1Q mode. Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 116b284bc4ce..1f3568694a57 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -309,7 +309,8 @@ int mlx4_get_vf_stats(struct mlx4_dev *dev, int port, int vf_idx, struct ifla_vf_stats *vf_stats); u32 mlx4_comm_get_version(void); int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac); -int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos); +int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, + u8 qos, __be16 proto); int mlx4_set_vf_rate(struct mlx4_dev *dev, int port, int vf, int min_tx_rate, int max_tx_rate); int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting); -- cgit v1.2.3 From fe72926b792e52ab00abfa81a201805bfb2247d6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Sep 2016 11:35:02 -0400 Subject: netfilter: call nf_hook_state_init with rcu_read_lock held This makes things simpler because we can store the head of the list in the nf_state structure without worrying about concurrent add/delete of hook elements from the list. A future commit will make use of this to implement a simpler linked-list. 
Signed-off-by: Florian Westphal Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 8 +++++++- include/linux/netfilter_ingress.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 9230f9aee896..ad444f0b4ed0 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -174,10 +174,16 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, if (!list_empty(hook_list)) { struct nf_hook_state state; + int ret; + /* We may already have this, but read-locks nest anyway */ + rcu_read_lock(); nf_hook_state_init(&state, hook_list, hook, thresh, pf, indev, outdev, sk, net, okfn); - return nf_hook_slow(skb, &state); + + ret = nf_hook_slow(skb, &state); + rcu_read_unlock(); + return ret; } return 1; } diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 5fcd375ef175..6965ba09eba7 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -14,6 +14,7 @@ static inline bool nf_hook_ingress_active(const struct sk_buff *skb) return !list_empty(&skb->dev->nf_hooks_ingress); } +/* caller must hold rcu_read_lock */ static inline int nf_hook_ingress(struct sk_buff *skb) { struct nf_hook_state state; -- cgit v1.2.3 From e3b37f11e6e4e6b6f02cc762f182ce233d2c1c9d Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 21 Sep 2016 11:35:07 -0400 Subject: netfilter: replace list_head with single linked list The netfilter hook list never uses the prev pointer, and so can be trimmed to be a simple singly-linked list. In addition to having a more light weight structure for hook traversal, struct net becomes 5568 bytes (down from 6400) and struct net_device becomes 2176 bytes (down from 2240). 
Signed-off-by: Aaron Conole Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netdevice.h | 2 +- include/linux/netfilter.h | 63 +++++++++++++++++++++------------------ include/linux/netfilter_ingress.h | 17 +++++++---- 3 files changed, 47 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 67bb978470dc..41f49f5ab62a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1783,7 +1783,7 @@ struct net_device { #endif struct netdev_queue __rcu *ingress_queue; #ifdef CONFIG_NETFILTER_INGRESS - struct list_head nf_hooks_ingress; + struct nf_hook_entry __rcu *nf_hooks_ingress; #endif unsigned char broadcast[MAX_ADDR_LEN]; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index ad444f0b4ed0..44e20dac98a9 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -55,12 +55,34 @@ struct nf_hook_state { struct net_device *out; struct sock *sk; struct net *net; - struct list_head *hook_list; + struct nf_hook_entry __rcu *hook_entries; int (*okfn)(struct net *, struct sock *, struct sk_buff *); }; +typedef unsigned int nf_hookfn(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state); +struct nf_hook_ops { + struct list_head list; + + /* User fills in from here down. */ + nf_hookfn *hook; + struct net_device *dev; + void *priv; + u_int8_t pf; + unsigned int hooknum; + /* Hooks are ordered in ascending priority. 
*/ + int priority; +}; + +struct nf_hook_entry { + struct nf_hook_entry __rcu *next; + struct nf_hook_ops ops; + const struct nf_hook_ops *orig_ops; +}; + static inline void nf_hook_state_init(struct nf_hook_state *p, - struct list_head *hook_list, + struct nf_hook_entry *hook_entry, unsigned int hook, int thresh, u_int8_t pf, struct net_device *indev, @@ -76,26 +98,11 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->out = outdev; p->sk = sk; p->net = net; - p->hook_list = hook_list; + RCU_INIT_POINTER(p->hook_entries, hook_entry); p->okfn = okfn; } -typedef unsigned int nf_hookfn(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state); - -struct nf_hook_ops { - struct list_head list; - /* User fills in from here down. */ - nf_hookfn *hook; - struct net_device *dev; - void *priv; - u_int8_t pf; - unsigned int hooknum; - /* Hooks are ordered in ascending priority. */ - int priority; -}; struct nf_sockopt_ops { struct list_head list; @@ -161,7 +168,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, int (*okfn)(struct net *, struct sock *, struct sk_buff *), int thresh) { - struct list_head *hook_list; + struct nf_hook_entry *hook_head; + int ret = 1; #ifdef HAVE_JUMP_LABEL if (__builtin_constant_p(pf) && @@ -170,22 +178,19 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, return 1; #endif - hook_list = &net->nf.hooks[pf][hook]; - - if (!list_empty(hook_list)) { + rcu_read_lock(); + hook_head = rcu_dereference(net->nf.hooks[pf][hook]); + if (hook_head) { struct nf_hook_state state; - int ret; - /* We may already have this, but read-locks nest anyway */ - rcu_read_lock(); - nf_hook_state_init(&state, hook_list, hook, thresh, + nf_hook_state_init(&state, hook_head, hook, thresh, pf, indev, outdev, sk, net, okfn); ret = nf_hook_slow(skb, &state); - rcu_read_unlock(); - return ret; } - return 1; + rcu_read_unlock(); + + return ret; } static inline int nf_hook(u_int8_t pf, unsigned int hook, struct 
net *net, diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 6965ba09eba7..33e37fb41d5d 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -11,23 +11,30 @@ static inline bool nf_hook_ingress_active(const struct sk_buff *skb) if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) return false; #endif - return !list_empty(&skb->dev->nf_hooks_ingress); + return rcu_access_pointer(skb->dev->nf_hooks_ingress); } /* caller must hold rcu_read_lock */ static inline int nf_hook_ingress(struct sk_buff *skb) { + struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress); struct nf_hook_state state; - nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress, - NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, - skb->dev, NULL, NULL, dev_net(skb->dev), NULL); + /* Must recheck the ingress hook head, in the event it became NULL + * after the check in nf_hook_ingress_active evaluated to true. + */ + if (unlikely(!e)) + return 0; + + nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, INT_MIN, + NFPROTO_NETDEV, skb->dev, NULL, NULL, + dev_net(skb->dev), NULL); return nf_hook_slow(skb, &state); } static inline void nf_hook_ingress_init(struct net_device *dev) { - INIT_LIST_HEAD(&dev->nf_hooks_ingress); + RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); } #else /* CONFIG_NETFILTER_INGRESS */ static inline int nf_hook_ingress_active(struct sk_buff *skb) -- cgit v1.2.3 From 2cf750704bb6d7ed8c7d732e071dd1bc890ea5e8 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sun, 25 Sep 2016 23:08:31 +0200 Subject: ipmr, ip6mr: fix scheduling while atomic and a deadlock with ipmr_get_route Since the commit below the ipmr/ip6mr rtnl_unicast() code uses the portid instead of the previous dst_pid which was copied from in_skb's portid. 
Since the skb is new the portid is 0 at that point so the packets are sent to the kernel and we get scheduling while atomic or a deadlock (depending on where it happens) by trying to acquire rtnl two times. Also since this is RTM_GETROUTE, it can be triggered by a normal user. Here's the sleeping while atomic trace: [ 7858.212557] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620 [ 7858.212748] in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper/0 [ 7858.212881] 2 locks held by swapper/0/0: [ 7858.213013] #0: (((&mrt->ipmr_expire_timer))){+.-...}, at: [] call_timer_fn+0x5/0x350 [ 7858.213422] #1: (mfc_unres_lock){+.....}, at: [] ipmr_expire_process+0x25/0x130 [ 7858.213807] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.8.0-rc7+ #179 [ 7858.213934] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 [ 7858.214108] 0000000000000000 ffff88005b403c50 ffffffff813a7804 0000000000000000 [ 7858.214412] ffffffff81a1338e ffff88005b403c78 ffffffff810a4a72 ffffffff81a1338e [ 7858.214716] 000000000000026c 0000000000000000 ffff88005b403ca8 ffffffff810a4b9f [ 7858.215251] Call Trace: [ 7858.215412] [] dump_stack+0x85/0xc1 [ 7858.215662] [] ___might_sleep+0x192/0x250 [ 7858.215868] [] __might_sleep+0x6f/0x100 [ 7858.216072] [] mutex_lock_nested+0x33/0x4d0 [ 7858.216279] [] ? netlink_lookup+0x25f/0x460 [ 7858.216487] [] rtnetlink_rcv+0x1b/0x40 [ 7858.216687] [] netlink_unicast+0x19c/0x260 [ 7858.216900] [] rtnl_unicast+0x20/0x30 [ 7858.217128] [] ipmr_destroy_unres+0xa9/0xf0 [ 7858.217351] [] ipmr_expire_process+0x8f/0x130 [ 7858.217581] [] ? ipmr_net_init+0x180/0x180 [ 7858.217785] [] ? ipmr_net_init+0x180/0x180 [ 7858.217990] [] call_timer_fn+0xa5/0x350 [ 7858.218192] [] ? call_timer_fn+0x5/0x350 [ 7858.218415] [] ? ipmr_net_init+0x180/0x180 [ 7858.218656] [] run_timer_softirq+0x260/0x640 [ 7858.218865] [] ? 
__do_softirq+0xbb/0x54f [ 7858.219068] [] __do_softirq+0xe8/0x54f [ 7858.219269] [] irq_exit+0xb8/0xc0 [ 7858.219463] [] smp_apic_timer_interrupt+0x42/0x50 [ 7858.219678] [] apic_timer_interrupt+0x8c/0xa0 [ 7858.219897] [] ? native_safe_halt+0x6/0x10 [ 7858.220165] [] ? trace_hardirqs_on+0xd/0x10 [ 7858.220373] [] default_idle+0x23/0x190 [ 7858.220574] [] arch_cpu_idle+0xf/0x20 [ 7858.220790] [] default_idle_call+0x4c/0x60 [ 7858.221016] [] cpu_startup_entry+0x39b/0x4d0 [ 7858.221257] [] rest_init+0x135/0x140 [ 7858.221469] [] start_kernel+0x50e/0x51b [ 7858.221670] [] ? early_idt_handler_array+0x120/0x120 [ 7858.221894] [] x86_64_start_reservations+0x2a/0x2c [ 7858.222113] [] x86_64_start_kernel+0x13b/0x14a Fixes: 2942e9005056 ("[RTNETLINK]: Use rtnl_unicast() for rtnetlink unicasts") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 2 +- include/linux/mroute6.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index d351fd3e1049..e5fb81376e92 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -120,5 +120,5 @@ struct mfc_cache { struct rtmsg; int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, - struct rtmsg *rtm, int nowait); + struct rtmsg *rtm, int nowait, u32 portid); #endif diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 3987b64040c5..19a1c0c2993b 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -116,7 +116,7 @@ struct mfc6_cache { struct rtmsg; extern int ip6mr_get_route(struct net *net, struct sk_buff *skb, - struct rtmsg *rtm, int nowait); + struct rtmsg *rtm, int nowait, u32 portid); #ifdef CONFIG_IPV6_MROUTE extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb); -- cgit v1.2.3 From ba409b31b3d37b52dda4eefcde04f5837c7ee4aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Wed, 10 Aug 
2016 13:22:14 +0200 Subject: dma-mapping: add {map,unmap}_resource to dma_map_ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add methods to handle mapping of device resources from a physical address. This is needed for example to be able to map MMIO FIFO registers to a IOMMU. Signed-off-by: Niklas Söderlund Reviewed-by: Laurent Pinchart Acked-by: Arnd Bergmann Signed-off-by: Vinod Koul --- include/linux/dma-mapping.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 66533e18276c..673b7be85f7a 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -95,6 +95,12 @@ struct dma_map_ops { struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs); + dma_addr_t (*map_resource)(struct device *dev, phys_addr_t phys_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + void (*unmap_resource)(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs); void (*sync_single_for_cpu)(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir); -- cgit v1.2.3 From 0e74b34dfc3318bf4c7e51349d453d49fb8e9e16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Wed, 10 Aug 2016 13:22:15 +0200 Subject: dma-debug: add support for resource mappings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A MMIO mapped resource can not be represented by a struct page so a new debug type is needed to handle this. This patch add such type and functionality to add/remove entries and how to translate them to a physical address. 
Signed-off-by: Niklas Söderlund Acked-by: Arnd Bergmann Signed-off-by: Vinod Koul --- include/linux/dma-debug.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index fe8cb610deac..c7d844f09c3a 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -56,6 +56,13 @@ extern void debug_dma_alloc_coherent(struct device *dev, size_t size, extern void debug_dma_free_coherent(struct device *dev, size_t size, void *virt, dma_addr_t addr); +extern void debug_dma_map_resource(struct device *dev, phys_addr_t addr, + size_t size, int direction, + dma_addr_t dma_addr); + +extern void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, + size_t size, int direction); + extern void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, int direction); @@ -141,6 +148,18 @@ static inline void debug_dma_free_coherent(struct device *dev, size_t size, { } +static inline void debug_dma_map_resource(struct device *dev, phys_addr_t addr, + size_t size, int direction, + dma_addr_t dma_addr) +{ +} + +static inline void debug_dma_unmap_resource(struct device *dev, + dma_addr_t dma_addr, size_t size, + int direction) +{ +} + static inline void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, int direction) -- cgit v1.2.3 From 6f3d87968f9c8b529bc81eff5a1f45e92553493d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Wed, 10 Aug 2016 13:22:16 +0200 Subject: dma-mapping: add dma_{map,unmap}_resource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Map/Unmap a device MMIO resource from a physical address. If no dma_map_ops method is available the operation is a no-op. 
Signed-off-by: Niklas Söderlund Acked-by: Arnd Bergmann Signed-off-by: Vinod Koul --- include/linux/dma-mapping.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 673b7be85f7a..6e00c7fdbbd3 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -264,6 +264,42 @@ static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, debug_dma_unmap_page(dev, addr, size, dir, false); } +static inline dma_addr_t dma_map_resource(struct device *dev, + phys_addr_t phys_addr, + size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + unsigned long pfn = __phys_to_pfn(phys_addr); + dma_addr_t addr; + + BUG_ON(!valid_dma_direction(dir)); + + /* Don't allow RAM to be mapped */ + BUG_ON(pfn_valid(pfn)); + + addr = phys_addr; + if (ops->map_resource) + addr = ops->map_resource(dev, phys_addr, size, dir, attrs); + + debug_dma_map_resource(dev, phys_addr, size, dir, addr); + + return addr; +} + +static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + if (ops->unmap_resource) + ops->unmap_resource(dev, addr, size, dir, attrs); + debug_dma_unmap_resource(dev, addr, size, dir); +} + static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) -- cgit v1.2.3 From 93d05f1ec644c97535159649de7f8e6731841336 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Fri, 2 Sep 2016 15:57:09 +0530 Subject: dmaengine: mmp_tdma: enable COMPILE_TEST To get more coverage, enable COMPILE_TEST for this driver. 
While at it, to fix build on other archs, select MMP_SRAM only for ARCH_MMP and also fix the platform header Suggested-by: Arnd Bergmann Acked-by: Zhangfei Gao Signed-off-by: Vinod Koul --- include/linux/platform_data/dma-mmp_tdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/dma-mmp_tdma.h b/include/linux/platform_data/dma-mmp_tdma.h index 0c72886030ef..422d4504dbac 100644 --- a/include/linux/platform_data/dma-mmp_tdma.h +++ b/include/linux/platform_data/dma-mmp_tdma.h @@ -28,7 +28,7 @@ struct sram_platdata { int granularity; }; -#ifdef CONFIG_ARM +#ifdef CONFIG_MMP_SRAM extern struct gen_pool *sram_get_gpool(char *pool_name); #else static inline struct gen_pool *sram_get_gpool(char *pool_name) -- cgit v1.2.3 From 34681d84a0f7cc22ded1413dc79eef8a2f23d9c3 Mon Sep 17 00:00:00 2001 From: Sam Van Den Berge Date: Thu, 22 Sep 2016 20:51:15 +0200 Subject: dmaengine: s3c24xx: Add dma_slave_map for s3c2440 devices This patch updates the s3c24xx dma driver to be able to pass a dma_slave_map array via the platform data. This is needed to be able to use the new, simpler dmaengine API [1]. I used the virtual DMA channels as a parameter for the dma_filter function. By doing that, I could reuse the existing filter function in drivers/dma/s3c24xx-dma.c. I have tested this on my mini2440 board with the audio driver. According to my observations, dma_request_slave_channel in the function dmaengine_pcm_new in the file sound/soc/soc-generic-dmaengine-pcm.c now returns a valid DMA channel whereas before no DMA channel was returned at that point. Entries for DMACH_XD0, DMACH_XD1 and DMACH_TIMER are missing because I don't really know which driver to use for these. 
[1] http://lists.infradead.org/pipermail/linux-arm-kernel/2015-December/393635.html Signed-off-by: Sam Van Den Berge Reviewed-by: Sylwester Nawrocki Acked-by: Arnd Bergmann Acked-by: Krzysztof Kozlowski Signed-off-by: Vinod Koul --- include/linux/platform_data/dma-s3c24xx.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/dma-s3c24xx.h b/include/linux/platform_data/dma-s3c24xx.h index 89ba1b0c90e4..4f9aba405e96 100644 --- a/include/linux/platform_data/dma-s3c24xx.h +++ b/include/linux/platform_data/dma-s3c24xx.h @@ -30,16 +30,22 @@ struct s3c24xx_dma_channel { u16 chansel; }; +struct dma_slave_map; + /** * struct s3c24xx_dma_platdata - platform specific settings * @num_phy_channels: number of physical channels * @channels: array of virtual channel descriptions * @num_channels: number of virtual channels + * @slave_map: dma slave map matching table + * @slavecnt: number of elements in slave_map */ struct s3c24xx_dma_platdata { int num_phy_channels; struct s3c24xx_dma_channel *channels; int num_channels; + const struct dma_slave_map *slave_map; + int slavecnt; }; struct dma_chan; -- cgit v1.2.3 From 5275a652d296711aaf7f2f4173c8db153e5777c3 Mon Sep 17 00:00:00 2001 From: Uri Yanai Date: Sun, 14 Aug 2016 11:46:36 +0300 Subject: =?UTF-8?q?mmc:=20sd:=20Export=20SD=20Status=20via=20=E2=80=9Cssr?= =?UTF-8?q?=E2=80=9D=20device=20attribute?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SD Status register contains several important fields related to the SD Card proprietary features. Those fields may be used by user space applications for vendor specific usage. None of those fields are exported today by the driver to user space. In this patch, we are reading the SD Status register and exporting (using MMC_DEV_ATTR) the SD Status register to the user space. 
Signed-off-by: Uri Yanai Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index d8673ca968ba..73fad83acbcb 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -292,6 +292,7 @@ struct mmc_card { u32 raw_cid[4]; /* raw card CID */ u32 raw_csd[4]; /* raw card CSD */ u32 raw_scr[2]; /* raw card SCR */ + u32 raw_ssr[16]; /* raw card SSR */ struct mmc_cid cid; /* card identification */ struct mmc_csd csd; /* card specific */ struct mmc_ext_csd ext_csd; /* mmc v4 extended card specific */ -- cgit v1.2.3 From d6786fefe816ba60c794f8a41a73b0dd3a4df097 Mon Sep 17 00:00:00 2001 From: Guodong Xu Date: Fri, 12 Aug 2016 16:51:26 +0800 Subject: mmc: dw_mmc: add reset support to dwmmc host controller Dwmmc host controller may be in an unknown state when entering kernel boot. One example is when booting from eMMC: the bootloader needs to initialize the MMC host controller into some state so it can read. In order to make sure the MMC host controller is in a clean initial state, this reset support is added. With this patch, a 'resets' property can be added into dw_mmc device tree node. The hardware logic is: dwmmc host controller IP receives a reset signal from a 'reset provider' (eg. power management unit). The 'resets' property points to this reset signal. So, during dwmmc driver probe, it can use this signal to reset itself. Refer to [1] for more information. 
[1] Documentation/devicetree/bindings/reset/reset.txt Signed-off-by: Guodong Xu Signed-off-by: Xinwei Kong Signed-off-by: Zhangfei Gao Reviewed-by: Shawn Lin Signed-off-by: Jaehoon Chung Signed-off-by: Ulf Hansson --- include/linux/mmc/dw_mmc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 83b0edfce471..f5af2bd35e7f 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -17,6 +17,7 @@ #include #include #include +#include #define MAX_MCI_SLOTS 2 @@ -259,6 +260,7 @@ struct dw_mci_board { /* delay in mS before detecting cards after interrupt */ u32 detect_delay_ms; + struct reset_control *rstc; struct dw_mci_dma_ops *dma_ops; struct dma_pdata *data; }; -- cgit v1.2.3 From 5163af5a5e2e69c9a5a854b92ffa7e2f7672dbf7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 16 Aug 2016 13:44:11 +0300 Subject: mmc: core: Add support for sending commands during data transfer A host controller driver exposes its capability using caps flag MMC_CAP_CMD_DURING_TFR. A driver with that capability can accept requests that are marked mrq->cap_cmd_during_tfr = true. Then the driver informs the upper layers when the command line is available for further commands by calling mmc_command_done(). Because of that, the driver will not then automatically send STOP commands, and it is the responsibility of the upper layer to send a STOP command if it is required. For requests submitted through the mmc_wait_for_req() interface, the caller sets mrq->cap_cmd_during_tfr = true which causes mmc_wait_for_req() in fact not to wait. The caller can then send commands that do not use the data lines. Finally the caller can wait for the transfer to complete by calling mmc_wait_for_req_done() which is now exported. For requests submitted through the mmc_start_req() interface, the caller again sets mrq->cap_cmd_during_tfr = true, but mmc_start_req() anyway does not wait. 
The caller can then send commands that do not use the data lines. Finally the caller can wait for the transfer to complete in the normal way i.e. calling mmc_start_req() again. Irrespective of how a cap_cmd_during_tfr request is started, mmc_is_req_done() can be called if the upper layer needs to determine if the request is done. However the appropriate waiting function (either mmc_wait_for_req_done() or mmc_start_req()) must still be called. The implementation consists primarily of a new completion mrq->cmd_completion which notifies when the command line is available for further commands. That completion is completed by mmc_command_done(). When there is an ongoing data transfer, calls to mmc_wait_for_req() will automatically wait on that completion, so the caller does not have to do anything special. Note, in the case of errors, the driver may call mmc_request_done() without calling mmc_command_done() because mmc_request_done() always calls mmc_command_done(). Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- include/linux/mmc/core.h | 7 +++++++ include/linux/mmc/host.h | 5 +++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index b01e77de1a74..368bed70aa9d 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -133,8 +133,12 @@ struct mmc_request { struct mmc_command *stop; struct completion completion; + struct completion cmd_completion; void (*done)(struct mmc_request *);/* completion function */ struct mmc_host *host; + + /* Allow other commands during this ongoing data transfer or busy wait */ + bool cap_cmd_during_tfr; }; struct mmc_card; @@ -146,6 +150,9 @@ extern struct mmc_async_req *mmc_start_req(struct mmc_host *, struct mmc_async_req *, int *); extern int mmc_interrupt_hpi(struct mmc_card *); extern void mmc_wait_for_req(struct mmc_host *, struct mmc_request *); +extern void mmc_wait_for_req_done(struct mmc_host *host, + struct mmc_request 
*mrq); +extern bool mmc_is_req_done(struct mmc_host *host, struct mmc_request *mrq); extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int); extern int mmc_app_cmd(struct mmc_host *, struct mmc_card *); extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *, diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index aa4bfbf129e4..0b2439441cc8 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -281,6 +281,7 @@ struct mmc_host { #define MMC_CAP_DRIVER_TYPE_A (1 << 23) /* Host supports Driver Type A */ #define MMC_CAP_DRIVER_TYPE_C (1 << 24) /* Host supports Driver Type C */ #define MMC_CAP_DRIVER_TYPE_D (1 << 25) /* Host supports Driver Type D */ +#define MMC_CAP_CMD_DURING_TFR (1 << 29) /* Commands during data transfer */ #define MMC_CAP_CMD23 (1 << 30) /* CMD23 supported. */ #define MMC_CAP_HW_RESET (1 << 31) /* Hardware reset */ @@ -382,6 +383,9 @@ struct mmc_host { struct mmc_async_req *areq; /* active async req */ struct mmc_context_info context_info; /* async synchronization info */ + /* Ongoing data transfer that allows commands during transfer */ + struct mmc_request *ongoing_mrq; + #ifdef CONFIG_FAIL_MMC_REQUEST struct fault_attr fail_mmc_request; #endif @@ -418,6 +422,7 @@ int mmc_power_restore_host(struct mmc_host *host); void mmc_detect_change(struct mmc_host *, unsigned long delay); void mmc_request_done(struct mmc_host *, struct mmc_request *); +void mmc_command_done(struct mmc_host *host, struct mmc_request *mrq); static inline void mmc_signal_sdio_irq(struct mmc_host *host) { -- cgit v1.2.3 From 51b50c961676428cd356d6fa494a2e9f53dc77bf Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 19 Sep 2016 22:57:45 +0200 Subject: mmc: add define for R1 response without CRC The core uses it for polling. Give drivers a proper define to handle this case like for other response types. 
Signed-off-by: Wolfram Sang Signed-off-by: Ulf Hansson --- include/linux/mmc/core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 368bed70aa9d..2b953eb8ceae 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -55,6 +55,9 @@ struct mmc_command { #define MMC_RSP_R6 (MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE) #define MMC_RSP_R7 (MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE) +/* Can be used by core to poll after switch to MMC HS mode */ +#define MMC_RSP_R1_NO_CRC (MMC_RSP_PRESENT|MMC_RSP_OPCODE) + #define mmc_resp_type(cmd) ((cmd)->flags & (MMC_RSP_PRESENT|MMC_RSP_136|MMC_RSP_CRC|MMC_RSP_BUSY|MMC_RSP_OPCODE)) /* -- cgit v1.2.3 From 4a7069a32c99a81950de035535b0a064dcceaeba Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Thu, 5 May 2016 14:21:42 +0530 Subject: thermal: core: export apis to get slope and offset Add apis for platform thermal drivers to query for slope and offset attributes, which might be needed for temperature calculations. 
Signed-off-by: Rajendra Nayak Signed-off-by: Eduardo Valentin Signed-off-by: Zhang Rui --- include/linux/thermal.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index ee517bef0db0..707d7353c28b 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -435,6 +435,8 @@ thermal_of_cooling_device_register(struct device_node *np, char *, void *, void thermal_cooling_device_unregister(struct thermal_cooling_device *); struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name); int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp); +int thermal_zone_get_slope(struct thermal_zone_device *tz); +int thermal_zone_get_offset(struct thermal_zone_device *tz); int get_tz_trend(struct thermal_zone_device *, int); struct thermal_instance *get_thermal_instance(struct thermal_zone_device *, @@ -492,6 +494,12 @@ static inline struct thermal_zone_device *thermal_zone_get_zone_by_name( static inline int thermal_zone_get_temp( struct thermal_zone_device *tz, int *temp) { return -ENODEV; } +static inline int thermal_zone_get_slope( + struct thermal_zone_device *tz) +{ return -ENODEV; } +static inline int thermal_zone_get_offset( + struct thermal_zone_device *tz) +{ return -ENODEV; } static inline int get_tz_trend(struct thermal_zone_device *tz, int trip) { return -ENODEV; } static inline struct thermal_instance * -- cgit v1.2.3 From 060c034a974187e930b790957cafc5047cc30a40 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 22 Jun 2016 16:42:01 +0800 Subject: thermal: Add support for hardware-tracked trip points This adds support for hardware-tracked trip points to the device tree thermal sensor framework. The framework supports an arbitrary number of trip points. Whenever the current temperature is updated, the trip points immediately below and above the current temperature are found. A .set_trips callback is then called with the temperatures. 
If there is no trip point above or below the current temperature, the passed trip temperature will be -INT_MAX or INT_MAX respectively. In this callback, the driver should program the hardware such that it is notified when either of these trip points are triggered. When a trip point is triggered, the driver should call `thermal_zone_device_update' for the respective thermal zone. This will cause the trip points to be updated again. If .set_trips is not implemented, the framework behaves as before. This patch is based on an earlier version from Mikko Perttunen Signed-off-by: Sascha Hauer Signed-off-by: Caesar Wang Cc: Zhang Rui Cc: Eduardo Valentin Cc: linux-pm@vger.kernel.org Reviewed-by: Javi Merino Signed-off-by: Eduardo Valentin Signed-off-by: Zhang Rui --- include/linux/thermal.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 707d7353c28b..54cdfeaaedd4 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -98,6 +98,7 @@ struct thermal_zone_device_ops { int (*unbind) (struct thermal_zone_device *, struct thermal_cooling_device *); int (*get_temp) (struct thermal_zone_device *, int *); + int (*set_trips) (struct thermal_zone_device *, int, int); int (*get_mode) (struct thermal_zone_device *, enum thermal_device_mode *); int (*set_mode) (struct thermal_zone_device *, @@ -168,6 +169,10 @@ struct thermal_attr { * @last_temperature: previous temperature read * @emul_temperature: emulated temperature when using CONFIG_THERMAL_EMULATION * @passive: 1 if you've crossed a passive trip point, 0 otherwise. + * @prev_low_trip: the low current temperature if you've crossed a passive + trip point. + * @prev_high_trip: the above current temperature if you've crossed a + passive trip point. * @forced_passive: If > 0, temperature at which to switch on all ACPI * processor cooling devices. Currently only used by the * step-wise governor. 
@@ -199,6 +204,8 @@ struct thermal_zone_device { int last_temperature; int emul_temperature; int passive; + int prev_low_trip; + int prev_high_trip; unsigned int forced_passive; atomic_t need_update; struct thermal_zone_device_ops *ops; @@ -426,6 +433,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); void thermal_zone_device_update(struct thermal_zone_device *); +void thermal_zone_set_trips(struct thermal_zone_device *); struct thermal_cooling_device *thermal_cooling_device_register(char *, void *, const struct thermal_cooling_device_ops *); @@ -477,6 +485,8 @@ static inline int thermal_zone_unbind_cooling_device( { return -ENODEV; } static inline void thermal_zone_device_update(struct thermal_zone_device *tz) { } +static inline void thermal_zone_set_trips(struct thermal_zone_device *tz) +{ } static inline struct thermal_cooling_device * thermal_cooling_device_register(char *type, void *devdata, const struct thermal_cooling_device_ops *ops) -- cgit v1.2.3 From 826386e73193e0b58c6d797fbbab409bc98b1d9c Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 22 Jun 2016 16:42:02 +0800 Subject: thermal: of: implement .set_trips for device tree thermal zones This patch implements .set_trips for device tree thermal zones. As the hardware-tracked trip points is supported by thermal core patch[0]. patch[0] "thermal: Add support for hardware-tracked trip points". 
Signed-off-by: Sascha Hauer Signed-off-by: Caesar Wang Cc: Zhang Rui Cc: Eduardo Valentin Reviewed-by: Javi Merino Signed-off-by: Eduardo Valentin Signed-off-by: Zhang Rui --- include/linux/thermal.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 54cdfeaaedd4..20118b9ebeb7 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -340,6 +340,9 @@ struct thermal_genl_event { * * Optional: * @get_trend: a pointer to a function that reads the sensor temperature trend. + * @set_trips: a pointer to a function that sets a temperature window. When + * this window is left the driver must inform the thermal core via + * thermal_zone_device_update. * @set_emul_temp: a pointer to a function that sets sensor emulated * temperature. * @set_trip_temp: a pointer to a function that sets the trip temperature on @@ -348,6 +351,7 @@ struct thermal_genl_event { struct thermal_zone_of_device_ops { int (*get_temp)(void *, int *); int (*get_trend)(void *, long *); + int (*set_trips)(void *, int, int); int (*set_emul_temp)(void *, int); int (*set_trip_temp)(void *, int, int); }; -- cgit v1.2.3 From e78eaf45993a51e5d7120de48aa01f059ffe8d37 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 22 Jun 2016 16:42:03 +0800 Subject: thermal: streamline get_trend callbacks The .get_trend callback in struct thermal_zone_device_ops has the prototype: int (*get_trend) (struct thermal_zone_device *, int, enum thermal_trend *); whereas the .get_trend callback in struct thermal_zone_of_device_ops has: int (*get_trend)(void *, long *); Streamline both prototypes and add the trip argument to the OF callback as well and use enum thermal_trend * instead of an integer pointer. While the OF prototype may be the better one, this should be decided at framework level and not on OF level. 
Signed-off-by: Sascha Hauer Signed-off-by: Caesar Wang Cc: Zhang Rui Cc: Eduardo Valentin Cc: linux-pm@vger.kernel.org Reviewed-by: Keerthy Signed-off-by: Eduardo Valentin Signed-off-by: Zhang Rui --- include/linux/thermal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 20118b9ebeb7..b3c16f06fdc4 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -350,7 +350,7 @@ struct thermal_genl_event { */ struct thermal_zone_of_device_ops { int (*get_temp)(void *, int *); - int (*get_trend)(void *, long *); + int (*get_trend)(void *, int, enum thermal_trend *); int (*set_trips)(void *, int, int); int (*set_emul_temp)(void *, int); int (*set_trip_temp)(void *, int, int); -- cgit v1.2.3 From 0e70f466fb910ae54c4c71243b99385129e93feb Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 26 Aug 2016 16:21:16 -0700 Subject: thermal: Enhance thermal_zone_device_update for events Added one additional parameter to thermal_zone_device_update() to provide caller with an optional capability to specify reason. Currently this event is used by user space governor to trigger different processing based on event code. Also it saves an additional call to read temperature when the event is received. 
The following events are currently defined: - Unspecified event - New temperature sample - Trip point violated - Trip point changed - thermal device up and down - thermal device power capability changed Signed-off-by: Srinivas Pandruvada Signed-off-by: Zhang Rui --- include/linux/thermal.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index b3c16f06fdc4..511182a88e76 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -92,6 +92,17 @@ enum thermal_trend { THERMAL_TREND_DROP_FULL, /* apply lowest cooling action */ }; +/* Thermal notification reason */ +enum thermal_notify_event { + THERMAL_EVENT_UNSPECIFIED, /* Unspecified event */ + THERMAL_EVENT_TEMP_SAMPLE, /* New Temperature sample */ + THERMAL_TRIP_VIOLATED, /* TRIP Point violation */ + THERMAL_TRIP_CHANGED, /* TRIP Point temperature changed */ + THERMAL_DEVICE_DOWN, /* Thermal device is down */ + THERMAL_DEVICE_UP, /* Thermal device is up after a down event */ + THERMAL_DEVICE_POWER_CAPABILITY_CHANGED, /* power capability changed */ +}; + struct thermal_zone_device_ops { int (*bind) (struct thermal_zone_device *, struct thermal_cooling_device *); @@ -187,6 +198,7 @@ struct thermal_attr { * @lock: lock to protect thermal_instances list * @node: node in thermal_tz_list (in thermal_core.c) * @poll_queue: delayed work for polling + * @notify_event: Last notification event */ struct thermal_zone_device { int id; @@ -217,6 +229,7 @@ struct thermal_zone_device { struct mutex lock; struct list_head node; struct delayed_work poll_queue; + enum thermal_notify_event notify_event; }; /** @@ -436,7 +449,8 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, unsigned int); int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); -void thermal_zone_device_update(struct thermal_zone_device *); +void 
thermal_zone_device_update(struct thermal_zone_device *, + enum thermal_notify_event); void thermal_zone_set_trips(struct thermal_zone_device *); struct thermal_cooling_device *thermal_cooling_device_register(char *, void *, @@ -487,7 +501,8 @@ static inline int thermal_zone_unbind_cooling_device( struct thermal_zone_device *tz, int trip, struct thermal_cooling_device *cdev) { return -ENODEV; } -static inline void thermal_zone_device_update(struct thermal_zone_device *tz) +static inline void thermal_zone_device_update(struct thermal_zone_device *tz, + enum thermal_notify_event event) { } static inline void thermal_zone_set_trips(struct thermal_zone_device *tz) { } -- cgit v1.2.3 From e0e0be8a835520e2f7c89f214dfda570922a1b90 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 27 Sep 2016 11:03:57 +0200 Subject: libfs: support RENAME_NOREPLACE in simple_rename() This is trivial to do: - add flags argument to simple_rename() - check if flags doesn't have any other than RENAME_NOREPLACE - assign simple_rename() to .rename2 instead of .rename Filesystems converted: hugetlbfs, ramfs, bpf. Debugfs uses simple_rename() to implement debugfs_rename(), which is for debugfs instances to rename files internally, not for userspace filesystem access. For this case pass zero flags to simple_rename(). 
Signed-off-by: Miklos Szeredi Acked-by: Greg Kroah-Hartman Cc: Alexei Starovoitov --- include/linux/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 901e25d495cc..2bd67545fdf8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2950,7 +2950,8 @@ extern int simple_open(struct inode *inode, struct file *file); extern int simple_link(struct dentry *, struct inode *, struct dentry *); extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); -extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); +extern int simple_rename(struct inode *, struct dentry *, + struct inode *, struct dentry *, unsigned int); extern int noop_fsync(struct file *, loff_t, loff_t, int); extern int simple_empty(struct dentry *); extern int simple_readpage(struct file *file, struct page *page); -- cgit v1.2.3 From 18fc84dafaac1fd63d5e6e600058eada8fc7914b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 27 Sep 2016 11:03:58 +0200 Subject: vfs: remove unused i_op->rename No in-tree uses remain. 
Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2bd67545fdf8..6b14ceba4f20 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1732,8 +1732,6 @@ struct inode_operations { int (*mkdir) (struct inode *,struct dentry *,umode_t); int (*rmdir) (struct inode *,struct dentry *); int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); - int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); int (*rename2) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*setattr) (struct dentry *, struct iattr *); -- cgit v1.2.3 From 2773bf00aeb9bf39e022463272a61dd0ec9f55f4 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 27 Sep 2016 11:03:58 +0200 Subject: fs: rename "rename2" i_op to "rename" Generated patch: sed -i "s/\.rename2\t/\.rename\t\t/" `git grep -wl rename2` sed -i "s/\brename2\b/rename/g" `git grep -wl rename2` Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6b14ceba4f20..cf7e621f7413 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1732,7 +1732,7 @@ struct inode_operations { int (*mkdir) (struct inode *,struct dentry *,umode_t); int (*rmdir) (struct inode *,struct dentry *); int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); - int (*rename2) (struct inode *, struct dentry *, + int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); -- cgit v1.2.3 From 2d8784df1a1c11ace4e244780facec1e945c5b4f Mon Sep 17 00:00:00 2001 From: Michael Moese Date: Wed, 14 Sep 2016 12:05:24 +0200 Subject: mcb: Add a dma_device to mcb_device When performing DMA 
operations on a MCB device, the device needed for using the DMA API is "mcb_device->bus_carrier". This is rather lengthy, so a shortcut is introduced to struct mcb_device in order to ensure the MCB device driver uses the correct device for DMA operations. Signed-off-by: Michael Moese Signed-off-by: Johannes Thumshirn Signed-off-by: Greg Kroah-Hartman --- include/linux/mcb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mcb.h b/include/linux/mcb.h index ee5200d660b0..4097ac9ea13a 100644 --- a/include/linux/mcb.h +++ b/include/linux/mcb.h @@ -76,6 +76,7 @@ struct mcb_device { int rev; struct resource irq; struct resource mem; + struct device *dma_dev; }; static inline struct mcb_device *to_mcb_device(struct device *dev) -- cgit v1.2.3 From e7fca5d860aeeb1e606448f5191cea8d925cc7a3 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 16 Sep 2016 09:01:18 -0700 Subject: Drivers: hv: get rid of id in struct vmbus_channel The auto incremented counter is not being used anymore, get rid of it. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 7d7cbff33bda..cd184bdca58f 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -706,9 +706,6 @@ struct vmbus_device { }; struct vmbus_channel { - /* Unique channel id */ - int id; - struct list_head listentry; struct hv_device *device_obj; -- cgit v1.2.3 From 36131cdfef5aef7f4a9a36423a7a338bd6f68ad6 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Wed, 21 Sep 2016 12:44:14 +0200 Subject: tty/serial: atmel: fix fractional baud rate computation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The problem with previous code was it rounded values in wrong place and produced wrong baud rate in some cases. 
Signed-off-by: Alexey Starikovskiy [nicolas.ferre@atmel.com: port to newer kernel and add commit log] Signed-off-by: Nicolas Ferre Reviewed-by: Boris Brezillon Reviewed-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- include/linux/atmel_serial.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/atmel_serial.h b/include/linux/atmel_serial.h index f8e452aa48d7..bd2560502f3c 100644 --- a/include/linux/atmel_serial.h +++ b/include/linux/atmel_serial.h @@ -119,6 +119,7 @@ #define ATMEL_US_BRGR 0x20 /* Baud Rate Generator Register */ #define ATMEL_US_CD GENMASK(15, 0) /* Clock Divider */ #define ATMEL_US_FP_OFFSET 16 /* Fractional Part */ +#define ATMEL_US_FP_MASK 0x7 #define ATMEL_US_RTOR 0x24 /* Receiver Time-out Register for USART */ #define ATMEL_UA_RTOR 0x28 /* Receiver Time-out Register for UART */ -- cgit v1.2.3 From f7a62adad01cdb2b64c5a17cdd440736b99a5829 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:02 -0400 Subject: NFSv4.1: Allow revoked stateids to skip the call to TEST_STATEID In some cases (e.g. when the SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED sequence flag is set) we may already know that the stateid was revoked and that the only valid operation we can call is FREE_STATEID. In those cases, allow the stateid to carry the information in the type field, so that we skip the redundant call to TEST_STATEID. 
Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- include/linux/nfs4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index c6564ada9beb..9094faf0699d 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -67,6 +67,7 @@ struct nfs4_stateid_struct { NFS4_DELEGATION_STATEID_TYPE, NFS4_LAYOUT_STATEID_TYPE, NFS4_PNFS_DS_STATEID_TYPE, + NFS4_REVOKED_STATEID_TYPE, } type; }; -- cgit v1.2.3 From 66b808099146166c44157600a166c8372172cd76 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 27 Sep 2016 16:23:34 -0400 Subject: PCI/AER: Cache capability position Save the position of the error reporting capability so it doesn't need to be rediscovered during error handling. Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas CC: Lukas Wunner --- include/linux/pci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 57bc838e0666..ab6b02763916 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -269,6 +269,9 @@ struct pci_dev { unsigned int class; /* 3 bytes: (base,sub,prog-if) */ u8 revision; /* PCI revision, low byte of class word */ u8 hdr_type; /* PCI header type (`multi' flag masked out) */ +#ifdef CONFIG_PCIEAER + u16 aer_cap; /* AER capability offset */ +#endif u8 pcie_cap; /* PCIe capability offset */ u8 msi_cap; /* MSI capability offset */ u8 msix_cap; /* MSI-X capability offset */ @@ -1369,9 +1372,11 @@ static inline bool pcie_aspm_support_enabled(void) { return false; } #ifdef CONFIG_PCIEAER void pci_no_aer(void); bool pci_aer_available(void); +int pci_aer_init(struct pci_dev *dev); #else static inline void pci_no_aer(void) { } static inline bool pci_aer_available(void) { return false; } +static inline int pci_aer_init(struct pci_dev *d) { return -ENODEV; } #endif #ifdef CONFIG_PCIE_ECRC -- cgit v1.2.3 From 4bce9f6ee8f84fdf333d0fd7fcf7f0d8c7cce7fa Mon 
Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 17 Sep 2016 18:02:44 -0400 Subject: get rid of separate multipage fault-in primitives * the only remaining callers of "short" fault-ins are just as happy with generic variants (both in lib/iov_iter.c); switch them to multipage variants, kill the "short" ones * rename the multipage variants to now available plain ones. * get rid of compat macro defining iov_iter_fault_in_multipage_readable by expanding it in its only user. Signed-off-by: Al Viro --- include/linux/pagemap.h | 54 ++----------------------------------------------- include/linux/uio.h | 1 - 2 files changed, 2 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 01e84436cddf..cb2e1d06d2e9 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -518,58 +518,9 @@ void page_endio(struct page *page, bool is_write, int err); extern void add_page_wait_queue(struct page *page, wait_queue_t *waiter); /* - * Fault one or two userspace pages into pagetables. - * Return -EINVAL if more than two pages would be needed. - * Return non-zero on a fault. + * Fault everything in given userspace address range in. */ static inline int fault_in_pages_writeable(char __user *uaddr, int size) -{ - int span, ret; - - if (unlikely(size == 0)) - return 0; - - span = offset_in_page(uaddr) + size; - if (span > 2 * PAGE_SIZE) - return -EINVAL; - /* - * Writing zeroes into userspace here is OK, because we know that if - * the zero gets there, we'll be overwriting it. 
- */ - ret = __put_user(0, uaddr); - if (ret == 0 && span > PAGE_SIZE) - ret = __put_user(0, uaddr + size - 1); - return ret; -} - -static inline int fault_in_pages_readable(const char __user *uaddr, int size) -{ - volatile char c; - int ret; - - if (unlikely(size == 0)) - return 0; - - ret = __get_user(c, uaddr); - if (ret == 0) { - const char __user *end = uaddr + size - 1; - - if (((unsigned long)uaddr & PAGE_MASK) != - ((unsigned long)end & PAGE_MASK)) { - ret = __get_user(c, end); - (void)c; - } - } - return ret; -} - -/* - * Multipage variants of the above prefault helpers, useful if more than - * PAGE_SIZE of data needs to be prefaulted. These are separate from the above - * functions (which only handle up to PAGE_SIZE) to avoid clobbering the - * filemap.c hotpaths. - */ -static inline int fault_in_multipages_writeable(char __user *uaddr, int size) { char __user *end = uaddr + size - 1; @@ -596,8 +547,7 @@ static inline int fault_in_multipages_writeable(char __user *uaddr, int size) return 0; } -static inline int fault_in_multipages_readable(const char __user *uaddr, - int size) +static inline int fault_in_pages_readable(const char __user *uaddr, int size) { volatile char c; const char __user *end = uaddr + size - 1; diff --git a/include/linux/uio.h b/include/linux/uio.h index 75b4aaf31a9d..7709f8d4a9cb 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -76,7 +76,6 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes); void iov_iter_advance(struct iov_iter *i, size_t bytes); int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); -#define iov_iter_fault_in_multipages_readable iov_iter_fault_in_readable size_t iov_iter_single_seg_count(const struct iov_iter *i); size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); -- cgit v1.2.3 From 9b80a184eaadc117f27faad522008f31d571621b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 2 Sep 
2016 00:38:52 +0300 Subject: fs/file: more unsigned file descriptors Propagate unsignedness for grand total of 149 bytes: $ ./scripts/bloat-o-meter ../vmlinux-000 ../obj/vmlinux add/remove: 0/0 grow/shrink: 0/10 up/down: 0/-149 (-149) function old new delta set_close_on_exec 99 98 -1 put_files_struct 201 200 -1 get_close_on_exec 59 58 -1 do_prlimit 498 497 -1 do_execveat_common.isra 1662 1661 -1 __close_fd 178 173 -5 do_dup2 219 204 -15 seq_show 685 660 -25 __alloc_fd 384 357 -27 dup_fd 718 646 -72 It mostly comes from converting "unsigned int" to "long" for bit operations. Signed-off-by: Alexey Dobriyan Signed-off-by: Al Viro --- include/linux/fdtable.h | 6 +++--- include/linux/fs.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 5295535b60c6..aca2a6a1d035 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -30,12 +30,12 @@ struct fdtable { struct rcu_head rcu; }; -static inline bool close_on_exec(int fd, const struct fdtable *fdt) +static inline bool close_on_exec(unsigned int fd, const struct fdtable *fdt) { return test_bit(fd, fdt->close_on_exec); } -static inline bool fd_is_open(int fd, const struct fdtable *fdt) +static inline bool fd_is_open(unsigned int fd, const struct fdtable *fdt) { return test_bit(fd, fdt->open_fds); } @@ -57,7 +57,7 @@ struct files_struct { * written part on a separate cache line in SMP */ spinlock_t file_lock ____cacheline_aligned_in_smp; - int next_fd; + unsigned int next_fd; unsigned long close_on_exec_init[1]; unsigned long open_fds_init[1]; unsigned long full_fds_bits_init[1]; diff --git a/include/linux/fs.h b/include/linux/fs.h index 901e25d495cc..2f6f059d739c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -63,7 +63,7 @@ extern void __init files_maxfiles_init(void); extern struct files_stat_struct files_stat; extern unsigned long get_max_files(void); -extern int sysctl_nr_open; +extern unsigned 
int sysctl_nr_open; extern struct inodes_stat_t inodes_stat; extern int leases_enable, lease_break_time; extern int sysctl_protected_symlinks; -- cgit v1.2.3 From 3cd886666ff19e9796a519e16d94fc94f79c8a4c Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Wed, 14 Sep 2016 07:48:02 -0700 Subject: vfs: Add current_time() api current_fs_time() is used for inode timestamps. Change the signature of the function to take inode pointer instead of superblock as per Linus's suggestion. Also, move the api under vfs as per the discussion on the thread: https://lkml.org/lkml/2016/6/9/36 . As per Arnd's suggestion on the thread, changing the function name. current_fs_time() will be deleted after all the references to it are replaced by current_time(). There was a bug reported by kbuild test bot with the change as some of the calls to current_time() were made before the super_block was initialized. Catch these accidental assignments as timespec_trunc() does for wrong granularities. This allows for the function to work right even in these circumstances. But, adds a warning to make the user aware of the bug. A coccinelle script was used to identify all the current .alloc_inode super_block callbacks that updated inode timestamps. proc filesystem was the only one that was modifying inode times as part of this callback. The series includes a patch to fix that. Note that timespec_trunc() will also be moved to fs/inode.c in a separate patch when this will need to be revamped for bounds checking purposes. 
Signed-off-by: Deepa Dinamani Reviewed-by: Arnd Bergmann Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 901e25d495cc..32ce6b31a61b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1459,6 +1459,7 @@ static inline void i_gid_write(struct inode *inode, gid_t gid) } extern struct timespec current_fs_time(struct super_block *sb); +extern struct timespec current_time(struct inode *inode); /* * Snapshotting support. -- cgit v1.2.3 From 9dcfcda5768eda793e15a1a73da38cfd1fc1a47a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Sep 2016 09:45:24 +1000 Subject: compat: remove compat_printk() After 7e8e385aaf6e ("x86/compat: Remove sys32_vm86_warning"), this function has become unused, so we can remove it as well. Link: http://lkml.kernel.org/r/20160617142903.3070388-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Cc: Alexander Viro Cc: "Theodore Ts'o" Cc: Arnaldo Carvalho de Melo Signed-off-by: Andrew Morton --- include/linux/compat.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index f964ef79e0ad..63609398ef9f 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -432,7 +432,6 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); -extern __printf(1, 2) int compat_printk(const char *fmt, ...); extern void sigset_from_compat(sigset_t *set, const compat_sigset_t *compat); extern void sigset_to_compat(compat_sigset_t *compat, const sigset_t *set); -- cgit v1.2.3 From 2211d5ba5c6c4e972ba6dbc912b2897425ea6621 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 27 Sep 2016 13:03:22 +0200 Subject: posix_acl: xattr representation cleanups Remove the unnecessary typedefs and the zero-length a_entries array in struct posix_acl_xattr_header. 
Signed-off-by: Andreas Gruenbacher Signed-off-by: Al Viro --- include/linux/posix_acl_xattr.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h index e5e8ec40278d..d23d36842322 100644 --- a/include/linux/posix_acl_xattr.h +++ b/include/linux/posix_acl_xattr.h @@ -18,34 +18,33 @@ /* An undefined entry e_id value */ #define ACL_UNDEFINED_ID (-1) -typedef struct { +struct posix_acl_xattr_entry { __le16 e_tag; __le16 e_perm; __le32 e_id; -} posix_acl_xattr_entry; +}; -typedef struct { +struct posix_acl_xattr_header { __le32 a_version; - posix_acl_xattr_entry a_entries[0]; -} posix_acl_xattr_header; +}; static inline size_t posix_acl_xattr_size(int count) { - return (sizeof(posix_acl_xattr_header) + - (count * sizeof(posix_acl_xattr_entry))); + return (sizeof(struct posix_acl_xattr_header) + + (count * sizeof(struct posix_acl_xattr_entry))); } static inline int posix_acl_xattr_count(size_t size) { - if (size < sizeof(posix_acl_xattr_header)) + if (size < sizeof(struct posix_acl_xattr_header)) return -1; - size -= sizeof(posix_acl_xattr_header); - if (size % sizeof(posix_acl_xattr_entry)) + size -= sizeof(struct posix_acl_xattr_header); + if (size % sizeof(struct posix_acl_xattr_entry)) return -1; - return size / sizeof(posix_acl_xattr_entry); + return size / sizeof(struct posix_acl_xattr_entry); } #ifdef CONFIG_FS_POSIX_ACL -- cgit v1.2.3 From bc8bcf3b150a29cd8d3f17a1aeb19a804ea683fa Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 27 Sep 2016 13:03:23 +0200 Subject: posix_acl: uapi header split Export the base definitions and the xattr representation of POSIX ACLs to user space. 
Signed-off-by: Andreas Gruenbacher Signed-off-by: Al Viro --- include/linux/posix_acl.h | 22 +--------------------- include/linux/posix_acl_xattr.h | 18 +----------------- 2 files changed, 2 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index d5d3d741f028..5433eea8e97c 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -11,27 +11,7 @@ #include #include #include - -#define ACL_UNDEFINED_ID (-1) - -/* a_type field in acl_user_posix_entry_t */ -#define ACL_TYPE_ACCESS (0x8000) -#define ACL_TYPE_DEFAULT (0x4000) - -/* e_tag entry in struct posix_acl_entry */ -#define ACL_USER_OBJ (0x01) -#define ACL_USER (0x02) -#define ACL_GROUP_OBJ (0x04) -#define ACL_GROUP (0x08) -#define ACL_MASK (0x10) -#define ACL_OTHER (0x20) - -/* permissions in the e_perm field */ -#define ACL_READ (0x04) -#define ACL_WRITE (0x02) -#define ACL_EXECUTE (0x01) -//#define ACL_ADD (0x08) -//#define ACL_DELETE (0x10) +#include struct posix_acl_entry { short e_tag; diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h index d23d36842322..8b867e3bf3aa 100644 --- a/include/linux/posix_acl_xattr.h +++ b/include/linux/posix_acl_xattr.h @@ -10,25 +10,9 @@ #define _POSIX_ACL_XATTR_H #include +#include #include -/* Supported ACL a_version fields */ -#define POSIX_ACL_XATTR_VERSION 0x0002 - -/* An undefined entry e_id value */ -#define ACL_UNDEFINED_ID (-1) - -struct posix_acl_xattr_entry { - __le16 e_tag; - __le16 e_perm; - __le32 e_id; -}; - -struct posix_acl_xattr_header { - __le32 a_version; -}; - - static inline size_t posix_acl_xattr_size(int count) { -- cgit v1.2.3 From a8db115e476ee31fc3e892522038da50dd3a66cc Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 21 Sep 2016 15:41:30 +0300 Subject: dmaengine/ARM: omap-dma: Fix the DMAengine compile test on non OMAP configs The DMAengine driver for omap-dma use three function calls from the plat-omap legacy driver. 
When the DMAengine driver is built when ARCH_OMAP is not set, the compilation will fail due to missing symbols. Add empty inline functions to allow the DMAengine driver to be compiled with COMPILE_TEST. Signed-off-by: Peter Ujfalusi Signed-off-by: Vinod Koul --- include/linux/omap-dma.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/omap-dma.h b/include/linux/omap-dma.h index 1d99b61adc65..290081620b3e 100644 --- a/include/linux/omap-dma.h +++ b/include/linux/omap-dma.h @@ -297,6 +297,7 @@ struct omap_system_dma_plat_info { #define dma_omap15xx() __dma_omap15xx(d) #define dma_omap16xx() __dma_omap16xx(d) +#if defined(CONFIG_ARCH_OMAP) extern struct omap_system_dma_plat_info *omap_get_plat_info(void); extern void omap_set_dma_priority(int lch, int dst_port, int priority); @@ -355,4 +356,22 @@ static inline int omap_lcd_dma_running(void) } #endif +#else /* CONFIG_ARCH_OMAP */ + +static inline struct omap_system_dma_plat_info *omap_get_plat_info(void) +{ + return NULL; +} + +static inline int omap_request_dma(int dev_id, const char *dev_name, + void (*callback)(int lch, u16 ch_status, void *data), + void *data, int *dma_ch) +{ + return -ENODEV; +} + +static inline void omap_free_dma(int ch) { } + +#endif /* CONFIG_ARCH_OMAP */ + #endif /* __LINUX_OMAP_DMA_H */ -- cgit v1.2.3 From d503187b6cc4e41c21c02e695e0e7b5acdd066de Mon Sep 17 00:00:00 2001 From: Leif Lindholm Date: Tue, 27 Sep 2016 23:54:12 +0300 Subject: of/serial: move earlycon early_param handling to serial We have multiple "earlycon" early_param handlers - merge the DT one into the main earlycon one. It's a cleanup that also will be useful to defer setting up DT console until ACPI/DT decision is made. 
Rename the exported function to avoid clashing with the function from arch/microblaze/kernel/prom.c Signed-off-by: Leif Lindholm Signed-off-by: Aleksey Makarov Acked-by: Rob Herring Acked-by: Greg Kroah-Hartman Reviewed-by: Peter Hurley Tested-by: Kefeng Wang Tested-by: Christopher Covington Signed-off-by: Greg Kroah-Hartman --- include/linux/of_fdt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 26c3302ae58f..4341f32516d8 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -14,6 +14,7 @@ #include #include +#include /* Definitions used by the flattened device tree */ #define OF_DT_HEADER 0xd00dfeed /* marker */ @@ -66,6 +67,7 @@ extern int early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data); extern int early_init_dt_scan_memory(unsigned long node, const char *uname, int depth, void *data); +extern int early_init_dt_scan_chosen_stdout(void); extern void early_init_fdt_scan_reserved_mem(void); extern void early_init_fdt_reserve_self(void); extern void early_init_dt_add_memory_arch(u64 base, u64 size); @@ -94,6 +96,7 @@ extern void early_get_first_memblock_info(void *, phys_addr_t *); extern u64 of_flat_dt_translate_address(unsigned long node); extern void of_fdt_limit_memory(int limit); #else /* CONFIG_OF_FLATTREE */ +static inline int early_init_dt_scan_chosen_stdout(void) { return -ENODEV; } static inline void early_init_fdt_scan_reserved_mem(void) {} static inline void early_init_fdt_reserve_self(void) {} static inline const char *of_flat_dt_get_machine_name(void) { return NULL; } -- cgit v1.2.3 From ad1696f6f09daacfdf2bf04bc83cd8f48d80e34a Mon Sep 17 00:00:00 2001 From: Aleksey Makarov Date: Tue, 27 Sep 2016 23:54:13 +0300 Subject: ACPI: parse SPCR and enable matching console 'ARM Server Base Boot Requirements' [1] mentions SPCR (Serial Port Console Redirection Table) [2] as a mandatory ACPI table that specifies the
configuration of serial console. Defer initialization of DT earlycon until ACPI/DT decision is made. Parse the ACPI SPCR table, setup earlycon if required, enable specified console. Thanks to Peter Hurley for explaining how this should work. [1] http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0044a/index.html [2] https://msdn.microsoft.com/en-us/library/windows/hardware/dn639132(v=vs.85).aspx Signed-off-by: Aleksey Makarov Acked-by: Rafael J. Wysocki Reviewed-by: Peter Hurley Tested-by: Kefeng Wang Tested-by: Christopher Covington Signed-off-by: Greg Kroah-Hartman --- include/linux/acpi.h | 6 ++++++ include/linux/serial_core.h | 9 ++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c5eaf2f80a4c..2353827731d2 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1074,4 +1074,10 @@ void acpi_table_upgrade(void); static inline void acpi_table_upgrade(void) { } #endif +#ifdef CONFIG_ACPI_SPCR_TABLE +int parse_spcr(bool earlycon); +#else +static inline int parse_spcr(bool earlycon) { return 0; } +#endif + #endif /*_LINUX_ACPI_H*/ diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 378d80a8dd43..344201437017 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -367,11 +367,18 @@ extern const struct earlycon_id __earlycon_table_end[]; #define EARLYCON_DECLARE(_name, fn) OF_EARLYCON_DECLARE(_name, "", fn) -extern int setup_earlycon(char *buf); extern int of_setup_earlycon(const struct earlycon_id *match, unsigned long node, const char *options); +#ifdef CONFIG_SERIAL_EARLYCON +extern bool earlycon_init_is_deferred __initdata; +int setup_earlycon(char *buf); +#else +static const bool earlycon_init_is_deferred; +static inline int setup_earlycon(char *buf) { return 0; } +#endif + struct uart_port *uart_get_console(struct uart_port *ports, int nr, struct console *c); int uart_parse_earlycon(char *p, 
unsigned char *iotype, resource_size_t *addr, -- cgit v1.2.3 From 3c6e8d05d60d8106b5cdc730cf220b2a4b521b66 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 16 Sep 2016 13:48:12 +0200 Subject: mfd/gpio: Move HTC GPIO driver to GPIO subsystem The HTC GPIO driver is a pure GPIO driver and I just can not see what it is doing inside MFD. Let's just move it to GPIO and take this opportunity to move the platform data to include/linux/platform_data/gpio-htc-egpio.h. Cc: arm@kernel.org Cc: Russell King Acked-by: Lee Jones Acked-by: Arnd Bergmann Signed-off-by: Linus Walleij --- include/linux/mfd/htc-egpio.h | 57 ---------------------------- include/linux/platform_data/gpio-htc-egpio.h | 57 ++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 57 deletions(-) delete mode 100644 include/linux/mfd/htc-egpio.h create mode 100644 include/linux/platform_data/gpio-htc-egpio.h (limited to 'include/linux') diff --git a/include/linux/mfd/htc-egpio.h b/include/linux/mfd/htc-egpio.h deleted file mode 100644 index b4201c971367..000000000000 --- a/include/linux/mfd/htc-egpio.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * HTC simple EGPIO irq and gpio extender - */ - -#ifndef __HTC_EGPIO_H__ -#define __HTC_EGPIO_H__ - -#include - -/* Descriptive values for all-in or all-out htc_egpio_chip descriptors.
*/ -#define HTC_EGPIO_OUTPUT (~0) -#define HTC_EGPIO_INPUT 0 - -/** - * struct htc_egpio_chip - descriptor to create gpio_chip for register range - * @reg_start: index of first register - * @gpio_base: gpio number of first pin in this register range - * @num_gpios: number of gpios in this register range, max BITS_PER_LONG - * (number of registers = DIV_ROUND_UP(num_gpios, reg_width)) - * @direction: bitfield, '0' = input, '1' = output, - */ -struct htc_egpio_chip { - int reg_start; - int gpio_base; - int num_gpios; - unsigned long direction; - unsigned long initial_values; -}; - -/** - * struct htc_egpio_platform_data - description provided by the arch - * @irq_base: beginning of available IRQs (eg, IRQ_BOARD_START) - * @num_irqs: number of irqs - * @reg_width: number of bits per register, either 8 or 16 bit - * @bus_width: alignment of the registers, either 16 or 32 bit - * @invert_acks: set if chip requires writing '0' to ack an irq, instead of '1' - * @ack_register: location of the irq/ack register - * @chip: pointer to array of htc_egpio_chip descriptors - * @num_chips: number of egpio chip descriptors - */ -struct htc_egpio_platform_data { - int bus_width; - int reg_width; - - int irq_base; - int num_irqs; - int invert_acks; - int ack_register; - - struct htc_egpio_chip *chip; - int num_chips; -}; - -/* Determine the wakeup irq, to be called during early resume */ -extern int htc_egpio_get_wakeup_irq(struct device *dev); - -#endif diff --git a/include/linux/platform_data/gpio-htc-egpio.h b/include/linux/platform_data/gpio-htc-egpio.h new file mode 100644 index 000000000000..b4201c971367 --- /dev/null +++ b/include/linux/platform_data/gpio-htc-egpio.h @@ -0,0 +1,57 @@ +/* + * HTC simple EGPIO irq and gpio extender + */ + +#ifndef __HTC_EGPIO_H__ +#define __HTC_EGPIO_H__ + +#include + +/* Descriptive values for all-in or all-out htc_egpio_chip descriptors. 
*/ +#define HTC_EGPIO_OUTPUT (~0) +#define HTC_EGPIO_INPUT 0 + +/** + * struct htc_egpio_chip - descriptor to create gpio_chip for register range + * @reg_start: index of first register + * @gpio_base: gpio number of first pin in this register range + * @num_gpios: number of gpios in this register range, max BITS_PER_LONG + * (number of registers = DIV_ROUND_UP(num_gpios, reg_width)) + * @direction: bitfield, '0' = input, '1' = output, + */ +struct htc_egpio_chip { + int reg_start; + int gpio_base; + int num_gpios; + unsigned long direction; + unsigned long initial_values; +}; + +/** + * struct htc_egpio_platform_data - description provided by the arch + * @irq_base: beginning of available IRQs (eg, IRQ_BOARD_START) + * @num_irqs: number of irqs + * @reg_width: number of bits per register, either 8 or 16 bit + * @bus_width: alignment of the registers, either 16 or 32 bit + * @invert_acks: set if chip requires writing '0' to ack an irq, instead of '1' + * @ack_register: location of the irq/ack register + * @chip: pointer to array of htc_egpio_chip descriptors + * @num_chips: number of egpio chip descriptors + */ +struct htc_egpio_platform_data { + int bus_width; + int reg_width; + + int irq_base; + int num_irqs; + int invert_acks; + int ack_register; + + struct htc_egpio_chip *chip; + int num_chips; +}; + +/* Determine the wakeup irq, to be called during early resume */ +extern int htc_egpio_get_wakeup_irq(struct device *dev); + +#endif -- cgit v1.2.3 From 2481366afd71a0c0b7cd725e6750c04cf589673b Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Wed, 28 Sep 2016 15:22:33 -0700 Subject: dma-mapping.h: preserve unmap info for CONFIG_DMA_API_DEBUG When CONFIG_DMA_API_DEBUG is enabled we need to preserve unmapping address even if "unmap" is a no-op for our architecture because we need debug_dma_unmap_page() to correctly clean up all of the debug bookkeeping. Failing to do so results in false positive warnings about previously mapped areas never being unmapped.
Link: http://lkml.kernel.org/r/1474387125-3713-1-git-send-email-andrew.smirnov@gmail.com Signed-off-by: Andrey Smirnov Reviewed-by: Robin Murphy Cc: Joerg Roedel Cc: Will Deacon Cc: Zhen Lei Cc: "Luis R. Rodriguez" Cc: Christian Borntraeger Cc: Geliang Tang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 66533e18276c..dc69df04abc1 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -718,7 +718,7 @@ static inline int dma_mmap_wc(struct device *dev, #define dma_mmap_writecombine dma_mmap_wc #endif -#ifdef CONFIG_NEED_DMA_MAP_STATE +#if defined(CONFIG_NEED_DMA_MAP_STATE) || defined(CONFIG_DMA_API_DEBUG) #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME #define DEFINE_DMA_UNMAP_LEN(LEN_NAME) __u32 LEN_NAME #define dma_unmap_addr(PTR, ADDR_NAME) ((PTR)->ADDR_NAME) -- cgit v1.2.3 From 484611357c19f9e19ef742ebef4505a07d243cc9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 28 Sep 2016 10:54:32 -0400 Subject: bpf: allow access into map value arrays Suppose you have a map array value that is something like this struct foo { unsigned iter; int array[SOME_CONSTANT]; }; You can easily insert this into an array, but you cannot modify the contents of foo->array[] after the fact. This is because we have no way to verify we won't go off the end of the array at verification time. This patch provides a start for this work. We accomplish this by keeping track of a minimum and maximum value a register could be while we're checking the code. Then at the time we try to do an access into a MAP_VALUE we verify that the maximum offset into that region is a valid access into that memory region. 
So in practice, code such as this unsigned index = 0; if (foo->iter >= SOME_CONSTANT) foo->iter = index; else index = foo->iter++; foo->array[index] = bar; would be allowed, as we can verify that index will always be between 0 and SOME_CONSTANT-1. If you wish to use signed values you'll have to have an extra check to make sure the index isn't less than 0, or do something like index %= SOME_CONSTANT. Signed-off-by: Josef Bacik Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 7 +++++++ include/linux/bpf_verifier.h | 12 ++++++++++++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5691fdc83819..c201017b5730 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -139,6 +139,13 @@ enum bpf_reg_type { */ PTR_TO_PACKET, PTR_TO_PACKET_END, /* skb->data + headlen */ + + /* PTR_TO_MAP_VALUE_ADJ is used for doing pointer math inside of a map + * elem value. We only allow this if we can statically verify that + * access from this register are going to fall within the size of the + * map element. + */ + PTR_TO_MAP_VALUE_ADJ, }; struct bpf_prog; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c5cb661712c9..7035b997aaa5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -10,8 +10,19 @@ #include /* for enum bpf_reg_type */ #include /* for MAX_BPF_STACK */ + /* Just some arbitrary values so we can safely do math without overflowing and + * are obviously wrong for any sort of memory access. + */ +#define BPF_REGISTER_MAX_RANGE (1024 * 1024 * 1024) +#define BPF_REGISTER_MIN_RANGE -(1024 * 1024 * 1024) + struct bpf_reg_state { enum bpf_reg_type type; + /* + * Used to determine if any memory access using this register will + * result in a bad access. 
+ */ + u64 min_value, max_value; union { /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ s64 imm; @@ -81,6 +92,7 @@ struct bpf_verifier_env { u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; bool seen_direct_write; + bool varlen_map_value_access; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ }; -- cgit v1.2.3 From 6c3f70ac7c6b4a29b6905be879282628e65f50dd Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 27 Sep 2016 11:58:44 -0700 Subject: HID: add missing \n to end of dev_warn messages Trivial fix, dev_warn messages are missing a \n, so add it. Signed-off-by: Colin Ian King Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 75b66eccc692..b2ec82712baa 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -837,7 +837,7 @@ __u32 hid_field_extract(const struct hid_device *hid, __u8 *report, */ static inline void hid_device_io_start(struct hid_device *hid) { if (hid->io_started) { - dev_warn(&hid->dev, "io already started"); + dev_warn(&hid->dev, "io already started\n"); return; } hid->io_started = true; @@ -857,7 +857,7 @@ static inline void hid_device_io_start(struct hid_device *hid) { */ static inline void hid_device_io_stop(struct hid_device *hid) { if (!hid->io_started) { - dev_warn(&hid->dev, "io already stopped"); + dev_warn(&hid->dev, "io already stopped\n"); return; } hid->io_started = false; -- cgit v1.2.3 From 2895e1f8048d1be7b1b5be6439c740621c0e5361 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Thu, 29 Sep 2016 12:02:39 +0200 Subject: dma-mapping: fix ia64 build, use PHYS_PFN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kbuild test robot reports: In file included from include/linux/skbuff.h:34:0, from include/linux/tcp.h:21, from
drivers/net/ethernet/amd/xgbe/xgbe-drv.c:119: include/linux/dma-mapping.h: In function 'dma_map_resource': >> include/linux/dma-mapping.h:274:22: error: implicit declaration of function '__phys_to_pfn' [-Werror=implicit-function-declaration] unsigned long pfn = __phys_to_pfn(phys_addr); ^~~~~~~~~~~~~ ia64 does not provide __phys_to_pfn(), use the PHYS_PFN() alias. Signed-off-by: Niklas Söderlund Signed-off-by: Vinod Koul --- include/linux/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 6e00c7fdbbd3..ff7c87fb0305 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -271,7 +271,7 @@ static inline dma_addr_t dma_map_resource(struct device *dev, unsigned long attrs) { struct dma_map_ops *ops = get_dma_ops(dev); - unsigned long pfn = __phys_to_pfn(phys_addr); + unsigned long pfn = PHYS_PFN(phys_addr); dma_addr_t addr; BUG_ON(!valid_dma_direction(dir)); -- cgit v1.2.3 From 3757dc48a66f829cf6ba82a612ba4587ab4b5f1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Thu, 29 Sep 2016 12:02:40 +0200 Subject: dma-mapping: fix m32r build warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kbuild test robot reports: In file included from include/linux/skbuff.h:34:0, from include/linux/icmpv6.h:4, from include/linux/ipv6.h:75, from include/net/ipv6.h:16, from include/linux/sunrpc/clnt.h:27, from include/linux/nfs_fs.h:30, from fs/lockd/clntlock.c:13: include/linux/dma-mapping.h: In function 'dma_map_resource': >> include/linux/dma-mapping.h:274:16: warning: unused variable 'pfn' [-Wunused-variable] unsigned long pfn = __phys_to_pfn(phys_addr); ^~~ The pfn value is only used once in the call to pfn_valid(), remove the variable and calculate the pfn when it's needed. 
Note that the kbuild report is old and PHYS_PFN() is now used instead of __phys_to_pfn() to calculate the pfn. Signed-off-by: Niklas Söderlund Signed-off-by: Vinod Koul --- include/linux/dma-mapping.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index ff7c87fb0305..642cb4c7ad37 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -271,13 +271,12 @@ static inline dma_addr_t dma_map_resource(struct device *dev, unsigned long attrs) { struct dma_map_ops *ops = get_dma_ops(dev); - unsigned long pfn = PHYS_PFN(phys_addr); dma_addr_t addr; BUG_ON(!valid_dma_direction(dir)); /* Don't allow RAM to be mapped */ - BUG_ON(pfn_valid(pfn)); + BUG_ON(pfn_valid(PHYS_PFN(phys_addr))); addr = phys_addr; if (ops->map_resource) -- cgit v1.2.3 From bd11f0741fa5a2c296629898ad07759dd12b35bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Tue, 27 Sep 2016 23:57:58 -0700 Subject: ipv6 addrconf: implement RFC7559 router solicitation backoff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implements: https://tools.ietf.org/html/rfc7559 Backoff is performed according to RFC3315 section 14: https://tools.ietf.org/html/rfc3315#section-14 We allow setting /proc/sys/net/ipv6/conf/*/router_solicitations to a negative value meaning an unlimited number of retransmits, and we make this the new default (inline with the RFC). We also add a new setting: /proc/sys/net/ipv6/conf/*/router_solicitation_max_interval defaulting to 1 hour (per RFC recommendation). Signed-off-by: Maciej Żenczykowski Acked-by: Erik Kline Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index c6dbcd84a2c7..7e9a789be5e0 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -18,6 +18,7 @@ struct ipv6_devconf { __s32 dad_transmits; __s32 rtr_solicits; __s32 rtr_solicit_interval; + __s32 rtr_solicit_max_interval; __s32 rtr_solicit_delay; __s32 force_mld_version; __s32 mldv1_unsolicited_report_interval; -- cgit v1.2.3 From 38a3e1fc1dac480f3672ab22fc97e1f995c80ed7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 6 Sep 2016 16:00:47 +0200 Subject: sched/wait: Fix abort_exclusive_wait(), it should pass TASK_NORMAL to wake_up() Otherwise this logic only works if mode is "compatible" with another exclusive waiter. If some wq has both TASK_INTERRUPTIBLE and TASK_UNINTERRUPTIBLE waiters, abort_exclusive_wait() won't wake an uninterruptible waiter. The main user is __wait_on_bit_lock() and currently it is fine but only because TASK_KILLABLE includes TASK_UNINTERRUPTIBLE and we do not have lock_page_interruptible() yet. Just use TASK_NORMAL and remove the "mode" arg from abort_exclusive_wait(). Yes, this means that (say) wake_up_interruptible() can wake up the non-interruptible waiter(s), but I think this is fine. And in fact I think that abort_exclusive_wait() must die, see the next change.
Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Al Viro Cc: Bart Van Assche Cc: Johannes Weiner Cc: Linus Torvalds Cc: Mike Galbraith Cc: Neil Brown Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160906140047.GA6157@redhat.com Signed-off-by: Ingo Molnar --- include/linux/wait.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index c3ff74d764fa..e4cfd1ed726e 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -281,8 +281,8 @@ wait_queue_head_t *bit_waitqueue(void *, int); if (___wait_is_interruptible(state) && __int) { \ __ret = __int; \ if (exclusive) { \ - abort_exclusive_wait(&wq, &__wait, \ - state, NULL); \ + abort_exclusive_wait(&wq, &__wait, \ + NULL); \ goto __out; \ } \ break; \ @@ -989,7 +989,7 @@ void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state); void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state); long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state); void finish_wait(wait_queue_head_t *q, wait_queue_t *wait); -void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, unsigned int mode, void *key); +void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, void *key); long wait_woken(wait_queue_t *wait, unsigned mode, long timeout); int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); -- cgit v1.2.3 From b1ea06a90f528e516929a4da1d9b8838752bceb9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 8 Sep 2016 18:48:15 +0200 Subject: sched/wait: Avoid abort_exclusive_wait() in ___wait_event() ___wait_event() doesn't really need abort_exclusive_wait(), we can simply change prepare_to_wait_event() to remove the waiter from q->task_list if it was interrupted. 
This simplifies the code/logic, and this way prepare_to_wait_event() can have more users, see the next change. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Al Viro Cc: Bart Van Assche Cc: Johannes Weiner Cc: Linus Torvalds Cc: Mike Galbraith Cc: Neil Brown Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160908164815.GA18801@redhat.com Signed-off-by: Ingo Molnar -- include/linux/wait.h | 7 +------ kernel/sched/wait.c | 35 +++++++++++++++++++++++++---------- 2 files changed, 26 insertions(+), 16 deletions(-) --- include/linux/wait.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index e4cfd1ed726e..7261dcbe5afe 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -280,12 +280,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); \ if (___wait_is_interruptible(state) && __int) { \ __ret = __int; \ - if (exclusive) { \ - abort_exclusive_wait(&wq, &__wait, \ - NULL); \ - goto __out; \ - } \ - break; \ + goto __out; \ } \ \ cmd; \ -- cgit v1.2.3 From eaf9ef52241b545fe63621266bfc6fd8b06559ff Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 6 Sep 2016 16:00:53 +0200 Subject: sched/wait: Avoid abort_exclusive_wait() in __wait_on_bit_lock() __wait_on_bit_lock() doesn't need abort_exclusive_wait() too. Right now it can't use prepare_to_wait_event() (see the next change), but it can do the additional finish_wait() if action() fails. abort_exclusive_wait() no longer has callers, remove it. 
Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Al Viro Cc: Bart Van Assche Cc: Johannes Weiner Cc: Linus Torvalds Cc: Mike Galbraith Cc: Neil Brown Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160906140053.GA6164@redhat.com Signed-off-by: Ingo Molnar --- include/linux/wait.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 7261dcbe5afe..19c75f9545ce 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -984,7 +984,6 @@ void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state); void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state); long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state); void finish_wait(wait_queue_head_t *q, wait_queue_t *wait); -void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, void *key); long wait_woken(wait_queue_t *wait, unsigned mode, long timeout); int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); -- cgit v1.2.3 From 0176beaffbe9ed627b6a4dfa61d640f1a848086f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 6 Sep 2016 16:00:55 +0200 Subject: sched/wait: Introduce init_wait_entry() The partial initialization of wait_queue_t in prepare_to_wait_event() looks ugly. This was done to shrink .text, but we can simply add the new helper which does the full initialization and shrink the compiled code a bit more. And. This way prepare_to_wait_event() can have more users. In particular we are ready to remove the signal_pending_state() checks from wait_bit_action_f helpers and change __wait_on_bit_lock() to use prepare_to_wait_event(). 
Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Al Viro Cc: Bart Van Assche Cc: Johannes Weiner Cc: Linus Torvalds Cc: Mike Galbraith Cc: Neil Brown Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160906140055.GA6167@redhat.com Signed-off-by: Ingo Molnar --- include/linux/wait.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 19c75f9545ce..2408e8d5c05c 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -248,6 +248,8 @@ wait_queue_head_t *bit_waitqueue(void *, int); (!__builtin_constant_p(state) || \ state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE) \ +extern void init_wait_entry(wait_queue_t *__wait, int flags); + /* * The below macro ___wait_event() has an explicit shadow of the __ret * variable when used from the wait_event_*() macros. @@ -266,12 +268,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); wait_queue_t __wait; \ long __ret = ret; /* explicit shadow */ \ \ - INIT_LIST_HEAD(&__wait.task_list); \ - if (exclusive) \ - __wait.flags = WQ_FLAG_EXCLUSIVE; \ - else \ - __wait.flags = 0; \ - \ + init_wait_entry(&__wait, exclusive ? WQ_FLAG_EXCLUSIVE : 0); \ for (;;) { \ long __int = prepare_to_wait_event(&wq, &__wait, state);\ \ -- cgit v1.2.3 From 24fc7edb92eea05946119cc0258c891c26b3b469 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 9 May 2016 10:37:59 +0200 Subject: sched/core: Introduce 'struct sched_domain_shared' Since struct sched_domain is strictly per cpu; introduce a structure that is shared between all 'identical' sched_domains. Limit to SD_SHARE_PKG_RESOURCES domains for now, as we'll only use it for shared cache state; if another use comes up later we can easily relax this. 
While the sched_group's are normally shared between CPUs, these are not natural to use when we need some shared state on a domain level -- since that would require the domain to have a parent, which is not a given. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b99fcd1b341e..8a878b9649a1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1067,6 +1067,10 @@ extern int sched_domain_level_max; struct sched_group; +struct sched_domain_shared { + atomic_t ref; +}; + struct sched_domain { /* These fields must be setup */ struct sched_domain *parent; /* top domain must be null terminated */ @@ -1135,6 +1139,7 @@ struct sched_domain { void *private; /* used during construction */ struct rcu_head rcu; /* used during destruction */ }; + struct sched_domain_shared *shared; unsigned int span_weight; /* @@ -1168,6 +1173,7 @@ typedef int (*sched_domain_flags_f)(void); struct sd_data { struct sched_domain **__percpu sd; + struct sched_domain_shared **__percpu sds; struct sched_group **__percpu sg; struct sched_group_capacity **__percpu sgc; }; -- cgit v1.2.3 From 0e369d757578b23ac50b893f920aa50fdbc45fb6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 9 May 2016 10:38:01 +0200 Subject: sched/core: Replace sd_busy/nr_busy_cpus with sched_domain_shared Move the nr_busy_cpus thing from its hacky sd->parent->groups->sgc location into the much more natural sched_domain_shared location. 
Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8a878b9649a1..98888f1a03bc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1069,6 +1069,7 @@ struct sched_group; struct sched_domain_shared { atomic_t ref; + atomic_t nr_busy_cpus; }; struct sched_domain { -- cgit v1.2.3 From 10e2f1acd0106c05229f94c70a344ce3a2c8008b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 9 May 2016 10:38:05 +0200 Subject: sched/core: Rewrite and improve select_idle_siblings() select_idle_siblings() is a known pain point for a number of workloads; it either does too much or not enough and sometimes just does plain wrong. This rewrite attempts to address a number of issues (but sadly not all). The current code does an unconditional sched_domain iteration; with the intent of finding an idle core (on SMT hardware). The problems which this patch tries to address are: - it's pointless to look for idle cores if the machine is real busy; at which point you're just wasting cycles. - its behaviour is inconsistent between SMT and !SMT hardware in that !SMT hardware ends up doing a scan for any idle CPU in the LLC domain, while SMT hardware does a scan for idle cores and if that fails, falls back to a scan for idle threads on the 'target' core. The new code replaces the sched_domain scan with 3 explicit scans: 1) search for an idle core in the LLC 2) search for an idle CPU in the LLC 3) search for an idle thread in the 'target' core where 1 and 3 are conditional on SMT support and 1 and 2 have runtime heuristics to skip the step. Step 1) is conditional on sd_llc_shared->has_idle_cores; when a cpu goes idle and sd_llc_shared->has_idle_cores is false, we scan all SMT siblings of the CPU going idle.
Similarly, we clear sd_llc_shared->has_idle_cores when we fail to find an idle core. Step 2) tracks the average cost of the scan and compares this to the average idle time guestimate for the CPU doing the wakeup. There is a significant fudge factor involved to deal with the variability of the averages. Esp. hackbench was sensitive to this. Step 3) is unconditional; we assume (also per step 1) that scanning all SMT siblings in a core is 'cheap'. With this; SMT systems gain step 2, which cures a few benchmarks -- notably one from Facebook. One 'feature' of the sched_domain iteration, which we preserve in the new code, is that it would start scanning from the 'target' CPU, instead of scanning the cpumask in cpu id order. This avoids multiple CPUs in the LLC scanning for idle to gang up and find the same CPU quite as much. The down side is that tasks can end up hopping across the LLC for no apparent reason. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 98888f1a03bc..2c30ed860d66 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1070,6 +1070,7 @@ struct sched_group; struct sched_domain_shared { atomic_t ref; atomic_t nr_busy_cpus; + int has_idle_cores; }; struct sched_domain { @@ -1102,6 +1103,8 @@ struct sched_domain { u64 max_newidle_lb_cost; unsigned long next_decay_max_lb_cost; + u64 avg_scan_cost; /* select_idle_sibling */ + #ifdef CONFIG_SCHEDSTATS /* load_balance() stats */ unsigned int lb_count[CPU_MAX_IDLE_TYPES]; -- cgit v1.2.3 From a458ae2ea616420f74480f0f5ed67ca0f3b5dbf7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 20 Sep 2016 20:29:40 +0200 Subject: sched/core, ia64: Rename set_curr_task() Rename the ia64 only set_curr_task() function to free up 
the name. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c30ed860d66..ad51978ff15e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2581,7 +2581,7 @@ static inline bool is_idle_task(const struct task_struct *p) return p->pid == 0; } extern struct task_struct *curr_task(int cpu); -extern void set_curr_task(int cpu, struct task_struct *p); +extern void ia64_set_curr_task(int cpu, struct task_struct *p); void yield(void); -- cgit v1.2.3 From 68107df5f2cb5dc3785be40162bfe2f19a178bbb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 26 Sep 2016 02:29:19 +0200 Subject: u64_stats: Introduce IRQs disabled helpers Introduce light versions of u64_stats helpers for context where either preempt or IRQs are disabled. This way we can make this library usable by scheduler irqtime accounting which currently implements its own ad-hoc version.
Signed-off-by: Frederic Weisbecker Cc: Eric Dumazet Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1474849761-12678-4-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/u64_stats_sync.h | 45 ++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index d3a2bb712af3..650f3dd6b800 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -103,31 +103,42 @@ static inline void u64_stats_update_end_raw(struct u64_stats_sync *syncp) #endif } -static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp) +static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) return read_seqcount_begin(&syncp->seq); #else -#if BITS_PER_LONG==32 - preempt_disable(); -#endif return 0; #endif } -static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, +static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp) +{ +#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) + preempt_disable(); +#endif + return __u64_stats_fetch_begin(syncp); +} + +static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, unsigned int start) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) return read_seqcount_retry(&syncp->seq, start); #else -#if BITS_PER_LONG==32 - preempt_enable(); -#endif return false; #endif } +static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, + unsigned int start) +{ +#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) + preempt_enable(); +#endif + return __u64_stats_fetch_retry(syncp, start); +} + /* * In case irq handlers can update u64 counters, readers can use following helpers * - SMP 32bit arches use 
seqcount protection, irq safe. @@ -136,27 +147,19 @@ static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, */ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - return read_seqcount_begin(&syncp->seq); -#else -#if BITS_PER_LONG==32 +#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) local_irq_disable(); #endif - return 0; -#endif + return __u64_stats_fetch_begin(syncp); } static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp, - unsigned int start) + unsigned int start) { -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - return read_seqcount_retry(&syncp->seq, start); -#else -#if BITS_PER_LONG==32 +#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) local_irq_enable(); #endif - return false; -#endif + return __u64_stats_fetch_retry(syncp, start); } #endif /* _LINUX_U64_STATS_SYNC_H */ -- cgit v1.2.3 From 4d737042d6c4ee10a632cf94b953169d13955a40 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Wed, 7 Sep 2016 13:19:00 -0400 Subject: xen/x86: Convert to hotplug state machine Switch to new CPU hotplug infrastructure. Signed-off-by: Boris Ostrovsky Suggested-by: Sebastian Andrzej Siewior Signed-off-by: David Vrabel --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 242bf530edfc..33d352f3295d 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -21,6 +21,7 @@ enum cpuhp_state { CPUHP_X2APIC_PREPARE, CPUHP_SMPCFD_PREPARE, CPUHP_RCUTREE_PREP, + CPUHP_XEN_PREPARE, CPUHP_NOTIFY_PREPARE, CPUHP_TIMERS_DEAD, CPUHP_BRINGUP_CPU, -- cgit v1.2.3 From c8761e2016aa51a2829563b02a0a55913bdb0be8 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 7 Sep 2016 13:19:01 -0400 Subject: xen/events: Convert to hotplug state machine Install the callbacks via the state machine. 
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Boris Ostrovsky Signed-off-by: David Vrabel --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 33d352f3295d..5f603166831c 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -22,6 +22,7 @@ enum cpuhp_state { CPUHP_SMPCFD_PREPARE, CPUHP_RCUTREE_PREP, CPUHP_XEN_PREPARE, + CPUHP_XEN_EVTCHN_PREPARE, CPUHP_NOTIFY_PREPARE, CPUHP_TIMERS_DEAD, CPUHP_BRINGUP_CPU, -- cgit v1.2.3 From a468f0ef516fda9c7d91bb550d458e853d76955e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 19 Sep 2016 17:55:10 -0700 Subject: f2fs: use crc and cp version to determine roll-forward recovery Previously, we used cp_version only to detect recoverable dnodes. In order to avoid same garbage cp_version, we needed to truncate the next dnode during checkpoint, resulting in additional discard or data write. If we can distinguish this by using crc in addition to cp_version, we can remove this overhead. There is backward compatibility concern where it changes node_footer layout. So, this patch introduces a new checkpoint flag, CP_CRC_RECOVERY_FLAG, to detect new layout. New layout will be activated only when this flag is set. Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 4c02c6521fef..422630b8e588 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -100,6 +100,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_CRC_RECOVERY_FLAG 0x00000040 #define CP_FASTBOOT_FLAG 0x00000020 #define CP_FSCK_FLAG 0x00000010 #define CP_ERROR_FLAG 0x00000008 -- cgit v1.2.3 From d29216842a85c7970c536108e093963f02714498 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Wed, 28 Sep 2016 00:27:17 -0500 Subject: mnt: Add a per mount namespace limit on the number of mounts CAI Qian pointed out that the semantics of shared subtrees make it possible to create an exponentially increasing number of mounts in a mount namespace. mkdir /tmp/1 /tmp/2 mount --make-rshared / for i in $(seq 1 20) ; do mount --bind /tmp/1 /tmp/2 ; done Will create 2^20 or 1048576 mounts, which is a practical problem as some people have managed to hit this by accident. As such CVE-2016-6213 was assigned. Ian Kent described the situation for autofs users as follows: > The number of mounts for direct mount maps is usually not very large because of > the way they are implemented, large direct mount maps can have performance > problems. There can be anywhere from a few (likely case a few hundred) to less > than 10000, plus mounts that have been triggered and not yet expired. > > Indirect mounts have one autofs mount at the root plus the number of mounts that > have been triggered and not yet expired. > > The number of autofs indirect map entries can range from a few to the common > case of several thousand and in rare cases up to between 30000 and 50000. I've > not heard of people with maps larger than 50000 entries. > > The larger the number of map entries the greater the possibility for a large > number of active mounts so it's not hard to expect cases of a 1000 or somewhat > more active mounts. So I am setting the default number of mounts allowed per mount namespace at 100,000. This is more than enough for any use case I know of, but small enough to quickly stop an exponential increase in mounts. Which should be perfect to catch misconfigurations and malfunctioning programs. For anyone who needs a higher limit this can be changed by writing to the new /proc/sys/fs/mount-max sysctl. Tested-by: CAI Qian Signed-off-by: "Eric W.
Biederman" --- include/linux/mount.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 54a594d49733..1172cce949a4 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -96,4 +96,6 @@ extern void mark_mounts_for_expiry(struct list_head *mounts); extern dev_t name_to_dev_t(const char *name); +extern unsigned int sysctl_mount_max; + #endif /* _LINUX_MOUNT_H */ -- cgit v1.2.3 From e856a231d5d5742fe7c63e3a2b266bef668af5b4 Mon Sep 17 00:00:00 2001 From: Frank Sorenson Date: Thu, 29 Sep 2016 10:44:37 -0500 Subject: sunrpc: add hash_cred() function to rpc_authops struct Currently, a single hash algorithm is used to hash the auth_cred for the credcache for all rpc_auth types. Add a hash_cred() function to the rpc_authops struct to allow a hash function specific to each auth flavor. Signed-off-by: Frank Sorenson Signed-off-by: Anna Schumaker --- include/linux/sunrpc/auth.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 4ccf184e971f..b1bc62ba20a2 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -131,6 +131,7 @@ struct rpc_authops { struct rpc_auth * (*create)(struct rpc_auth_create_args *, struct rpc_clnt *); void (*destroy)(struct rpc_auth *); + int (*hash_cred)(struct auth_cred *, unsigned int); struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int); struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int, gfp_t); int (*list_pseudoflavors)(rpc_authflavor_t *, int); -- cgit v1.2.3 From 22f2ac51b6d643666f4db093f13144f773ff3f3a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 30 Sep 2016 15:11:29 -0700 Subject: mm: workingset: fix crash in shadow node shrinker caused by replace_page_cache_page() Antonio reports the following crash when using fuse under memory pressure: kernel BUG at 
/build/linux-a2WvEb/linux-4.4.0/mm/workingset.c:346! invalid opcode: 0000 [#1] SMP Modules linked in: all of them CPU: 2 PID: 63 Comm: kswapd0 Not tainted 4.4.0-36-generic #55-Ubuntu Hardware name: System manufacturer System Product Name/P8H67-M PRO, BIOS 3904 04/27/2013 task: ffff88040cae6040 ti: ffff880407488000 task.ti: ffff880407488000 RIP: shadow_lru_isolate+0x181/0x190 Call Trace: __list_lru_walk_one.isra.3+0x8f/0x130 list_lru_walk_one+0x23/0x30 scan_shadow_nodes+0x34/0x50 shrink_slab.part.40+0x1ed/0x3d0 shrink_zone+0x2ca/0x2e0 kswapd+0x51e/0x990 kthread+0xd8/0xf0 ret_from_fork+0x3f/0x70 which corresponds to the following sanity check in the shadow node tracking: BUG_ON(node->count & RADIX_TREE_COUNT_MASK); The workingset code tracks radix tree nodes that exclusively contain shadow entries of evicted pages in them, and this (somewhat obscure) line checks whether there are real pages left that would interfere with reclaim of the radix tree node under memory pressure. While discussing ways how fuse might sneak pages into the radix tree past the workingset code, Miklos pointed to replace_page_cache_page(), and indeed there is a problem there: it properly accounts for the old page being removed - __delete_from_page_cache() does that - but then does a raw radix_tree_insert(), not accounting for the replacement page. Eventually the page count bits in node->count underflow while leaving the node incorrectly linked to the shadow node LRU. To address this, make sure replace_page_cache_page() uses the tracked page insertion code, page_cache_tree_insert(). This fixes the page accounting and makes sure page-containing nodes are properly unlinked from the shadow node LRU again. Also, make the sanity checks a bit less obscure by using the helpers for checking the number of pages and shadows in a radix tree node.
Fixes: 449dd6984d0e ("mm: keep page cache radix tree nodes in check") Link: http://lkml.kernel.org/r/20160919155822.29498-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reported-by: Antonio SJ Musumeci Debugged-by: Miklos Szeredi Cc: [3.15+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index b17cc4830fa6..4a529c984a3f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -257,6 +257,7 @@ static inline void workingset_node_pages_inc(struct radix_tree_node *node) static inline void workingset_node_pages_dec(struct radix_tree_node *node) { + VM_BUG_ON(!workingset_node_pages(node)); node->count--; } @@ -272,6 +273,7 @@ static inline void workingset_node_shadows_inc(struct radix_tree_node *node) static inline void workingset_node_shadows_dec(struct radix_tree_node *node) { + VM_BUG_ON(!workingset_node_shadows(node)); node->count -= 1U << RADIX_TREE_COUNT_SHIFT; } -- cgit v1.2.3 From 37aa7271d9742b574763e5ce019bde9c49aa8bfe Mon Sep 17 00:00:00 2001 From: John Youn Date: Fri, 30 Sep 2016 15:11:35 -0700 Subject: include/linux/property.h: fix typo/compile error This fixes commit d76eebfa175e ("include/linux/property.h: fix build issues with gcc-4.4.4"). With that commit we get the following compile error when using the PROPERTY_ENTRY_INTEGER_ARRAY macro. include/linux/property.h:201:39: error: `u32_data' undeclared (first use in this function) PROPERTY_ENTRY_INTEGER_ARRAY(_name_, u32, _val_) ^ include/linux/property.h:193:17: note: in definition of macro `PROPERTY_ENTRY_INTEGER_ARRAY' { .pointer = { _type_##_data = _val_ } }, \ ^ This needs a '.' to reference the union member. It seems this was just overlooked here since it is done correctly in similar constructs in other parts of the original commit. This fix is in preparation of upcoming commits that will use this macro. 
Fixes: commit d76eebfa175e ("include/linux/property.h: fix build issues with gcc-4.4.4") Link: http://lkml.kernel.org/r/2de3b929290d88a723ed829a3e3cbd02044714df.1475114627.git.johnyoun@synopsys.com Signed-off-by: John Youn Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/property.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 3a2f9ae25c86..856e50b2140c 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -190,7 +190,7 @@ struct property_entry { .length = ARRAY_SIZE(_val_) * sizeof(_type_), \ .is_array = true, \ .is_string = false, \ - { .pointer = { _type_##_data = _val_ } }, \ + { .pointer = { ._type_##_data = _val_ } }, \ } #define PROPERTY_ENTRY_U8_ARRAY(_name_, _val_) \ -- cgit v1.2.3 From e046114af5fcafe8d6d3f0b6ccb99804bad34bfb Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Fri, 30 Sep 2016 17:19:31 -0600 Subject: libnvdimm: clear the internal poison_list when clearing badblocks nvdimm_clear_poison cleared the user-visible badblocks, and sent commands to the NVDIMM to clear the areas marked as 'poison', but it neglected to clear the same areas from the internal poison_list which is used to marshal ARS results before sorting them by namespace. As a result, once on-demand ARS functionality was added: 37b137f nfit, libnvdimm: allow an ARS scrub to be triggered on demand A scrub triggered from either sysfs or an MCE was found to be adding stale entries that had been cleared from gendisk->badblocks, but were still present in nvdimm_bus->poison_list. Additionally, the stale entries could be triggered into producing stale disk->badblocks by simply disabling and re-enabling the namespace or region. This adds the missing step of clearing poison_list entries when clearing poison, so that it is always in sync with badblocks. 
Fixes: 37b137f ("nfit, libnvdimm: allow an ARS scrub to be triggered on demand") Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index ad18d0531b6e..4a5f8c51f2a5 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -129,6 +129,8 @@ static inline struct nd_blk_region_desc *to_blk_region_desc( } int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length); +void nvdimm_clear_from_poison_list(struct nvdimm_bus *nvdimm_bus, + phys_addr_t start, unsigned int len); struct nvdimm_bus *nvdimm_bus_register(struct device *parent, struct nvdimm_bus_descriptor *nfit_desc); void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); -- cgit v1.2.3 From 44c462eb9e19dfa089b454271dd2dff5eaf1ad6d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 19 Sep 2016 16:38:50 -0700 Subject: libnvdimm, region: move region-mapping input-parameters to nd_mapping_desc Before we add more libnvdimm-private fields to nd_mapping make it clear which parameters are input vs libnvdimm internals. Use struct nd_mapping_desc instead of struct nd_mapping in nd_region_desc and make struct nd_mapping private to libnvdimm.
Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 4a5f8c51f2a5..f4947fda11e7 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -50,23 +50,6 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc); -struct nd_namespace_label; -struct nvdimm_drvdata; - -struct nd_mapping { - struct nvdimm *nvdimm; - struct nd_namespace_label **labels; - u64 start; - u64 size; - /* - * @ndd is for private use at region enable / disable time for - * get_ndd() + put_ndd(), all other nd_mapping to ndd - * conversions use to_ndd() which respects enabled state of the - * nvdimm. - */ - struct nvdimm_drvdata *ndd; -}; - struct nvdimm_bus_descriptor { const struct attribute_group **attr_groups; unsigned long cmd_mask; @@ -89,9 +72,15 @@ struct nd_interleave_set { u64 cookie; }; +struct nd_mapping_desc { + struct nvdimm *nvdimm; + u64 start; + u64 size; +}; + struct nd_region_desc { struct resource *res; - struct nd_mapping *nd_mapping; + struct nd_mapping_desc *mapping; u16 num_mappings; const struct attribute_group **attr_groups; struct nd_interleave_set *nd_set; -- cgit v1.2.3 From 433c0e04bc06da6d049c691a9ef238d61edb841c Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Sun, 2 Oct 2016 17:46:38 -0700 Subject: remoteproc: Split driver and consumer dereferencing In order to be able to lock a rproc driver implementation only when used by a client, we must differentiate between the dereference operation of a client and the implementation itself. This patch brings no functional change.
Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index c321eab5054e..930023b7c825 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -493,6 +493,7 @@ struct rproc *rproc_alloc(struct device *dev, const char *name, void rproc_put(struct rproc *rproc); int rproc_add(struct rproc *rproc); int rproc_del(struct rproc *rproc); +void rproc_free(struct rproc *rproc); int rproc_boot(struct rproc *rproc); void rproc_shutdown(struct rproc *rproc); -- cgit v1.2.3 From fcff415c9421b417ef91d48f546f3c4566ddc358 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 14 Sep 2016 16:39:51 +0800 Subject: ceph: handle CEPH_SESSION_REJECT message Signed-off-by: Yan, Zheng --- include/linux/ceph/ceph_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index c086e63dcee1..f96de8de4fa7 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -281,6 +281,7 @@ enum { CEPH_SESSION_FLUSHMSG, CEPH_SESSION_FLUSHMSG_ACK, CEPH_SESSION_FORCE_RO, + CEPH_SESSION_REJECT, }; extern const char *ceph_session_op_name(int op); -- cgit v1.2.3 From 71be6b4942dd64bc17728f82f787be98fd8afed7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 3 Oct 2016 09:11:14 -0700 Subject: vfs: add a FALLOC_FL_UNSHARE mode to fallocate to unshare a range of blocks Add a new fallocate mode flag that explicitly unshares blocks on filesystems that support such features. The new flag can only be used with an allocate-mode fallocate call. Signed-off-by: Darrick J. 
Wong --- include/linux/falloc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/falloc.h b/include/linux/falloc.h index 996111000a8c..7494dc67c66f 100644 --- a/include/linux/falloc.h +++ b/include/linux/falloc.h @@ -25,6 +25,7 @@ struct space_resv { FALLOC_FL_PUNCH_HOLE | \ FALLOC_FL_COLLAPSE_RANGE | \ FALLOC_FL_ZERO_RANGE | \ - FALLOC_FL_INSERT_RANGE) + FALLOC_FL_INSERT_RANGE | \ + FALLOC_FL_UNSHARE_RANGE) #endif /* _FALLOC_H_ */ -- cgit v1.2.3 From f4c1181f0fdeab19fb0b656abfb41bee7ca080b8 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 3 Oct 2016 10:59:32 +0200 Subject: gpio: OF: localize some gpiochip init functions of_gpiochip_add() and of_gpiochip_remove() are only used locally in the gpio subsystem so move these functions to the local header. Signed-off-by: Linus Walleij --- include/linux/of_gpio.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index 092186c62ff4..3f87ea5b8bee 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -61,8 +61,6 @@ static inline int of_mm_gpiochip_add(struct device_node *np, } extern void of_mm_gpiochip_remove(struct of_mm_gpio_chip *mm_gc); -extern int of_gpiochip_add(struct gpio_chip *gc); -extern void of_gpiochip_remove(struct gpio_chip *gc); extern int of_gpio_simple_xlate(struct gpio_chip *gc, const struct of_phandle_args *gpiospec, u32 *flags); @@ -86,9 +84,6 @@ static inline int of_gpio_simple_xlate(struct gpio_chip *gc, return -ENOSYS; } -static inline int of_gpiochip_add(struct gpio_chip *gc) { return 0; } -static inline void of_gpiochip_remove(struct gpio_chip *gc) { } - #endif /* CONFIG_OF_GPIO */ /** -- cgit v1.2.3 From 79fddc4efd5d4de5cf210fe5ecf4d2734140849a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 17 Sep 2016 22:38:20 -0400 Subject: new helper: add_to_pipe() single-buffer analogue of splice_to_pipe(); vmsplice_to_pipe() switched to 
that, leaving splice_to_pipe() only for ->splice_read() instances (and that only until they are converted as well). Signed-off-by: Al Viro --- include/linux/splice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/splice.h b/include/linux/splice.h index da2751d3b93d..58b300f37534 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -72,6 +72,8 @@ extern ssize_t __splice_from_pipe(struct pipe_inode_info *, struct splice_desc *, splice_actor *); extern ssize_t splice_to_pipe(struct pipe_inode_info *, struct splice_pipe_desc *); +extern ssize_t add_to_pipe(struct pipe_inode_info *, + struct pipe_buffer *); extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, splice_direct_actor *); -- cgit v1.2.3 From 25869262ef7af24ccde988867ac3eb1c3d4b88d4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 17 Sep 2016 21:02:10 -0400 Subject: skb_splice_bits(): get rid of callback since pipe_lock is the outermost now, we don't need to drop/regain socket locks around the call of splice_to_pipe() from skb_splice_bits(), which kills the need to have a socket-specific callback; we can just call splice_to_pipe() and be done with that. 
Signed-off-by: Al Viro --- include/linux/skbuff.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0f665cb26b50..f520251ec43f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3021,15 +3021,9 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, __wsum csum); -ssize_t skb_socket_splice(struct sock *sk, - struct pipe_inode_info *pipe, - struct splice_pipe_desc *spd); int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, struct pipe_inode_info *pipe, unsigned int len, - unsigned int flags, - ssize_t (*splice_cb)(struct sock *, - struct pipe_inode_info *, - struct splice_pipe_desc *)); + unsigned int flags); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, -- cgit v1.2.3 From 0a7fb11c23c0fb8f5ad37f285f40348f1ab9ccbd Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Sat, 1 Oct 2016 21:59:55 +0300 Subject: qed: Add Light L2 support Other protocols beside the networking driver need the ability of passing some L2 traffic, usually [although not limited] for the purpose of some management traffic. Signed-off-by: Yuval Mintz Signed-off-by: Ram Amrani Signed-off-by: David S. 
Miller --- include/linux/qed/qed_if.h | 1 + include/linux/qed/qed_ll2_if.h | 139 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 include/linux/qed/qed_ll2_if.h (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index e4546abcea08..c2d74e8785cf 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -627,6 +627,7 @@ enum DP_MODULE { QED_MSG_SP = 0x100000, QED_MSG_STORAGE = 0x200000, QED_MSG_CXT = 0x800000, + QED_MSG_LL2 = 0x1000000, QED_MSG_ILT = 0x2000000, QED_MSG_ROCE = 0x4000000, QED_MSG_DEBUG = 0x8000000, diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h new file mode 100644 index 000000000000..fd75c265dba3 --- /dev/null +++ b/include/linux/qed/qed_ll2_if.h @@ -0,0 +1,139 @@ +/* QLogic qed NIC Driver + * + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
+ */ + +#ifndef _QED_LL2_IF_H +#define _QED_LL2_IF_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct qed_ll2_stats { + u64 gsi_invalid_hdr; + u64 gsi_invalid_pkt_length; + u64 gsi_unsupported_pkt_typ; + u64 gsi_crcchksm_error; + + u64 packet_too_big_discard; + u64 no_buff_discard; + + u64 rcv_ucast_bytes; + u64 rcv_mcast_bytes; + u64 rcv_bcast_bytes; + u64 rcv_ucast_pkts; + u64 rcv_mcast_pkts; + u64 rcv_bcast_pkts; + + u64 sent_ucast_bytes; + u64 sent_mcast_bytes; + u64 sent_bcast_bytes; + u64 sent_ucast_pkts; + u64 sent_mcast_pkts; + u64 sent_bcast_pkts; +}; + +#define QED_LL2_UNUSED_HANDLE (0xff) + +struct qed_ll2_cb_ops { + int (*rx_cb)(void *, struct sk_buff *, u32, u32); + int (*tx_cb)(void *, struct sk_buff *, bool); +}; + +struct qed_ll2_params { + u16 mtu; + bool drop_ttl0_packets; + bool rx_vlan_stripping; + u8 tx_tc; + bool frags_mapped; + u8 ll2_mac_address[ETH_ALEN]; +}; + +struct qed_ll2_ops { +/** + * @brief start - initializes ll2 + * + * @param cdev + * @param params - protocol driver configuration for the ll2. + * + * @return 0 on success, otherwise error value. + */ + int (*start)(struct qed_dev *cdev, struct qed_ll2_params *params); + +/** + * @brief stop - stops the ll2 + * + * @param cdev + * + * @return 0 on success, otherwise error value. + */ + int (*stop)(struct qed_dev *cdev); + +/** + * @brief start_xmit - transmits an skb over the ll2 interface + * + * @param cdev + * @param skb + * + * @return 0 on success, otherwise error value. + */ + int (*start_xmit)(struct qed_dev *cdev, struct sk_buff *skb); + +/** + * @brief register_cb_ops - protocol driver register the callback for Rx/Tx + * packets. Should be called before `start'. + * + * @param cdev + * @param cookie - to be passed to the callback functions. + * @param ops - the callback functions to register for Rx / Tx. + * + * @return 0 on success, otherwise error value. 
+ */ + void (*register_cb_ops)(struct qed_dev *cdev, + const struct qed_ll2_cb_ops *ops, + void *cookie); + +/** + * @brief get LL2 related statistics + * + * @param cdev + * @param stats - pointer to struct that would be filled with stats + * + * @return 0 on success, error otherwise. + */ + int (*get_stats)(struct qed_dev *cdev, struct qed_ll2_stats *stats); +}; + +#ifdef CONFIG_QED_LL2 +int qed_ll2_alloc_if(struct qed_dev *); +void qed_ll2_dealloc_if(struct qed_dev *); +#else +static const struct qed_ll2_ops qed_ll2_ops_pass = { + .start = NULL, + .stop = NULL, + .start_xmit = NULL, + .register_cb_ops = NULL, + .get_stats = NULL, +}; + +static inline int qed_ll2_alloc_if(struct qed_dev *cdev) +{ + return 0; +} + +static inline void qed_ll2_dealloc_if(struct qed_dev *cdev) +{ +} +#endif +#endif -- cgit v1.2.3 From cee9fbd8e2e9e713cd8bf227c6492fd8854de74b Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Sat, 1 Oct 2016 21:59:56 +0300 Subject: qede: Add qedr framework Adds a skeletal implementation of the qede RoCE driver - The qedr has some dependencies of the state of the underlying base interface. This adds some logic required with mutual registrations and the ability to pass updates on 'interesting' events. Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S.
Miller --- include/linux/qed/qed_if.h | 3 +- include/linux/qed/qede_roce.h | 88 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 include/linux/qed/qede_roce.h (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index c2d74e8785cf..e313742b571d 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -260,11 +260,10 @@ struct qed_dev_info { /* MFW version */ u32 mfw_rev; - bool rdma_supported; - u32 flash_size; u8 mf_mode; bool tx_switching; + bool rdma_supported; }; enum qed_sb_type { diff --git a/include/linux/qed/qede_roce.h b/include/linux/qed/qede_roce.h new file mode 100644 index 000000000000..99fbe6d55acb --- /dev/null +++ b/include/linux/qed/qede_roce.h @@ -0,0 +1,88 @@ +/* QLogic qedr NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef QEDE_ROCE_H +#define QEDE_ROCE_H + +struct qedr_dev; +struct qed_dev; +struct qede_dev; + +enum qede_roce_event { + QEDE_UP, + QEDE_DOWN, + QEDE_CHANGE_ADDR, + QEDE_CLOSE +}; + +struct qede_roce_event_work { + struct list_head list; + struct work_struct work; + void *ptr; + enum qede_roce_event event; +}; + +struct qedr_driver { + unsigned char name[32]; + + struct qedr_dev* (*add)(struct qed_dev *, struct pci_dev *, + struct net_device *); + + void (*remove)(struct qedr_dev *); + void (*notify)(struct qedr_dev *, enum qede_roce_event); +}; + +/* APIs for RoCE driver to register callback handlers, + * which will be invoked when device is added, removed, ifup, ifdown + */ +int qede_roce_register_driver(struct qedr_driver *drv); +void qede_roce_unregister_driver(struct qedr_driver *drv); + +bool qede_roce_supported(struct qede_dev *dev); + +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) +int qede_roce_dev_add(struct qede_dev *dev); +void qede_roce_dev_event_open(struct qede_dev *dev); +void qede_roce_dev_event_close(struct qede_dev *dev); +void qede_roce_dev_remove(struct qede_dev *dev); +void qede_roce_event_changeaddr(struct qede_dev *qedr); +#else +static inline int qede_roce_dev_add(struct qede_dev *dev) +{ + return 0; +} + +static inline void qede_roce_dev_event_open(struct qede_dev *dev) {} +static inline void qede_roce_dev_event_close(struct qede_dev *dev) {} +static inline void qede_roce_dev_remove(struct qede_dev *dev) {} +static inline void qede_roce_event_changeaddr(struct qede_dev *qedr) {} +#endif +#endif -- cgit v1.2.3 From 51ff17251c9c2c2e71974149d22bc73ea09c27cc Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Sat, 1 Oct 2016 21:59:57 +0300 Subject: qed: Add support for RoCE 
hw init This adds the backbone required for the various HW initializations which are necessary for the qedr driver - FW notification, resource initializations, etc. Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 1 + include/linux/qed/qed_if.h | 5 +- include/linux/qed/qed_roce_if.h | 345 ++++++++++++++++++++++++++++++++++++++++ include/linux/qed/rdma_common.h | 1 + 4 files changed, 351 insertions(+), 1 deletion(-) create mode 100644 include/linux/qed/qed_roce_if.h (limited to 'include/linux') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 19027635df0d..734deb094618 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -674,6 +674,7 @@ union event_ring_data { struct iscsi_eqe_data iscsi_info; struct malicious_vf_eqe_data malicious_vf; struct initial_cleanup_eqe_data vf_init_cleanup; + struct regpair roce_handle; }; /* Event Ring Entry */ diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index e313742b571d..f9ae903bbb84 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -34,6 +34,8 @@ enum dcbx_protocol_type { DCBX_MAX_PROTOCOL_TYPE }; +#define QED_ROCE_PROTOCOL_INDEX (3) + #ifdef CONFIG_DCB #define QED_LLDP_CHASSIS_ID_STAT_LEN 4 #define QED_LLDP_PORT_ID_STAT_LEN 4 @@ -268,6 +270,7 @@ struct qed_dev_info { enum qed_sb_type { QED_SB_TYPE_L2_QUEUE, + QED_SB_TYPE_CNQ, }; enum qed_protocol { @@ -628,7 +631,7 @@ enum DP_MODULE { QED_MSG_CXT = 0x800000, QED_MSG_LL2 = 0x1000000, QED_MSG_ILT = 0x2000000, - QED_MSG_ROCE = 0x4000000, + QED_MSG_RDMA = 0x4000000, QED_MSG_DEBUG = 0x8000000, /* to be added...up to 0x8000000 */ }; diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h new file mode 100644 index 000000000000..0f7d5275e515 --- /dev/null +++ b/include/linux/qed/qed_roce_if.h @@ -0,0 +1,345 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation +
* + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef _QED_ROCE_IF_H +#define _QED_ROCE_IF_H +#include +#include +#include +#include +#include +#include +#include +#include + +#define QED_RDMA_MAX_CNQ_SIZE (0xFFFF) + +/* rdma interface */ +enum qed_rdma_tid_type { + QED_RDMA_TID_REGISTERED_MR, + QED_RDMA_TID_FMR, + QED_RDMA_TID_MW_TYPE1, + QED_RDMA_TID_MW_TYPE2A +}; + +struct qed_rdma_events { + void *context; + void (*affiliated_event)(void *context, u8 fw_event_code, + void *fw_handle); + void (*unaffiliated_event)(void *context, u8 event_code); +}; + +struct qed_rdma_device { + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + u64 fw_ver; + + u64 node_guid; + u64 sys_image_guid; + + u8 max_cnq; + u8 max_sge; + u8 max_srq_sge; + u16 max_inline; + u32 max_wqe; + u32 max_srq_wqe; + u8 max_qp_resp_rd_atomic_resc; + u8 max_qp_req_rd_atomic_resc; + u64 max_dev_resp_rd_atomic_resc; + u32 max_cq; + u32 max_qp; + u32 max_srq; + u32 max_mr; + u64 max_mr_size; + u32 max_cqe; + u32 max_mw; + u32 max_fmr; + u32 max_mr_mw_fmr_pbl; + u64 max_mr_mw_fmr_size; + u32 max_pd; + u32 max_ah; + u8 max_pkey; + u16 max_srq_wr; + u8 max_stats_queues; + u32 dev_caps; + + /* Abilty to support RNR-NAK generation */ + +#define QED_RDMA_DEV_CAP_RNR_NAK_MASK 0x1 +#define QED_RDMA_DEV_CAP_RNR_NAK_SHIFT 0 + /* Abilty to support shutdown port */ +#define QED_RDMA_DEV_CAP_SHUTDOWN_PORT_MASK 0x1 +#define QED_RDMA_DEV_CAP_SHUTDOWN_PORT_SHIFT 1 + /* Abilty to support port active event */ +#define QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT_MASK 0x1 +#define QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT_SHIFT 2 + /* Abilty to support port change event */ +#define QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT_MASK 0x1 +#define QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT_SHIFT 3 + /* Abilty to support system image GUID */ +#define QED_RDMA_DEV_CAP_SYS_IMAGE_MASK 0x1 +#define QED_RDMA_DEV_CAP_SYS_IMAGE_SHIFT 4 + /* Abilty to support bad P_Key counter support */ +#define QED_RDMA_DEV_CAP_BAD_PKEY_CNT_MASK 0x1 +#define QED_RDMA_DEV_CAP_BAD_PKEY_CNT_SHIFT 5 + /* Abilty to support 
atomic operations */ +#define QED_RDMA_DEV_CAP_ATOMIC_OP_MASK 0x1 +#define QED_RDMA_DEV_CAP_ATOMIC_OP_SHIFT 6 +#define QED_RDMA_DEV_CAP_RESIZE_CQ_MASK 0x1 +#define QED_RDMA_DEV_CAP_RESIZE_CQ_SHIFT 7 + /* Abilty to support modifying the maximum number of + * outstanding work requests per QP + */ +#define QED_RDMA_DEV_CAP_RESIZE_MAX_WR_MASK 0x1 +#define QED_RDMA_DEV_CAP_RESIZE_MAX_WR_SHIFT 8 + /* Abilty to support automatic path migration */ +#define QED_RDMA_DEV_CAP_AUTO_PATH_MIG_MASK 0x1 +#define QED_RDMA_DEV_CAP_AUTO_PATH_MIG_SHIFT 9 + /* Abilty to support the base memory management extensions */ +#define QED_RDMA_DEV_CAP_BASE_MEMORY_EXT_MASK 0x1 +#define QED_RDMA_DEV_CAP_BASE_MEMORY_EXT_SHIFT 10 +#define QED_RDMA_DEV_CAP_BASE_QUEUE_EXT_MASK 0x1 +#define QED_RDMA_DEV_CAP_BASE_QUEUE_EXT_SHIFT 11 + /* Abilty to support multipile page sizes per memory region */ +#define QED_RDMA_DEV_CAP_MULTI_PAGE_PER_MR_EXT_MASK 0x1 +#define QED_RDMA_DEV_CAP_MULTI_PAGE_PER_MR_EXT_SHIFT 12 + /* Abilty to support block list physical buffer list */ +#define QED_RDMA_DEV_CAP_BLOCK_MODE_MASK 0x1 +#define QED_RDMA_DEV_CAP_BLOCK_MODE_SHIFT 13 + /* Abilty to support zero based virtual addresses */ +#define QED_RDMA_DEV_CAP_ZBVA_MASK 0x1 +#define QED_RDMA_DEV_CAP_ZBVA_SHIFT 14 + /* Abilty to support local invalidate fencing */ +#define QED_RDMA_DEV_CAP_LOCAL_INV_FENCE_MASK 0x1 +#define QED_RDMA_DEV_CAP_LOCAL_INV_FENCE_SHIFT 15 + /* Abilty to support Loopback on QP */ +#define QED_RDMA_DEV_CAP_LB_INDICATOR_MASK 0x1 +#define QED_RDMA_DEV_CAP_LB_INDICATOR_SHIFT 16 + u64 page_size_caps; + u8 dev_ack_delay; + u32 reserved_lkey; + u32 bad_pkey_counter; + struct qed_rdma_events events; +}; + +enum qed_port_state { + QED_RDMA_PORT_UP, + QED_RDMA_PORT_DOWN, +}; + +enum qed_roce_capability { + QED_ROCE_V1 = 1 << 0, + QED_ROCE_V2 = 1 << 1, +}; + +struct qed_rdma_port { + enum qed_port_state port_state; + int link_speed; + u64 max_msg_size; + u8 source_gid_table_len; + void *source_gid_table_ptr; + u8 
pkey_table_len; + void *pkey_table_ptr; + u32 pkey_bad_counter; + enum qed_roce_capability capability; +}; + +struct qed_rdma_cnq_params { + u8 num_pbl_pages; + u64 pbl_ptr; +}; + +/* The CQ Mode affects the CQ doorbell transaction size. + * 64/32 bit machines should configure to 32/16 bits respectively. + */ +enum qed_rdma_cq_mode { + QED_RDMA_CQ_MODE_16_BITS, + QED_RDMA_CQ_MODE_32_BITS, +}; + +struct qed_roce_dcqcn_params { + u8 notification_point; + u8 reaction_point; + + /* fields for notification point */ + u32 cnp_send_timeout; + + /* fields for reaction point */ + u32 rl_bc_rate; + u16 rl_max_rate; + u16 rl_r_ai; + u16 rl_r_hai; + u16 dcqcn_g; + u32 dcqcn_k_us; + u32 dcqcn_timeout_us; +}; + +struct qed_rdma_start_in_params { + struct qed_rdma_events *events; + struct qed_rdma_cnq_params cnq_pbl_list[128]; + u8 desired_cnq; + enum qed_rdma_cq_mode cq_mode; + struct qed_roce_dcqcn_params dcqcn_params; + u16 max_mtu; + u8 mac_addr[ETH_ALEN]; + u8 iwarp_flags; +}; + +struct qed_rdma_add_user_out_params { + u16 dpi; + u64 dpi_addr; + u64 dpi_phys_addr; + u32 dpi_size; +}; + +enum roce_mode { + ROCE_V1, + ROCE_V2_IPV4, + ROCE_V2_IPV6, + MAX_ROCE_MODE +}; + +union qed_gid { + u8 bytes[16]; + u16 words[8]; + u32 dwords[4]; + u64 qwords[2]; + u32 ipv4_addr; +}; + +struct qed_rdma_register_tid_in_params { + u32 itid; + enum qed_rdma_tid_type tid_type; + u8 key; + u16 pd; + bool local_read; + bool local_write; + bool remote_read; + bool remote_write; + bool remote_atomic; + bool mw_bind; + u64 pbl_ptr; + bool pbl_two_level; + u8 pbl_page_size_log; + u8 page_size_log; + u32 fbo; + u64 length; + u64 vaddr; + bool zbva; + bool phy_mr; + bool dma_mr; + + bool dif_enabled; + u64 dif_error_addr; + u64 dif_runt_addr; +}; + +struct qed_rdma_create_srq_in_params { + u64 pbl_base_addr; + u64 prod_pair_addr; + u16 num_pages; + u16 pd_id; + u16 page_size; +}; + +struct qed_rdma_create_srq_out_params { + u16 srq_id; +}; + +struct qed_rdma_destroy_srq_in_params { + u16 srq_id; +}; + 
+struct qed_rdma_modify_srq_in_params { + u32 wqe_limit; + u16 srq_id; +}; + +struct qed_rdma_stats_out_params { + u64 sent_bytes; + u64 sent_pkts; + u64 rcv_bytes; + u64 rcv_pkts; +}; + +struct qed_rdma_counters_out_params { + u64 pd_count; + u64 max_pd; + u64 dpi_count; + u64 max_dpi; + u64 cq_count; + u64 max_cq; + u64 qp_count; + u64 max_qp; + u64 tid_count; + u64 max_tid; +}; + +#define QED_ROCE_TX_HEAD_FAILURE (1) +#define QED_ROCE_TX_FRAG_FAILURE (2) + +enum qed_rdma_type { + QED_RDMA_TYPE_ROCE, +}; + +struct qed_dev_rdma_info { + struct qed_dev_info common; + enum qed_rdma_type rdma_type; +}; + +struct qed_rdma_ops { + const struct qed_common_ops *common; + + int (*fill_dev_info)(struct qed_dev *cdev, + struct qed_dev_rdma_info *info); + void *(*rdma_get_rdma_ctx)(struct qed_dev *cdev); + + int (*rdma_init)(struct qed_dev *dev, + struct qed_rdma_start_in_params *iparams); + + int (*rdma_add_user)(void *rdma_cxt, + struct qed_rdma_add_user_out_params *oparams); + + void (*rdma_remove_user)(void *rdma_cxt, u16 dpi); + int (*rdma_stop)(void *rdma_cxt); + struct qed_rdma_device* (*rdma_query_device)(void *rdma_cxt); + int (*rdma_get_start_sb)(struct qed_dev *cdev); + int (*rdma_get_min_cnq_msix)(struct qed_dev *cdev); + void (*rdma_cnq_prod_update)(void *rdma_cxt, u8 cnq_index, u16 prod); + int (*rdma_get_rdma_int)(struct qed_dev *cdev, + struct qed_int_info *info); + int (*rdma_set_rdma_int)(struct qed_dev *cdev, u16 cnt); +}; + +const struct qed_rdma_ops *qed_get_rdma_ops(void); + +#endif diff --git a/include/linux/qed/rdma_common.h b/include/linux/qed/rdma_common.h index 187991c1f439..7663725faa94 100644 --- a/include/linux/qed/rdma_common.h +++ b/include/linux/qed/rdma_common.h @@ -28,6 +28,7 @@ #define RDMA_MAX_PDS (64 * 1024) #define RDMA_NUM_STATISTIC_COUNTERS MAX_NUM_VPORTS +#define RDMA_NUM_STATISTIC_COUNTERS_BB MAX_NUM_VPORTS_BB #define RDMA_TASK_TYPE (PROTOCOLID_ROCE) -- cgit v1.2.3 From c295f86e60f5ba67f0f4bba2bb2c22b3cbf01ec1 Mon Sep 17 00:00:00 
2001 From: Ram Amrani Date: Sat, 1 Oct 2016 21:59:58 +0300 Subject: qed: PD,PKEY and CQ verb support Add support for the configurations of the protection domain and completion queues. Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_roce_if.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h index 0f7d5275e515..b559b1c9e76d 100644 --- a/include/linux/qed/qed_roce_if.h +++ b/include/linux/qed/qed_roce_if.h @@ -263,6 +263,19 @@ struct qed_rdma_register_tid_in_params { u64 dif_runt_addr; }; +struct qed_rdma_create_cq_in_params { + u32 cq_handle_lo; + u32 cq_handle_hi; + u32 cq_size; + u16 dpi; + bool pbl_two_level; + u64 pbl_ptr; + u16 pbl_num_pages; + u8 pbl_page_size_log; + u8 cnq_id; + u16 int_timeout; +}; + struct qed_rdma_create_srq_in_params { u64 pbl_base_addr; u64 prod_pair_addr; @@ -271,6 +284,14 @@ struct qed_rdma_create_srq_in_params { u16 page_size; }; +struct qed_rdma_destroy_cq_in_params { + u16 icid; +}; + +struct qed_rdma_destroy_cq_out_params { + u16 num_cq_notif; +}; + struct qed_rdma_create_srq_out_params { u16 srq_id; }; @@ -332,12 +353,21 @@ struct qed_rdma_ops { void (*rdma_remove_user)(void *rdma_cxt, u16 dpi); int (*rdma_stop)(void *rdma_cxt); struct qed_rdma_device* (*rdma_query_device)(void *rdma_cxt); + struct qed_rdma_port* (*rdma_query_port)(void *rdma_cxt); int (*rdma_get_start_sb)(struct qed_dev *cdev); int (*rdma_get_min_cnq_msix)(struct qed_dev *cdev); void (*rdma_cnq_prod_update)(void *rdma_cxt, u8 cnq_index, u16 prod); int (*rdma_get_rdma_int)(struct qed_dev *cdev, struct qed_int_info *info); int (*rdma_set_rdma_int)(struct qed_dev *cdev, u16 cnt); + int (*rdma_alloc_pd)(void *rdma_cxt, u16 *pd); + void (*rdma_dealloc_pd)(void *rdma_cxt, u16 pd); + int (*rdma_create_cq)(void *rdma_cxt, + struct qed_rdma_create_cq_in_params *params, + u16 *icid); + 
int (*rdma_destroy_cq)(void *rdma_cxt, + struct qed_rdma_destroy_cq_in_params *iparams, + struct qed_rdma_destroy_cq_out_params *oparams); }; const struct qed_rdma_ops *qed_get_rdma_ops(void); -- cgit v1.2.3 From f109394033521862f2558df93d9afc4dfa829c6a Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Sat, 1 Oct 2016 21:59:59 +0300 Subject: qed: Add support for QP verbs Add support for the slowpath configurations of Queue Pair verbs which adds, deletes, modifies and queries Queue Pairs. Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_roce_if.h | 144 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h index b559b1c9e76d..02321e3b1716 100644 --- a/include/linux/qed/qed_roce_if.h +++ b/include/linux/qed/qed_roce_if.h @@ -43,6 +43,17 @@ #define QED_RDMA_MAX_CNQ_SIZE (0xFFFF) /* rdma interface */ + +enum qed_roce_qp_state { + QED_ROCE_QP_STATE_RESET, + QED_ROCE_QP_STATE_INIT, + QED_ROCE_QP_STATE_RTR, + QED_ROCE_QP_STATE_RTS, + QED_ROCE_QP_STATE_SQD, + QED_ROCE_QP_STATE_ERR, + QED_ROCE_QP_STATE_SQE +}; + enum qed_rdma_tid_type { QED_RDMA_TID_REGISTERED_MR, QED_RDMA_TID_FMR, @@ -292,6 +303,128 @@ struct qed_rdma_destroy_cq_out_params { u16 num_cq_notif; }; +struct qed_rdma_create_qp_in_params { + u32 qp_handle_lo; + u32 qp_handle_hi; + u32 qp_handle_async_lo; + u32 qp_handle_async_hi; + bool use_srq; + bool signal_all; + bool fmr_and_reserved_lkey; + u16 pd; + u16 dpi; + u16 sq_cq_id; + u16 sq_num_pages; + u64 sq_pbl_ptr; + u8 max_sq_sges; + u16 rq_cq_id; + u16 rq_num_pages; + u64 rq_pbl_ptr; + u16 srq_id; + u8 stats_queue; +}; + +struct qed_rdma_create_qp_out_params { + u32 qp_id; + u16 icid; + void *rq_pbl_virt; + dma_addr_t rq_pbl_phys; + void *sq_pbl_virt; + dma_addr_t sq_pbl_phys; +}; + +struct qed_rdma_modify_qp_in_params { + u32 modify_flags; +#define 
QED_RDMA_MODIFY_QP_VALID_NEW_STATE_MASK 0x1 +#define QED_RDMA_MODIFY_QP_VALID_NEW_STATE_SHIFT 0 +#define QED_ROCE_MODIFY_QP_VALID_PKEY_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_PKEY_SHIFT 1 +#define QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN_MASK 0x1 +#define QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN_SHIFT 2 +#define QED_ROCE_MODIFY_QP_VALID_DEST_QP_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_DEST_QP_SHIFT 3 +#define QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR_SHIFT 4 +#define QED_ROCE_MODIFY_QP_VALID_RQ_PSN_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_RQ_PSN_SHIFT 5 +#define QED_ROCE_MODIFY_QP_VALID_SQ_PSN_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_SQ_PSN_SHIFT 6 +#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ_MASK 0x1 +#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ_SHIFT 7 +#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP_MASK 0x1 +#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP_SHIFT 8 +#define QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT_SHIFT 9 +#define QED_ROCE_MODIFY_QP_VALID_RETRY_CNT_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_RETRY_CNT_SHIFT 10 +#define QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT_SHIFT 11 +#define QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER_SHIFT 12 +#define QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN_SHIFT 13 +#define QED_ROCE_MODIFY_QP_VALID_ROCE_MODE_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_ROCE_MODE_SHIFT 14 + + enum qed_roce_qp_state new_state; + u16 pkey; + bool incoming_rdma_read_en; + bool incoming_rdma_write_en; + bool incoming_atomic_en; + bool e2e_flow_control_en; + u32 dest_qp; + bool lb_indication; + u16 mtu; + u8 traffic_class_tos; + u8 hop_limit_ttl; + u32 flow_label; + union qed_gid sgid; + union qed_gid dgid; + u16 udp_src_port; + + u16 vlan_id; + 
+ u32 rq_psn; + u32 sq_psn; + u8 max_rd_atomic_resp; + u8 max_rd_atomic_req; + u32 ack_timeout; + u8 retry_cnt; + u8 rnr_retry_cnt; + u8 min_rnr_nak_timer; + bool sqd_async; + u8 remote_mac_addr[6]; + u8 local_mac_addr[6]; + bool use_local_mac; + enum roce_mode roce_mode; +}; + +struct qed_rdma_query_qp_out_params { + enum qed_roce_qp_state state; + u32 rq_psn; + u32 sq_psn; + bool draining; + u16 mtu; + u32 dest_qp; + bool incoming_rdma_read_en; + bool incoming_rdma_write_en; + bool incoming_atomic_en; + bool e2e_flow_control_en; + union qed_gid sgid; + union qed_gid dgid; + u32 flow_label; + u8 hop_limit_ttl; + u8 traffic_class_tos; + u32 timeout; + u8 rnr_retry; + u8 retry_cnt; + u8 min_rnr_nak_timer; + u16 pkey_index; + u8 max_rd_atomic; + u8 max_dest_rd_atomic; + bool sqd_async; +}; + struct qed_rdma_create_srq_out_params { u16 srq_id; }; @@ -368,6 +501,17 @@ struct qed_rdma_ops { int (*rdma_destroy_cq)(void *rdma_cxt, struct qed_rdma_destroy_cq_in_params *iparams, struct qed_rdma_destroy_cq_out_params *oparams); + struct qed_rdma_qp * + (*rdma_create_qp)(void *rdma_cxt, + struct qed_rdma_create_qp_in_params *iparams, + struct qed_rdma_create_qp_out_params *oparams); + + int (*rdma_modify_qp)(void *roce_cxt, struct qed_rdma_qp *qp, + struct qed_rdma_modify_qp_in_params *iparams); + + int (*rdma_query_qp)(void *rdma_cxt, struct qed_rdma_qp *qp, + struct qed_rdma_query_qp_out_params *oparams); + int (*rdma_destroy_qp)(void *rdma_cxt, struct qed_rdma_qp *qp); }; const struct qed_rdma_ops *qed_get_rdma_ops(void); -- cgit v1.2.3 From ee8eaea30b1368680f4d2f873bc14e1d7b57d021 Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Sat, 1 Oct 2016 22:00:00 +0300 Subject: qed: Add support for memory registration verbs Add slowpath configuration support for user, dma and memory regions registration. Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S.
Miller --- include/linux/qed/qed_roce_if.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h index 02321e3b1716..0b6df6eedcf1 100644 --- a/include/linux/qed/qed_roce_if.h +++ b/include/linux/qed/qed_roce_if.h @@ -512,6 +512,12 @@ struct qed_rdma_ops { int (*rdma_query_qp)(void *rdma_cxt, struct qed_rdma_qp *qp, struct qed_rdma_query_qp_out_params *oparams); int (*rdma_destroy_qp)(void *rdma_cxt, struct qed_rdma_qp *qp); + int + (*rdma_register_tid)(void *rdma_cxt, + struct qed_rdma_register_tid_in_params *iparams); + int (*rdma_deregister_tid)(void *rdma_cxt, u32 itid); + int (*rdma_alloc_tid)(void *rdma_cxt, u32 *itid); + void (*rdma_free_tid)(void *rdma_cxt, u32 itid); }; const struct qed_rdma_ops *qed_get_rdma_ops(void); -- cgit v1.2.3 From abd49676c70793ee0a251bc3d8fe1604f9303210 Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Sat, 1 Oct 2016 22:00:01 +0300 Subject: qed: Add RoCE ll2 & GSI support Add the RoCE-specific LL2 logic [as well as GSI support] over the 'generic' LL2 interface. Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_roce_if.h | 79 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h index 0b6df6eedcf1..53047d3fa678 100644 --- a/include/linux/qed/qed_roce_if.h +++ b/include/linux/qed/qed_roce_if.h @@ -39,6 +39,16 @@ #include #include #include +#include + +enum qed_roce_ll2_tx_dest { + /* Light L2 TX Destination to the Network */ + QED_ROCE_LL2_TX_DEST_NW, + + /* Light L2 TX Destination to the Loopback */ + QED_ROCE_LL2_TX_DEST_LB, + QED_ROCE_LL2_TX_DEST_MAX +}; #define QED_RDMA_MAX_CNQ_SIZE (0xFFFF) @@ -461,6 +471,61 @@ struct qed_rdma_counters_out_params { #define QED_ROCE_TX_HEAD_FAILURE (1) #define QED_ROCE_TX_FRAG_FAILURE (2) +struct qed_roce_ll2_header { + void *vaddr; + dma_addr_t baddr; + size_t len; +}; + +struct qed_roce_ll2_buffer { + dma_addr_t baddr; + size_t len; +}; + +struct qed_roce_ll2_packet { + struct qed_roce_ll2_header header; + int n_seg; + struct qed_roce_ll2_buffer payload[RDMA_MAX_SGE_PER_SQ_WQE]; + int roce_mode; + enum qed_roce_ll2_tx_dest tx_dest; +}; + +struct qed_roce_ll2_tx_params { + int reserved; +}; + +struct qed_roce_ll2_rx_params { + u16 vlan_id; + u8 smac[ETH_ALEN]; + int rc; +}; + +struct qed_roce_ll2_cbs { + void (*tx_cb)(void *pdev, struct qed_roce_ll2_packet *pkt); + + void (*rx_cb)(void *pdev, struct qed_roce_ll2_packet *pkt, + struct qed_roce_ll2_rx_params *params); +}; + +struct qed_roce_ll2_params { + u16 max_rx_buffers; + u16 max_tx_buffers; + u16 mtu; + u8 mac_address[ETH_ALEN]; + struct qed_roce_ll2_cbs cbs; + void *cb_cookie; +}; + +struct qed_roce_ll2_info { + u8 handle; + struct qed_roce_ll2_cbs cbs; + u8 mac_address[ETH_ALEN]; + void *cb_cookie; + + /* Lock to protect ll2 */ + struct mutex lock; +}; + enum qed_rdma_type { QED_RDMA_TYPE_ROCE, }; @@ -518,6 +583,20 @@ struct qed_rdma_ops { int (*rdma_deregister_tid)(void *rdma_cxt, u32 itid); int (*rdma_alloc_tid)(void 
*rdma_cxt, u32 *itid); void (*rdma_free_tid)(void *rdma_cxt, u32 itid); + int (*roce_ll2_start)(struct qed_dev *cdev, + struct qed_roce_ll2_params *params); + int (*roce_ll2_stop)(struct qed_dev *cdev); + int (*roce_ll2_tx)(struct qed_dev *cdev, + struct qed_roce_ll2_packet *packet, + struct qed_roce_ll2_tx_params *params); + int (*roce_ll2_post_rx_buffer)(struct qed_dev *cdev, + struct qed_roce_ll2_buffer *buf, + u64 cookie, u8 notify_fw); + int (*roce_ll2_set_mac_filter)(struct qed_dev *cdev, + u8 *old_mac_address, + u8 *new_mac_address); + int (*roce_ll2_stats)(struct qed_dev *cdev, + struct qed_ll2_stats *stats); }; const struct qed_rdma_ops *qed_get_rdma_ops(void); -- cgit v1.2.3 From 21f54ddae449f4bdd9f1498124901d67202243d9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 3 Oct 2016 21:03:48 -0700 Subject: Using BUG_ON() as an assert() is _never_ acceptable That just generally kills the machine, and makes debugging only much harder, since the traces may long be gone. Debugging by assert() is a disease. Don't do it. If you can continue, you're much better off doing so with a live machine where you have a much higher chance that the report actually makes it to the system logs, rather than result in a machine that is just completely dead. The only valid situation for BUG_ON() is when continuing is not an option, because there is massive corruption. But if you are just verifying that something is true, you warn about your broken assumptions (preferably just once), and limp on. 
Fixes: 22f2ac51b6d6 ("mm: workingset: fix crash in shadow node shrinker caused by replace_page_cache_page()") Cc: Johannes Weiner Cc: Miklos Szeredi Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 4a529c984a3f..e1d761463243 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -257,7 +257,7 @@ static inline void workingset_node_pages_inc(struct radix_tree_node *node) static inline void workingset_node_pages_dec(struct radix_tree_node *node) { - VM_BUG_ON(!workingset_node_pages(node)); + VM_WARN_ON_ONCE(!workingset_node_pages(node)); node->count--; } @@ -273,7 +273,7 @@ static inline void workingset_node_shadows_inc(struct radix_tree_node *node) static inline void workingset_node_shadows_dec(struct radix_tree_node *node) { - VM_BUG_ON(!workingset_node_shadows(node)); + VM_WARN_ON_ONCE(!workingset_node_shadows(node)); node->count -= 1U << RADIX_TREE_COUNT_SHIFT; } -- cgit v1.2.3 From cdd8da8cc66b3d205120560649e530978ccbc567 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Fri, 2 Sep 2016 16:52:46 +0100 Subject: mfd: arizona: Add gating of external MCLKn clocks This patch adds requesting of the clocks supplied on MCLK1, MCLK2 pins, gating of the 32k clock is added to the arizona_clk32k_enable(), arizona_clk32k_disable() helpers. It's a temporary change until the CODEC's clock controller gets exposed through the clk API and is helpful for board configurations where the MCLK clocks are not provided by always on oscillators. 
Signed-off-by: Sylwester Nawrocki Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- include/linux/mfd/arizona/core.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h index 58ab4c0fe761..b9909bb0642b 100644 --- a/include/linux/mfd/arizona/core.h +++ b/include/linux/mfd/arizona/core.h @@ -13,6 +13,7 @@ #ifndef _WM_ARIZONA_CORE_H #define _WM_ARIZONA_CORE_H +#include #include #include #include @@ -21,6 +22,12 @@ #define ARIZONA_MAX_CORE_SUPPLIES 2 +enum { + ARIZONA_MCLK1, + ARIZONA_MCLK2, + ARIZONA_NUM_MCLK +}; + enum arizona_type { WM5102 = 1, WM5110 = 2, @@ -139,6 +146,8 @@ struct arizona { struct mutex clk_lock; int clk32k_ref; + struct clk *mclk[ARIZONA_NUM_MCLK]; + bool ctrlif_error; struct snd_soc_dapm_context *dapm; -- cgit v1.2.3 From 37778d83a744a1ebed1478b490601c40f8827607 Mon Sep 17 00:00:00 2001 From: Steve Twiss Date: Mon, 8 Aug 2016 14:16:11 +0100 Subject: mfd: da9063: Update author information to remove incorrect e-mail addresses Remove incorrect e-mail addresses from the copyright header and MODULE_AUTHOR() macro. These e-mail addresses are no longer in use. The author names have not been changed, only the e-mail addresses have been deleted from the source files. Signed-off-by: Steve Twiss Signed-off-by: Lee Jones --- include/linux/mfd/da9063/core.h | 4 ++-- include/linux/mfd/da9063/pdata.h | 4 ++-- include/linux/mfd/da9063/registers.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h index 621af82123c6..f3ae65db4c86 100644 --- a/include/linux/mfd/da9063/core.h +++ b/include/linux/mfd/da9063/core.h @@ -3,8 +3,8 @@ * * Copyright 2012 Dialog Semiconductor Ltd. 
* - * Author: Michal Hajduk - * Krystian Garbaciak + * Author: Michal Hajduk, Dialog Semiconductor + * Author: Krystian Garbaciak, Dialog Semiconductor * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the diff --git a/include/linux/mfd/da9063/pdata.h b/include/linux/mfd/da9063/pdata.h index 612383bd80ae..8a125701ef7b 100644 --- a/include/linux/mfd/da9063/pdata.h +++ b/include/linux/mfd/da9063/pdata.h @@ -3,8 +3,8 @@ * * Copyright 2012 Dialog Semiconductor Ltd. * - * Author: Michal Hajduk - * Author: Krystian Garbaciak + * Author: Michal Hajduk, Dialog Semiconductor + * Author: Krystian Garbaciak, Dialog Semiconductor * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the diff --git a/include/linux/mfd/da9063/registers.h b/include/linux/mfd/da9063/registers.h index 2e0ba6d5fbc3..5d42859cb441 100644 --- a/include/linux/mfd/da9063/registers.h +++ b/include/linux/mfd/da9063/registers.h @@ -3,8 +3,8 @@ * * Copyright 2012 Dialog Semiconductor Ltd. * - * Author: Michal Hajduk - * Krystian Garbaciak + * Author: Michal Hajduk, Dialog Semiconductor + * Author: Krystian Garbaciak, Dialog Semiconductor * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the -- cgit v1.2.3 From 9a6dc644512fd083400a96ac4a035ac154fe6b8d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 4 Aug 2016 08:26:56 +0300 Subject: mfd: 88pm80x: Double shifting bug in suspend/resume set_bit() and clear_bit() take the bit number so this code is really doing "1 << (1 << irq)" which is a double shift bug. It's done consistently so it won't cause a problem unless "irq" is more than 4. 
Fixes: 70c6cce04066 ('mfd: Support 88pm80x in 80x driver') Signed-off-by: Dan Carpenter Signed-off-by: Lee Jones --- include/linux/mfd/88pm80x.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/88pm80x.h b/include/linux/mfd/88pm80x.h index d409ceb2231e..c118a7ec94d6 100644 --- a/include/linux/mfd/88pm80x.h +++ b/include/linux/mfd/88pm80x.h @@ -350,7 +350,7 @@ static inline int pm80x_dev_suspend(struct device *dev) int irq = platform_get_irq(pdev, 0); if (device_may_wakeup(dev)) - set_bit((1 << irq), &chip->wu_flag); + set_bit(irq, &chip->wu_flag); return 0; } @@ -362,7 +362,7 @@ static inline int pm80x_dev_resume(struct device *dev) int irq = platform_get_irq(pdev, 0); if (device_may_wakeup(dev)) - clear_bit((1 << irq), &chip->wu_flag); + clear_bit(irq, &chip->wu_flag); return 0; } -- cgit v1.2.3 From 8c5d0571596efa5656cc53144172baa7c5c57b43 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 17 Aug 2016 14:07:42 +0200 Subject: mfd: max14577: Change Krzysztof Kozlowski's email to kernel.org Change my email address to kernel.org instead of Samsung one for the purpose of any future contact. The copyrights remain untouched and are attributed to Samsung. 
Signed-off-by: Krzysztof Kozlowski Signed-off-by: Lee Jones --- include/linux/mfd/max14577-private.h | 2 +- include/linux/mfd/max14577.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/max14577-private.h b/include/linux/mfd/max14577-private.h index f01c1fae4d84..df75234f979d 100644 --- a/include/linux/mfd/max14577-private.h +++ b/include/linux/mfd/max14577-private.h @@ -3,7 +3,7 @@ * * Copyright (C) 2014 Samsung Electrnoics * Chanwoo Choi - * Krzysztof Kozlowski + * Krzysztof Kozlowski * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/include/linux/mfd/max14577.h b/include/linux/mfd/max14577.h index ccfaf952c31b..d81b52bb8bee 100644 --- a/include/linux/mfd/max14577.h +++ b/include/linux/mfd/max14577.h @@ -3,7 +3,7 @@ * * Copyright (C) 2014 Samsung Electrnoics * Chanwoo Choi - * Krzysztof Kozlowski + * Krzysztof Kozlowski * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by -- cgit v1.2.3 From c695abab2429cfa9554aa353702936d1f064f073 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Wed, 10 Aug 2016 16:48:20 +0200 Subject: mfd: Add Samsung Exynos Low Power Audio Subsystem driver This patch adds common driver for the Top block of the Samsung Exynos SoC Low Power Audio Subsystem. This is a minimal driver which prepares resources for IP blocks like I2S, audio DMA and UART and exposes a regmap for the Top block registers. Also system power ops are added to ensure the Audio Subsystem is operational after system suspend/resume cycle. 
Signed-off-by: Inha Song Signed-off-by: Beomho Seo Signed-off-by: Sylwester Nawrocki Tested-by: Chanwoo Choi Signed-off-by: Lee Jones --- include/linux/mfd/syscon/exynos5-pmu.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/exynos5-pmu.h b/include/linux/mfd/syscon/exynos5-pmu.h index 76f30f940c70..c28ff21ca4d2 100644 --- a/include/linux/mfd/syscon/exynos5-pmu.h +++ b/include/linux/mfd/syscon/exynos5-pmu.h @@ -43,8 +43,10 @@ #define EXYNOS5433_MIPI_PHY2_CONTROL (0x718) #define EXYNOS5_PHY_ENABLE BIT(0) - #define EXYNOS5_MIPI_PHY_S_RESETN BIT(1) #define EXYNOS5_MIPI_PHY_M_RESETN BIT(2) +#define EXYNOS5433_PAD_RETENTION_AUD_OPTION (0x3028) +#define EXYNOS5433_PAD_INITIATE_WAKEUP_FROM_LOWPWR BIT(28) + #endif /* _LINUX_MFD_SYSCON_PMU_EXYNOS5_H_ */ -- cgit v1.2.3 From 6556bdacf646fcaa0586123ba85412de1c8f0eee Mon Sep 17 00:00:00 2001 From: Marcin Niestroj Date: Fri, 9 Sep 2016 10:42:02 +0200 Subject: mfd: tps65217: Add support for IRQs Add support for handling IRQs: power button, AC and USB power state changes. Mask and interrupt bits are shared within one register, which prevents us to use regmap_irq implementation. New irq_domain is created in order to add interrupt handling for each tps65217's subsystem. IRQ resources have been added for charger subsystem to be able to notify about AC and USB state changes. 
Signed-off-by: Marcin Niestroj Reviewed-by: Grygorii Strashko Tested-by: Tony Lindgren Signed-off-by: Lee Jones --- include/linux/mfd/tps65217.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/tps65217.h b/include/linux/mfd/tps65217.h index 1c88231496d3..4ccda8969639 100644 --- a/include/linux/mfd/tps65217.h +++ b/include/linux/mfd/tps65217.h @@ -73,6 +73,7 @@ #define TPS65217_PPATH_AC_CURRENT_MASK 0x0C #define TPS65217_PPATH_USB_CURRENT_MASK 0x03 +#define TPS65217_INT_RESERVEDM BIT(7) #define TPS65217_INT_PBM BIT(6) #define TPS65217_INT_ACM BIT(5) #define TPS65217_INT_USBM BIT(4) @@ -233,6 +234,13 @@ struct tps65217_bl_pdata { int dft_brightness; }; +enum tps65217_irq_type { + TPS65217_IRQ_PB, + TPS65217_IRQ_AC, + TPS65217_IRQ_USB, + TPS65217_NUM_IRQ +}; + /** * struct tps65217_board - packages regulator init data * @tps65217_regulator_data: regulator initialization values @@ -258,6 +266,10 @@ struct tps65217 { struct regulator_desc desc[TPS65217_NUM_REGULATOR]; struct regmap *regmap; u8 *strobes; + struct irq_domain *irq_domain; + struct mutex irq_lock; + u8 irq_mask; + int irq; }; static inline struct tps65217 *dev_to_tps65217(struct device *dev) -- cgit v1.2.3 From fe62c477df0c9501dea8c16d78e722c902737a53 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 6 Sep 2016 08:59:55 +0800 Subject: mfd: lp873x: Remove unused mutex lock from struct lp873x The mutex is not used, so remove it. 
Signed-off-by: Axel Lin Signed-off-by: Lee Jones --- include/linux/mfd/lp873x.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/lp873x.h b/include/linux/mfd/lp873x.h index 83b1bd7588be..edbec8350a49 100644 --- a/include/linux/mfd/lp873x.h +++ b/include/linux/mfd/lp873x.h @@ -263,7 +263,6 @@ enum lp873x_regulator_id { struct lp873x { struct device *dev; u8 rev; - struct mutex lock; /* lock guarding the data structure */ struct regmap *regmap; }; #endif /* __LINUX_MFD_LP873X_H */ -- cgit v1.2.3 From fae5e033d65a03c8b705be8ffc0ef69162544368 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 6 Sep 2016 15:13:01 +0200 Subject: mfd: rk808: Fix RK818_IRQ_DISCHG_ILIM initializer When building with -Woverride-init, we get a warning about an incorrect initializer: drivers/mfd/rk808.c:244:8: error: initialized field overwritten [-Werror=override-init] [RK818_IRQ_DISCHG_ILIM] = { This is clearly a mistake, as both RK818_IRQ_DISCHG_ILIM and RK818_IRQ_USB_OV are defined as '7', but they refer to different register bits. Changing RK818_IRQ_DISCHG_ILIM to 15 is consistent with how all other 14 interrupts are handled here, so I'm assuming this is what it should have been. 
Fixes: 2eedcbfc0612 ("mfd: rk808: Add RK818 support") Signed-off-by: Arnd Bergmann Acked-by: Andy Yan Signed-off-by: Lee Jones --- include/linux/mfd/rk808.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h index fc5db6fcb57d..6d435a3c06bc 100644 --- a/include/linux/mfd/rk808.h +++ b/include/linux/mfd/rk808.h @@ -244,7 +244,7 @@ enum rk818_reg { #define RK818_IRQ_CHG_TS1 12 #define RK818_IRQ_TS2 13 #define RK818_IRQ_CHG_CVTLIM 14 -#define RK818_IRQ_DISCHG_ILIM 7 +#define RK818_IRQ_DISCHG_ILIM 15 #define RK818_IRQ_VOUT_LO_MSK BIT(0) #define RK818_IRQ_VB_LO_MSK BIT(1) -- cgit v1.2.3 From c45eab2cb06092aa61e67fc6801cf0d6fac29482 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 14 Sep 2016 11:14:30 +0100 Subject: mfd: ab8500-debugfs: Prevent initialised field from being over-written Due to the lack of parity in the way array fields have been named/ numbered, a mistake was made where more debug fields were declared than actually existed. In doing so, 2 fields were added, which although unclear, were already declared in the array. The result was that the latter declarations trashed the former ones. This patch places the array back in the correct order and removes the offending NULL entries. While we're at it, let's ensure this doesn't happen again by naming each field properly and add a new *_LAST define to describe how many fields there should be. 
Signed-off-by: Lee Jones --- include/linux/mfd/abx500/ab8500.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h index 9475fee2bfc5..d33c245e75ca 100644 --- a/include/linux/mfd/abx500/ab8500.h +++ b/include/linux/mfd/abx500/ab8500.h @@ -63,6 +63,8 @@ enum ab8500_version { #define AB8500_STE_TEST 0x14 #define AB8500_OTP_EMUL 0x15 +#define AB8500_DEBUG_FIELD_LAST 0x16 + /* * Interrupts * Values used to index into array ab8500_irq_regoffset[] defined in -- cgit v1.2.3 From 45ff2b685a6e8588ed637ba711b9a42ad1963066 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 14 Sep 2016 11:51:42 +0100 Subject: mfd: db8500-prcmu: Remove unused *prcmu_set_ddr_opp() calls There are no call sites for these functions. Strip them out. Signed-off-by: Lee Jones --- include/linux/mfd/db8500-prcmu.h | 6 ------ include/linux/mfd/dbx500-prcmu.h | 9 --------- 2 files changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/db8500-prcmu.h b/include/linux/mfd/db8500-prcmu.h index 0bd69446bb05..7ba67b55b312 100644 --- a/include/linux/mfd/db8500-prcmu.h +++ b/include/linux/mfd/db8500-prcmu.h @@ -538,7 +538,6 @@ int db8500_prcmu_get_arm_opp(void); int db8500_prcmu_set_ape_opp(u8 opp); int db8500_prcmu_get_ape_opp(void); int db8500_prcmu_request_ape_opp_100_voltage(bool enable); -int db8500_prcmu_set_ddr_opp(u8 opp); int db8500_prcmu_get_ddr_opp(void); u32 db8500_prcmu_read(unsigned int reg); @@ -594,11 +593,6 @@ static inline int prcmu_release_usb_wakeup_state(void) return 0; } -static inline int db8500_prcmu_set_ddr_opp(u8 opp) -{ - return 0; -} - static inline int db8500_prcmu_get_ddr_opp(void) { return DDR_100_OPP; diff --git a/include/linux/mfd/dbx500-prcmu.h b/include/linux/mfd/dbx500-prcmu.h index 5d374601404c..2e2c6a63a065 100644 --- a/include/linux/mfd/dbx500-prcmu.h +++ b/include/linux/mfd/dbx500-prcmu.h @@ -269,10 +269,6 @@ unsigned long prcmu_clock_rate(u8 
clock); long prcmu_round_clock_rate(u8 clock, unsigned long rate); int prcmu_set_clock_rate(u8 clock, unsigned long rate); -static inline int prcmu_set_ddr_opp(u8 opp) -{ - return db8500_prcmu_set_ddr_opp(opp); -} static inline int prcmu_get_ddr_opp(void) { return db8500_prcmu_get_ddr_opp(); @@ -489,11 +485,6 @@ static inline int prcmu_get_arm_opp(void) return ARM_100_OPP; } -static inline int prcmu_set_ddr_opp(u8 opp) -{ - return 0; -} - static inline int prcmu_get_ddr_opp(void) { return DDR_100_OPP; -- cgit v1.2.3 From 0133d323463ec20131f7fd5a70051bd89db4f7a1 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 31 Aug 2016 14:46:21 +0300 Subject: mfd: twl6040: Register child device for twl6040-pdmclk The McPDM in OMAP4/5 is using the pdmclk from twl6040 as functional clock. The twl6040-pdmclk driver provides a clock which can be used to make sure that the pdmclk is active when the McPDM is in use. Signed-off-by: Peter Ujfalusi Acked-by: Rob Herring Signed-off-by: Lee Jones --- include/linux/mfd/twl6040.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/twl6040.h b/include/linux/mfd/twl6040.h index 36795a1be479..a2e88761c09f 100644 --- a/include/linux/mfd/twl6040.h +++ b/include/linux/mfd/twl6040.h @@ -168,7 +168,7 @@ #define TWL6040_VIBROCDET 0x20 #define TWL6040_TSHUTDET 0x40 -#define TWL6040_CELLS 3 +#define TWL6040_CELLS 4 #define TWL6040_REV_ES1_0 0x00 #define TWL6040_REV_ES1_1 0x01 /* Rev ES1.1 and ES1.2 */ -- cgit v1.2.3 From 1961531d1a0a011c77bfaafc8412e84a919d1747 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 20 Sep 2016 16:30:13 +0100 Subject: mfd: arizona: Remove arizona_of_get_named_gpio helper function This function is only used in a single place and no new users will be added as all the devices other required GPIOs are already handled. As such just merge the code back into the calling function. 
Signed-off-by: Charles Keepax --- include/linux/mfd/arizona/core.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h index b9909bb0642b..b31b3be7f8c9 100644 --- a/include/linux/mfd/arizona/core.h +++ b/include/linux/mfd/arizona/core.h @@ -191,7 +191,4 @@ int cs47l24_patch(struct arizona *arizona); int wm8997_patch(struct arizona *arizona); int wm8998_patch(struct arizona *arizona); -extern int arizona_of_get_named_gpio(struct arizona *arizona, const char *prop, - bool mandatory); - #endif -- cgit v1.2.3 From d3798ae8c6f3767c726403c2ca6ecc317752c9dd Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 4 Oct 2016 22:02:08 +0200 Subject: mm: filemap: don't plant shadow entries without radix tree node When the underflow checks were added to workingset_node_shadows_dec(), they triggered immediately: kernel BUG at ./include/linux/swap.h:276! invalid opcode: 0000 [#1] SMP Modules linked in: isofs usb_storage fuse xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 tun nf_conntrack_netbios_ns nf_conntrack_broadcast ip6t_REJECT nf_reject_ipv6 soundcore wmi acpi_als pinctrl_sunrisepoint kfifo_buf tpm_tis industrialio acpi_pad pinctrl_intel tpm_tis_core tpm nfsd auth_rpcgss nfs_acl lockd grace sunrpc dm_crypt CPU: 0 PID: 20929 Comm: blkid Not tainted 4.8.0-rc8-00087-gbe67d60ba944 #1 Hardware name: System manufacturer System Product Name/Z170-K, BIOS 1803 05/06/2016 task: ffff8faa93ecd940 task.stack: ffff8faa7f478000 RIP: page_cache_tree_insert+0xf1/0x100 Call Trace: __add_to_page_cache_locked+0x12e/0x270 add_to_page_cache_lru+0x4e/0xe0 mpage_readpages+0x112/0x1d0 blkdev_readpages+0x1d/0x20 __do_page_cache_readahead+0x1ad/0x290 force_page_cache_readahead+0xaa/0x100 page_cache_sync_readahead+0x3f/0x50 generic_file_read_iter+0x5af/0x740 blkdev_read_iter+0x35/0x40 __vfs_read+0xe1/0x130 vfs_read+0x96/0x130 SyS_read+0x55/0xc0 entry_SYSCALL_64_fastpath+0x13/0x8f Code: 03 00 48 8b 5d 
d8 65 48 33 1c 25 28 00 00 00 44 89 e8 75 19 48 83 c4 18 5b 41 5c 41 5d 41 5e 5d c3 0f 0b 41 bd ef ff ff ff eb d7 <0f> 0b e8 88 68 ef ff 0f 1f 84 00 RIP page_cache_tree_insert+0xf1/0x100 This is a long-standing bug in the way shadow entries are accounted in the radix tree nodes. The shrinker needs to know when radix tree nodes contain only shadow entries, no pages, so node->count is split in half to count shadows in the upper bits and pages in the lower bits. Unfortunately, the radix tree implementation doesn't know of this and assumes all entries are in node->count. When there is a shadow entry directly in root->rnode and the tree is later extended, the radix tree implementation will copy that entry into the new node and bump its node->count, i.e. it bumps the page count bits. Once the shadow gets removed and we subtract from the upper counter, node->count underflows and triggers the warning. Afterwards, without node->count reaching 0 again, the radix tree node is leaked. Limit shadow entries to when we have actual radix tree nodes and can count them properly. That means we lose the ability to detect refaults from files that had only the first page faulted in at eviction time. 
Fixes: 449dd6984d0e ("mm: keep page cache radix tree nodes in check") Signed-off-by: Johannes Weiner Reported-and-tested-by: Linus Torvalds Reviewed-by: Jan Kara Cc: Andrew Morton Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 4c45105dece3..52b97db93830 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -280,9 +280,9 @@ bool __radix_tree_delete_node(struct radix_tree_root *root, struct radix_tree_node *node); void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); void *radix_tree_delete(struct radix_tree_root *, unsigned long); -struct radix_tree_node *radix_tree_replace_clear_tags( - struct radix_tree_root *root, - unsigned long index, void *entry); +void radix_tree_clear_tags(struct radix_tree_root *root, + struct radix_tree_node *node, + void **slot); unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items); -- cgit v1.2.3 From 241699cd72a8489c9446ae3910ddd243e9b9061b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 22 Sep 2016 16:33:12 -0400 Subject: new iov_iter flavour: pipe-backed iov_iter variant for passing data into pipe. copy_to_iter() copies data into page(s) it has allocated and stuffs them into the pipe; copy_page_to_iter() stuffs there a reference to the page given to it. Both will try to coalesce if possible. iov_iter_zero() is similar to copy_to_iter(); iov_iter_get_pages() and friends will do as copy_to_iter() would have and return the pages where the data would've been copied. iov_iter_advance() will truncate everything past the spot it has advanced to. New primitive: iov_iter_pipe(), used for initializing those. pipe should be locked all along. 
Running out of space acts as fault would for iovec-backed ones; in other words, giving it to ->read_iter() may result in short read if the pipe overflows, or -EFAULT if it happens with nothing copied there. In other words, ->read_iter() on those acts pretty much like ->splice_read(). Moreover, all generic_file_splice_read() users, as well as many other ->splice_read() instances can be switched to that scheme - that'll happen in the next commit. Signed-off-by: Al Viro --- include/linux/splice.h | 1 + include/linux/uio.h | 14 +++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/splice.h b/include/linux/splice.h index 58b300f37534..00a21166e268 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -85,4 +85,5 @@ extern void splice_shrink_spd(struct splice_pipe_desc *); extern void spd_release_page(struct splice_pipe_desc *, unsigned int); extern const struct pipe_buf_operations page_cache_pipe_buf_ops; +extern const struct pipe_buf_operations default_pipe_buf_ops; #endif diff --git a/include/linux/uio.h b/include/linux/uio.h index 75b4aaf31a9d..b5ebe6dca404 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -13,6 +13,7 @@ #include struct page; +struct pipe_inode_info; struct kvec { void *iov_base; /* and that should *never* hold a userland pointer */ @@ -23,6 +24,7 @@ enum { ITER_IOVEC = 0, ITER_KVEC = 2, ITER_BVEC = 4, + ITER_PIPE = 8, }; struct iov_iter { @@ -33,8 +35,12 @@ struct iov_iter { const struct iovec *iov; const struct kvec *kvec; const struct bio_vec *bvec; + struct pipe_inode_info *pipe; + }; + union { + unsigned long nr_segs; + int idx; }; - unsigned long nr_segs; }; /* @@ -64,7 +70,7 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter) } #define iov_for_each(iov, iter, start) \ - if (!((start).type & ITER_BVEC)) \ + if (!((start).type & (ITER_BVEC | ITER_PIPE))) \ for (iter = (start); \ (iter).count && \ ((iov = iov_iter_iovec(&(iter))), 
1); \ @@ -94,6 +100,8 @@ void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec, unsigned long nr_segs, size_t count); void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec, unsigned long nr_segs, size_t count); +void iov_iter_pipe(struct iov_iter *i, int direction, struct pipe_inode_info *pipe, + size_t count); ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start); ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, @@ -109,7 +117,7 @@ static inline size_t iov_iter_count(struct iov_iter *i) static inline bool iter_is_iovec(struct iov_iter *i) { - return !(i->type & (ITER_BVEC | ITER_KVEC)); + return !(i->type & (ITER_BVEC | ITER_KVEC | ITER_PIPE)); } /* -- cgit v1.2.3 From 82c156f853840645604acd7c2cebcb75ed1b6652 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 22 Sep 2016 23:35:42 -0400 Subject: switch generic_file_splice_read() to use of ->read_iter() ... 
and kill the ->splice_read() instances that can be switched to it Signed-off-by: Al Viro --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 901e25d495cc..b04883e74579 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2794,8 +2794,6 @@ extern void block_sync_page(struct page *page); /* fs/splice.c */ extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); -extern ssize_t default_file_splice_read(struct file *, loff_t *, - struct pipe_inode_info *, size_t, unsigned int); extern ssize_t iter_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, -- cgit v1.2.3 From 7bf2d1df80822ec056363627e2014990f068f7aa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 27 Sep 2016 10:45:12 +0200 Subject: pipe: add pipe_buf_get() helper Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- include/linux/pipe_fs_i.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 24f5470d3944..10876f3cb3da 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -115,6 +115,17 @@ struct pipe_buf_operations { void (*get)(struct pipe_inode_info *, struct pipe_buffer *); }; +/** + * pipe_buf_get - get a reference to a pipe_buffer + * @pipe: the pipe that the buffer belongs to + * @buf: the buffer to get a reference to + */ +static inline void pipe_buf_get(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + buf->ops->get(pipe, buf); +} + /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual memory allocation, whereas PIPE_BUF makes atomicity guarantees. 
*/ #define PIPE_SIZE PAGE_SIZE -- cgit v1.2.3 From a779638cf622f069a484e8802134cca3c6c71415 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 27 Sep 2016 10:45:12 +0200 Subject: pipe: add pipe_buf_release() helper Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- include/linux/pipe_fs_i.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 10876f3cb3da..d24fa6da6ae3 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -126,6 +126,20 @@ static inline void pipe_buf_get(struct pipe_inode_info *pipe, buf->ops->get(pipe, buf); } +/** + * pipe_buf_release - put a reference to a pipe_buffer + * @pipe: the pipe that the buffer belongs to + * @buf: the buffer to put a reference to + */ +static inline void pipe_buf_release(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + const struct pipe_buf_operations *ops = buf->ops; + + buf->ops = NULL; + ops->release(pipe, buf); +} + /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual memory allocation, whereas PIPE_BUF makes atomicity guarantees. 
*/ #define PIPE_SIZE PAGE_SIZE -- cgit v1.2.3 From fba597db4218ac324eee34b64736ea94829c95bf Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 27 Sep 2016 10:45:12 +0200 Subject: pipe: add pipe_buf_confirm() helper Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- include/linux/pipe_fs_i.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index d24fa6da6ae3..654413334537 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -140,6 +140,17 @@ static inline void pipe_buf_release(struct pipe_inode_info *pipe, ops->release(pipe, buf); } +/** + * pipe_buf_confirm - verify contents of the pipe buffer + * @pipe: the pipe that the buffer belongs to + * @buf: the buffer to confirm + */ +static inline int pipe_buf_confirm(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + return buf->ops->confirm(pipe, buf); +} + /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual memory allocation, whereas PIPE_BUF makes atomicity guarantees. 
*/ #define PIPE_SIZE PAGE_SIZE @@ -154,7 +165,6 @@ extern unsigned long pipe_user_pages_hard; extern unsigned long pipe_user_pages_soft; int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *); - /* Drop the inode semaphore and wait for a pipe event, atomically */ void pipe_wait(struct pipe_inode_info *pipe); -- cgit v1.2.3 From ca76f5b6bdbdc50af0d7b98cfcf7a2be7e95eb3d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 27 Sep 2016 10:45:12 +0200 Subject: pipe: add pipe_buf_steal() helper Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- include/linux/pipe_fs_i.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 654413334537..bddccf0159bb 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -151,6 +151,17 @@ static inline int pipe_buf_confirm(struct pipe_inode_info *pipe, return buf->ops->confirm(pipe, buf); } +/** + * pipe_buf_steal - attempt to take ownership of a pipe_buffer + * @pipe: the pipe that the buffer belongs to + * @buf: the buffer to attempt to steal + */ +static inline int pipe_buf_steal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + return buf->ops->steal(pipe, buf); +} + /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ #define PIPE_SIZE PAGE_SIZE -- cgit v1.2.3 From a949e63992469fed87aef197347960ced31701b8 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 27 Sep 2016 10:45:13 +0200 Subject: pipe: fix comment in pipe_buf_operations Map and unmap ops no longer exist. 
Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- include/linux/pipe_fs_i.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index bddccf0159bb..e7497c9dde7f 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -66,15 +66,10 @@ struct pipe_inode_info { * * ->confirm() * ->steal() - * ... - * ->map() - * ... - * ->unmap() * - * That is, ->map() must be called on a confirmed buffer, - * same goes for ->steal(). See below for the meaning of each - * operation. Also see kerneldoc in fs/pipe.c for the pipe - * and generic variants of these hooks. + * That is, ->steal() must be called on a confirmed buffer. + * See below for the meaning of each operation. Also see kerneldoc + * in fs/pipe.c for the pipe and generic variants of these hooks. */ struct pipe_buf_operations { /* -- cgit v1.2.3 From 09bb8bfffd29c3dffb72bc2c69a062dfb1ae624c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 4 Aug 2016 10:19:06 +1000 Subject: exportfs: be careful to only return expected errors. When nfsd calls fh_to_dentry, it expects ESTALE or ENOMEM as errors. In particular it can be tempting to return ENOENT, but this is not handled well by nfsd. Rather than requiring strict adherence to error codes by filesystems, treat all unexpected error codes the same as ESTALE. This is safest. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- include/linux/exportfs.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index b03c0625fa6e..5ab958cdc50b 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -157,12 +157,13 @@ struct fid { * @fh_to_dentry is given a &struct super_block (@sb) and a file handle * fragment (@fh, @fh_len). 
It should return a &struct dentry which refers * to the same file that the file handle fragment refers to. If it cannot, - * it should return a %NULL pointer if the file was found but no acceptable - * &dentries were available, or an %ERR_PTR error code indicating why it - * couldn't be found (e.g. %ENOENT or %ENOMEM). Any suitable dentry can be - * returned including, if necessary, a new dentry created with d_alloc_root. - * The caller can then find any other extant dentries by following the - * d_alias links. + * it should return a %NULL pointer if the file cannot be found, or an + * %ERR_PTR error code of %ENOMEM if a memory allocation failure occurred. + * Any other error code is treated like %NULL, and will cause an %ESTALE error + * for callers of exportfs_decode_fh(). + * Any suitable dentry can be returned including, if necessary, a new dentry + * created with d_alloc_root. The caller can then find any other extant + * dentries by following the d_alias links. * * fh_to_parent: * Same as @fh_to_dentry, except that it returns a pointer to the parent -- cgit v1.2.3 From bba0bd31b117cba754322f337e61def53d9b22e5 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 29 Sep 2016 17:48:35 +0200 Subject: sockfs: Get rid of getxattr iop If we allow pseudo-filesystems created with mount_pseudo to have xattr handlers, we can replace sockfs_getxattr with a sockfs_xattr_get handler to use the xattr handler name parsing. 
Signed-off-by: Andreas Gruenbacher Signed-off-by: Al Viro --- include/linux/fs.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 901e25d495cc..7540e872591a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2075,10 +2075,19 @@ struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), int flags, void *data); -extern struct dentry *mount_pseudo(struct file_system_type *, char *, - const struct super_operations *ops, - const struct dentry_operations *dops, - unsigned long); +extern struct dentry *mount_pseudo_xattr(struct file_system_type *, char *, + const struct super_operations *ops, + const struct xattr_handler **xattr, + const struct dentry_operations *dops, + unsigned long); + +static inline struct dentry * +mount_pseudo(struct file_system_type *fs_type, char *name, + const struct super_operations *ops, + const struct dentry_operations *dops, unsigned long magic) +{ + return mount_pseudo_xattr(fs_type, name, ops, NULL, dops, magic); +} /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ #define fops_get(fops) \ -- cgit v1.2.3 From 0e3b0d123c8fd5c42f364aea3ab663b1f18dad39 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 6 Oct 2016 23:13:15 -0700 Subject: libnvdimm, namespace: allow multiple pmem-namespaces per region at scan time If label scanning finds multiple valid pmem namespaces allow them to be surfaced rather than fail namespace scanning. Support for creating multiple namespaces per region is saved for a later patch. Note that this adds some new error messages to clarify which of the pmem namespaces in the set are potentially impacted by invalid labels. 
Signed-off-by: Dan Williams --- include/linux/nd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nd.h b/include/linux/nd.h index f1ea426d6a5e..ddcc7788305c 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -77,11 +77,13 @@ struct nd_namespace_io { * @nsio: device and system physical address range to drive * @alt_name: namespace name supplied in the dimm label * @uuid: namespace name supplied in the dimm label + * @id: ida allocated id */ struct nd_namespace_pmem { struct nd_namespace_io nsio; char *alt_name; u8 *uuid; + int id; }; /** -- cgit v1.2.3 From 6ff3e912d32ece4e9cf8708da796e9e2e7979ffe Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 5 Oct 2016 14:04:15 -0700 Subject: libnvdimm, namespace: sort namespaces by dpa at init Add more determinism to initial namespace device-name assignments by sorting the namespaces by starting dpa. Signed-off-by: Dan Williams --- include/linux/nd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nd.h b/include/linux/nd.h index ddcc7788305c..fa66aeed441a 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -107,19 +107,19 @@ struct nd_namespace_blk { struct resource **res; }; -static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev) +static inline struct nd_namespace_io *to_nd_namespace_io(const struct device *dev) { return container_of(dev, struct nd_namespace_io, common.dev); } -static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device *dev) +static inline struct nd_namespace_pmem *to_nd_namespace_pmem(const struct device *dev) { struct nd_namespace_io *nsio = to_nd_namespace_io(dev); return container_of(nsio, struct nd_namespace_pmem, nsio); } -static inline struct nd_namespace_blk *to_nd_namespace_blk(struct device *dev) +static inline struct nd_namespace_blk *to_nd_namespace_blk(const struct device *dev) { return container_of(dev, struct 
nd_namespace_blk, common.dev); } -- cgit v1.2.3 From fd10ed8e6f4246ac5e18b921ba50562959502117 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 12 Sep 2016 19:16:21 +0300 Subject: IB/mlx4: Fix possible vl/sl field mismatch in LRH header in QP1 packets In MLX qp packets, the LRH (built by the driver) has both a VL field and an SL field. When building a QP1 packet, the VL field should reflect the SLtoVL mapping and not arbitrarily contain zero (as is done now). This bug causes credit problems in IB switches at high rates of QP1 packets. The fix is to cache the SL to VL mapping in the driver, and look up the VL mapped to the SL provided in the send request when sending QP1 packets. For FW versions which support generating a port_management_config_change event with subtype sl-to-vl-table-change, the driver uses that event to update its sl-to-vl mapping cache. Otherwise, the driver snoops incoming SMP mads to update the cache. There remains the case where the FW is running in secure-host mode (so no QP0 packets are delivered to the driver), and the FW does not generate the sl2vl mapping change event. To support this case, the driver updates (via querying the FW) its sl2vl mapping cache when running in secure-host mode when it receives either a Port Up event or a client-reregister event (where the port is still up, but there may have been an opensm failover). OpenSM modifies the sl2vl mapping before Port Up and Client-reregister events occur, so if there is a mapping change the driver's cache will be properly updated. 
Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx4/device.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 42da3552f7cb..062d10aaf5cb 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -71,7 +71,8 @@ enum { MLX4_FLAG_SLAVE = 1 << 3, MLX4_FLAG_SRIOV = 1 << 4, MLX4_FLAG_OLD_REG_MAC = 1 << 6, - MLX4_FLAG_BONDED = 1 << 7 + MLX4_FLAG_BONDED = 1 << 7, + MLX4_FLAG_SECURE_HOST = 1 << 8, }; enum { @@ -221,6 +222,7 @@ enum { MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33, MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER = 1ULL << 34, MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT = 1ULL << 35, + MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT = 1ULL << 36, }; enum { @@ -448,6 +450,7 @@ enum { MLX4_DEV_PMC_SUBTYPE_GUID_INFO = 0x14, MLX4_DEV_PMC_SUBTYPE_PORT_INFO = 0x15, MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE = 0x16, + MLX4_DEV_PMC_SUBTYPE_SL_TO_VL_MAP = 0x17, }; /* Port mgmt change event handling */ @@ -459,6 +462,11 @@ enum { MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK = 1 << 4, }; +union sl2vl_tbl_to_u64 { + u8 sl8[8]; + u64 sl64; +}; + enum { MLX4_DEVICE_STATE_UP = 1 << 0, MLX4_DEVICE_STATE_INTERNAL_ERROR = 1 << 1, @@ -945,6 +953,9 @@ struct mlx4_eqe { __be32 block_ptr; __be32 tbl_entries_mask; } __packed tbl_change_info; + struct { + u8 sl2vl_table[8]; + } __packed sl2vl_tbl_change_info; } params; } __packed port_mgmt_change; struct { -- cgit v1.2.3 From d0a5b995a308347fdb1bb0412df32acd0312523b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 29 Sep 2016 17:48:39 +0200 Subject: vfs: Add IOP_XATTR inode operations flag The IOP_XATTR inode operations flag in inode->i_opflags indicates that the inode has xattr support. 
The flag is automatically set by new_inode() on filesystems with xattr support (where sb->s_xattr is defined), and cleared otherwise. Filesystems can explicitly clear it for inodes that should not have xattr support. Signed-off-by: Andreas Gruenbacher Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7540e872591a..91a7245e58c7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -591,6 +591,7 @@ is_uncached_acl(struct posix_acl *acl) #define IOP_FASTPERM 0x0001 #define IOP_LOOKUP 0x0002 #define IOP_NOFOLLOW 0x0004 +#define IOP_XATTR 0x0008 /* * Keep mostly read-only and often accessed (especially for -- cgit v1.2.3 From 5d6c31910bc0713e37628dc0ce677dcb13c8ccf4 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 29 Sep 2016 17:48:42 +0200 Subject: xattr: Add __vfs_{get,set,remove}xattr helpers Right now, various places in the kernel check for the existence of getxattr, setxattr, and removexattr inode operations and directly call those operations. Switch to helper functions and test for the IOP_XATTR flag instead. 
Signed-off-by: Andreas Gruenbacher Acked-by: James Morris Signed-off-by: Al Viro --- include/linux/xattr.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 94079bab9243..6ae6b2e68efb 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -46,10 +46,13 @@ struct xattr { }; ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t); +ssize_t __vfs_getxattr(struct dentry *, struct inode *, const char *, void *, size_t); ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t); ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); +int __vfs_setxattr(struct dentry *, struct inode *, const char *, const void *, size_t, int); int __vfs_setxattr_noperm(struct dentry *, const char *, const void *, size_t, int); int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int); +int __vfs_removexattr(struct dentry *, const char *); int vfs_removexattr(struct dentry *, const char *); ssize_t generic_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size); -- cgit v1.2.3 From c21dbe20f606219fe54faf555b7bc5565487c58f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 7 Oct 2016 16:56:52 -0700 Subject: fsnotify: convert notification_mutex to a spinlock notification_mutex is used to protect the list of pending events. As such there's no reason to use a sleeping lock for it. Convert it to a spinlock. 
[jack@suse.cz: fixed version] Link: http://lkml.kernel.org/r/1474031567-1831-1-git-send-email-jack@suse.cz Link: http://lkml.kernel.org/r/1473797711-14111-5-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Lino Sanfilippo Tested-by: Guenter Roeck Cc: Miklos Szeredi Cc: Eric Paris Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fsnotify_backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 7268ed076be8..0713e873b1c9 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -135,7 +135,7 @@ struct fsnotify_group { const struct fsnotify_ops *ops; /* how this group handles things */ /* needed to send notification to userspace */ - struct mutex notification_mutex; /* protect the notification_list */ + spinlock_t notification_lock; /* protect the notification_list */ struct list_head notification_list; /* list of event_holder this group needs to send to userspace */ wait_queue_head_t notification_waitq; /* read() on the notification file blocks on this waitq */ unsigned int q_len; /* events on the queue */ -- cgit v1.2.3 From 073f65522aeb23e46fc8a809d69513132d3acc81 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 7 Oct 2016 16:56:55 -0700 Subject: fanotify: use notification_lock instead of access_lock Fanotify code has its own lock (access_lock) to protect a list of events waiting for a response from userspace. However this is somewhat awkward as the same list_head in the event is protected by notification_lock if it is part of the notification queue and by access_lock if it is part of the fanotify private queue which makes it difficult for any reliable checks in the generic code. So make fanotify use the same lock - notification_lock - for protecting its private event list. 
Link: http://lkml.kernel.org/r/1473797711-14111-6-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Lino Sanfilippo Cc: Miklos Szeredi Cc: Eric Paris Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fsnotify_backend.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 0713e873b1c9..79467b239fcf 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -177,7 +177,6 @@ struct fsnotify_group { struct fanotify_group_private_data { #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS /* allows a group to block waiting for a userspace response */ - spinlock_t access_lock; struct list_head access_list; wait_queue_head_t access_waitq; #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ -- cgit v1.2.3 From 3740dcdf8a77ae6a66e99350e9fbd8a6ce4d493a Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 7 Oct 2016 16:57:04 -0700 Subject: jiffies: add time comparison functions for 64 bit jiffies Though the time_before and time_after family of functions were nicely extended to support jiffies64, so that the interface would be consistent, it was forgotten to also extend the before/after jiffies functions to support jiffies64. This commit brings the interface to parity between jiffies and jiffies64, which is quite convenient. Link: http://lkml.kernel.org/r/20160929033319.12188-1-Jason@zx2c4.com Signed-off-by: Jason A. 
Donenfeld Cc: Thomas Gleixner Cc: John Stultz Signed-off-by: Linus Torvalds --- include/linux/jiffies.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 5fdc55312334..589d14e970ad 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -150,15 +150,19 @@ static inline u64 get_jiffies_64(void) /* time_is_before_jiffies(a) return true if a is before jiffies */ #define time_is_before_jiffies(a) time_after(jiffies, a) +#define time_is_before_jiffies64(a) time_after64(get_jiffies_64(), a) /* time_is_after_jiffies(a) return true if a is after jiffies */ #define time_is_after_jiffies(a) time_before(jiffies, a) +#define time_is_after_jiffies64(a) time_before64(get_jiffies_64(), a) /* time_is_before_eq_jiffies(a) return true if a is before or equal to jiffies*/ #define time_is_before_eq_jiffies(a) time_after_eq(jiffies, a) +#define time_is_before_eq_jiffies64(a) time_after_eq64(get_jiffies_64(), a) /* time_is_after_eq_jiffies(a) return true if a is after or equal to jiffies*/ #define time_is_after_eq_jiffies(a) time_before_eq(jiffies, a) +#define time_is_after_eq_jiffies64(a) time_before_eq64(get_jiffies_64(), a) /* * Have the 32 bit jiffies value wrap 5 minutes after boot -- cgit v1.2.3 From 7c5f64f84483bd13886348edda8b3e7b799a7fdb Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Fri, 7 Oct 2016 16:57:23 -0700 Subject: mm: oom: deduplicate victim selection code for memcg and global oom When selecting an oom victim, we use the same heuristic for both memory cgroup and global oom. The only difference is the scope of tasks to select the victim from. So we could just export an iterator over all memcg tasks and keep all oom related logic in oom_kill.c, but instead we duplicate pieces of it in memcontrol.c reusing some initially private functions of oom_kill.c in order to not duplicate all of it. 
That looks ugly and error prone, because any modification of select_bad_process should also be propagated to mem_cgroup_out_of_memory. Let's rework this as follows: keep all oom heuristic related code private to oom_kill.c and make oom_kill.c use exported memcg functions when it's really necessary (like in case of iterating over memcg tasks). Link: http://lkml.kernel.org/r/1470056933-7505-1-git-send-email-vdavydov@virtuozzo.com Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Tetsuo Handa Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 15 +++++++++++++++ include/linux/oom.h | 43 ++++--------------------------------------- 2 files changed, 19 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5d8ca6e02e39..0710143723bc 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -366,6 +366,8 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, struct mem_cgroup *, struct mem_cgroup_reclaim_cookie *); void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); +int mem_cgroup_scan_tasks(struct mem_cgroup *, + int (*)(struct task_struct *, void *), void *); static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) { @@ -446,6 +448,8 @@ unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) void mem_cgroup_handle_over_high(void); +unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg); + void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p); @@ -639,6 +643,12 @@ static inline void mem_cgroup_iter_break(struct mem_cgroup *root, { } +static inline int mem_cgroup_scan_tasks(struct mem_cgroup *memcg, + int (*fn)(struct task_struct *, void *), void *arg) +{ + return 0; +} + static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) { return 0; @@ -669,6 +679,11 @@ 
mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, return 0; } +static inline unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg) +{ + return 0; +} + static inline void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { diff --git a/include/linux/oom.h b/include/linux/oom.h index 5bc0457ee3a8..17946e5121b6 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -34,23 +34,11 @@ struct oom_control { * for display purposes. */ const int order; -}; -/* - * Types of limitations to the nodes from which allocations may occur - */ -enum oom_constraint { - CONSTRAINT_NONE, - CONSTRAINT_CPUSET, - CONSTRAINT_MEMORY_POLICY, - CONSTRAINT_MEMCG, -}; - -enum oom_scan_t { - OOM_SCAN_OK, /* scan thread and find its badness */ - OOM_SCAN_CONTINUE, /* do not consider thread for oom kill */ - OOM_SCAN_ABORT, /* abort the iteration and return */ - OOM_SCAN_SELECT, /* always select this thread first */ + /* Used by oom implementation, do not set */ + unsigned long totalpages; + struct task_struct *chosen; + unsigned long chosen_points; }; extern struct mutex oom_lock; @@ -70,30 +58,10 @@ static inline bool oom_task_origin(const struct task_struct *p) return p->signal->oom_flag_origin; } -extern void mark_oom_victim(struct task_struct *tsk); - -#ifdef CONFIG_MMU -extern void wake_oom_reaper(struct task_struct *tsk); -#else -static inline void wake_oom_reaper(struct task_struct *tsk) -{ -} -#endif - extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages); -extern void oom_kill_process(struct oom_control *oc, struct task_struct *p, - unsigned int points, unsigned long totalpages, - const char *message); - -extern void check_panic_on_oom(struct oom_control *oc, - enum oom_constraint constraint); - -extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc, - struct task_struct *task); - extern bool out_of_memory(struct oom_control *oc); extern void 
exit_oom_victim(struct task_struct *tsk); @@ -101,14 +69,11 @@ extern void exit_oom_victim(struct task_struct *tsk); extern int register_oom_notifier(struct notifier_block *nb); extern int unregister_oom_notifier(struct notifier_block *nb); -extern bool oom_killer_disabled; extern bool oom_killer_disable(void); extern void oom_killer_enable(void); extern struct task_struct *find_lock_task_mm(struct task_struct *p); -bool task_will_free_mem(struct task_struct *task); - /* sysctls */ extern int sysctl_oom_dump_tasks; extern int sysctl_oom_kill_allocating_task; -- cgit v1.2.3 From 252e5c6e2e5b4557599ef86ea5d02b0395e9056c Mon Sep 17 00:00:00 2001 From: zijun_hu Date: Fri, 7 Oct 2016 16:57:26 -0700 Subject: mm/vmalloc.c: fix align value calculation error It causes double align requirement for __get_vm_area_node() if parameter size is power of 2 and VM_IOREMAP is set in parameter flags, for example size=0x10000 -> fls_long(0x10000)=17 -> align=0x20000 get_count_order_long() is implemented and can be used instead of fls_long() for fixing the bug, for example size=0x10000 -> get_count_order_long(0x10000)=16 -> align=0x10000 [akpm@linux-foundation.org: s/get_order_long()/get_count_order_long()/] [zijun_hu@zoho.com: fixes] Link: http://lkml.kernel.org/r/57AABC8B.1040409@zoho.com [akpm@linux-foundation.org: locate get_count_order_long() next to get_count_order()] [akpm@linux-foundation.org: move get_count_order[_long] definitions to pick up fls_long()] [zijun_hu@htc.com: move out get_count_order[_long]() from __KERNEL__ scope] Link: http://lkml.kernel.org/r/57B2C4CE.80303@zoho.com Link: http://lkml.kernel.org/r/fc045ecf-20fa-0722-b3ac-9a6140488fad@zoho.com Signed-off-by: zijun_hu Cc: Tejun Heo Cc: Johannes Weiner Cc: Minchan Kim Cc: David Rientjes Signed-off-by: zijun_hu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) (limited to 'include/linux') 
diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 299e76b59fe9..a83c822c35c2 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -65,16 +65,6 @@ static inline int get_bitmask_order(unsigned int count) return order; /* We could be slightly more clever with -1 here... */ } -static inline int get_count_order(unsigned int count) -{ - int order; - - order = fls(count) - 1; - if (count & (count - 1)) - order++; - return order; -} - static __always_inline unsigned long hweight_long(unsigned long w) { return sizeof(w) == 4 ? hweight32(w) : hweight64(w); @@ -191,6 +181,32 @@ static inline unsigned fls_long(unsigned long l) return fls64(l); } +static inline int get_count_order(unsigned int count) +{ + int order; + + order = fls(count) - 1; + if (count & (count - 1)) + order++; + return order; +} + +/** + * get_count_order_long - get order after rounding @l up to power of 2 + * @l: parameter + * + * it is same as get_count_order() but with long type parameter + */ +static inline int get_count_order_long(unsigned long l) +{ + if (l == 0UL) + return -1; + else if (l & (l - 1UL)) + return (int)fls_long(l); + else + return (int)fls_long(l) - 1; +} + /** * __ffs64 - find first set bit in a 64 bit word * @word: The 64 bit word -- cgit v1.2.3 From 791cae9620e35d18df2cedf2bd444920c3ecf04a Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 7 Oct 2016 16:57:38 -0700 Subject: mm, compaction: cleanup unused functions Since kswapd compaction moved to kcompactd, compact_pgdat() is not called anymore, so we remove it. The only caller of __compact_pgdat() is compact_node(), so we merge them and remove code that was only reachable from kswapd. 
Link: http://lkml.kernel.org/r/20160810091226.6709-3-vbabka@suse.cz Signed-off-by: Vlastimil Babka Tested-by: Lorenzo Stoakes Acked-by: Michal Hocko Cc: Mel Gorman Cc: Joonsoo Kim Cc: David Rientjes Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index d4e106b5dc27..1bb58581301c 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -70,7 +70,6 @@ extern int fragmentation_index(struct zone *zone, unsigned int order); extern enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, unsigned int alloc_flags, const struct alloc_context *ac, enum compact_priority prio); -extern void compact_pgdat(pg_data_t *pgdat, int order); extern void reset_isolation_suitable(pg_data_t *pgdat); extern enum compact_result compaction_suitable(struct zone *zone, int order, unsigned int alloc_flags, int classzone_idx); @@ -154,10 +153,6 @@ extern void kcompactd_stop(int nid); extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx); #else -static inline void compact_pgdat(pg_data_t *pgdat, int order) -{ -} - static inline void reset_isolation_suitable(pg_data_t *pgdat) { } -- cgit v1.2.3 From cf378319d335663b6722e74db0211b8af55049d5 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 7 Oct 2016 16:57:41 -0700 Subject: mm, compaction: rename COMPACT_PARTIAL to COMPACT_SUCCESS COMPACT_PARTIAL has historically meant that compaction returned after doing some work without fully compacting a zone. It however didn't distinguish if compaction terminated because it succeeded in creating the requested high-order page. This has changed recently and now we only return COMPACT_PARTIAL when compaction thinks it succeeded, or the high-order watermark check in compaction_suitable() passes and no compaction needs to be done. 
So at this point we can make the return value clearer by renaming it to COMPACT_SUCCESS. The next patch will remove some redundant tests for success where compaction just returned COMPACT_SUCCESS. Link: http://lkml.kernel.org/r/20160810091226.6709-4-vbabka@suse.cz Signed-off-by: Vlastimil Babka Tested-by: Lorenzo Stoakes Acked-by: Michal Hocko Cc: Mel Gorman Cc: Joonsoo Kim Cc: David Rientjes Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 1bb58581301c..e88c037afe47 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -49,10 +49,10 @@ enum compact_result { COMPACT_CONTENDED, /* - * direct compaction partially compacted a zone and there might be - * suitable pages + * direct compaction terminated after concluding that the allocation + * should now succeed */ - COMPACT_PARTIAL, + COMPACT_SUCCESS, }; struct alloc_context; /* in mm/internal.h */ @@ -88,7 +88,7 @@ static inline bool compaction_made_progress(enum compact_result result) * that the compaction successfully isolated and migrated some * pageblocks. */ - if (result == COMPACT_PARTIAL) + if (result == COMPACT_SUCCESS) return true; return false; -- cgit v1.2.3 From a8e025e55b35f7eaf6c6c011de1f98d47ddf0843 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 7 Oct 2016 16:57:47 -0700 Subject: mm, compaction: add the ultimate direct compaction priority During reclaim/compaction loop, it's desirable to get a final answer from unsuccessful compaction so we can either fail the allocation or invoke the OOM killer. However, heuristics such as deferred compaction or pageblock skip bits can cause compaction to skip parts or whole zones and lead to premature OOM's, failures or excessive reclaim/compaction retries. 
To remedy this, we introduce a new direct compaction priority called COMPACT_PRIO_SYNC_FULL, which instructs direct compaction to: - ignore deferred compaction status for a zone - ignore pageblock skip hints - ignore cached scanner positions and scan the whole zone The new priority should get eventually picked up by should_compact_retry() and this should improve success rates for costly allocations using __GFP_REPEAT, such as hugetlbfs allocations, and reduce some corner-case OOM's for non-costly allocations. Link: http://lkml.kernel.org/r/20160810091226.6709-6-vbabka@suse.cz [vbabka@suse.cz: use the MIN_COMPACT_PRIORITY alias] Link: http://lkml.kernel.org/r/d443b884-87e7-1c93-8684-3a3a35759fb1@suse.cz Signed-off-by: Vlastimil Babka Tested-by: Lorenzo Stoakes Acked-by: Michal Hocko Cc: Mel Gorman Cc: Joonsoo Kim Cc: David Rientjes Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index e88c037afe47..a1fba9994728 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -6,8 +6,9 @@ * Lower value means higher priority, analogically to reclaim priority. */ enum compact_priority { + COMPACT_PRIO_SYNC_FULL, + MIN_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_FULL, COMPACT_PRIO_SYNC_LIGHT, - MIN_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_LIGHT, DEF_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_LIGHT, COMPACT_PRIO_ASYNC, INIT_COMPACT_PRIORITY = COMPACT_PRIO_ASYNC -- cgit v1.2.3 From 9861a62c335cd34a2b6b25aaaf5898e8370299ec Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 7 Oct 2016 16:57:53 -0700 Subject: mm, compaction: create compact_gap wrapper Compaction uses a watermark gap of (2UL << order) pages at various places and it's not immediately obvious why. Abstract it through a compact_gap() wrapper to create a single place with a thorough explanation. 
[vbabka@suse.cz: clarify the comment of compact_gap()] Link: http://lkml.kernel.org/r/7b6aed1f-fdf8-2063-9ff4-bbe4de712d37@suse.cz Link: http://lkml.kernel.org/r/20160810091226.6709-9-vbabka@suse.cz Signed-off-by: Vlastimil Babka Tested-by: Lorenzo Stoakes Acked-by: Michal Hocko Cc: Mel Gorman Cc: Joonsoo Kim Cc: David Rientjes Cc: Rik van Riel Signed-off-by: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index a1fba9994728..585d55cb0dc0 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -58,6 +58,29 @@ enum compact_result { struct alloc_context; /* in mm/internal.h */ +/* + * Number of free order-0 pages that should be available above given watermark + * to make sure compaction has reasonable chance of not running out of free + * pages that it needs to isolate as migration target during its work. + */ +static inline unsigned long compact_gap(unsigned int order) +{ + /* + * Although all the isolations for migration are temporary, compaction + * free scanner may have up to 1 << order pages on its list and then + * try to split an (order - 1) free page. At that point, a gap of + * 1 << order might not be enough, so it's safer to require twice that + * amount. Note that the number of pages on the list is also + * effectively limited by COMPACT_CLUSTER_MAX, as that's the maximum + * that the migrate scanner can have isolated on migrate list, and free + * scanner is only invoked when the number of isolated free pages is + * lower than that. But it's not worth to complicate the formula here + * as a bigger gap for higher orders than strictly necessary can also + * improve chances of compaction success. 
+ */ + return 2UL << order; +} + #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; extern int sysctl_compaction_handler(struct ctl_table *table, int write, -- cgit v1.2.3 From bf48438354a79df50fadd2e1c0b81baa2619a8b6 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 7 Oct 2016 16:58:12 -0700 Subject: mm, vmscan: get rid of throttle_vm_writeout throttle_vm_writeout() was introduced back in 2005 to fix OOMs caused by excessive pageout activity during the reclaim. Too many pages could be put under writeback therefore LRUs would be full of unreclaimable pages until the IO completes and in turn the OOM killer could be invoked. There have been some important changes introduced since then in the reclaim path though. Writers are throttled by balance_dirty_pages when initiating the buffered IO and later during the memory pressure, the direct reclaim is throttled by wait_iff_congested if the node is considered congested by dirty pages on LRUs and the underlying bdi is congested by the queued IO. The kswapd is throttled as well if it encounters pages marked for immediate reclaim or under writeback which signals that there are too many pages under writeback already. Finally should_reclaim_retry does congestion_wait if the reclaim cannot make any progress and there are too many dirty/writeback pages. Another important aspect is that we do not issue any IO from the direct reclaim context anymore. In a heavy parallel load this could queue a lot of IO which would be very scattered and thus inefficient which would just make the problem worse. These three mechanisms should throttle and keep the amount of IO in a steady state even under heavy IO and memory pressure so yet another throttling point doesn't really seem helpful.
Quite contrary, Mikulas Patocka has reported that swap backed by dm-crypt doesn't work properly because the swapout IO cannot make sufficient progress as the writeout path depends on dm_crypt worker which has to allocate memory to perform the encryption. In order to guarantee a forward progress it relies on the mempool allocator. mempool_alloc(), however, prefers to use the underlying (usually page) allocator before it grabs objects from the pool. Such an allocation can dive into the memory reclaim and consequently to throttle_vm_writeout. If there are too many dirty or pages under writeback it will get throttled even though it is in fact a flusher to clear pending pages. kworker/u4:0 D ffff88003df7f438 10488 6 2 0x00000000 Workqueue: kcryptd kcryptd_crypt [dm_crypt] Call Trace: schedule+0x3c/0x90 schedule_timeout+0x1d8/0x360 io_schedule_timeout+0xa4/0x110 congestion_wait+0x86/0x1f0 throttle_vm_writeout+0x44/0xd0 shrink_zone_memcg+0x613/0x720 shrink_zone+0xe0/0x300 do_try_to_free_pages+0x1ad/0x450 try_to_free_pages+0xef/0x300 __alloc_pages_nodemask+0x879/0x1210 alloc_pages_current+0xa1/0x1f0 new_slab+0x2d7/0x6a0 ___slab_alloc+0x3fb/0x5c0 __slab_alloc+0x51/0x90 kmem_cache_alloc+0x27b/0x310 mempool_alloc_slab+0x1d/0x30 mempool_alloc+0x91/0x230 bio_alloc_bioset+0xbd/0x260 kcryptd_crypt+0x114/0x3b0 [dm_crypt] Let's just drop throttle_vm_writeout altogether. It is not very much helpful anymore. I have tried to test a potential writeback IO runaway similar to the one described in the original patch which has introduced that [1]. Small virtual machine (512MB RAM, 4 CPUs, 2G of swap space and disk image on a rather slow NFS in a sync mode on the host) with 8 parallel writers each writing 1G worth of data. As soon as the pagecache fills up and the direct reclaim hits then I start anon memory consumer in a loop (allocating 300M and exiting after populating it) in the background to make the memory pressure even stronger as well as to disrupt the steady state for the IO. 
The direct reclaim is throttled because of the congestion as well as kswapd hitting congestion_wait due to nr_immediate but throttle_vm_writeout doesn't ever trigger the sleep throughout the test. Dirty+writeback are close to nr_dirty_threshold with some fluctuations caused by the anon consumer. [1] https://www2.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.9-rc1/2.6.9-rc1-mm3/broken-out/vm-pageout-throttling.patch Link: http://lkml.kernel.org/r/1471171473-21418-1-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Mikulas Patocka Cc: Marcelo Tosatti Cc: NeilBrown Cc: Ondrej Kozina Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index fc1e16c25a29..797100e10010 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -319,7 +319,6 @@ void laptop_mode_timer_fn(unsigned long data); #else static inline void laptop_sync_completion(void) { } #endif -void throttle_vm_writeout(gfp_t gfp_mask); bool node_dirty_ok(struct pglist_data *pgdat); int wb_domain_init(struct wb_domain *dom, gfp_t gfp); #ifdef CONFIG_CGROUP_WRITEBACK -- cgit v1.2.3 From e2f612e673f61931b2fe62722832cf5fcf6b3313 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Fri, 7 Oct 2016 16:58:21 -0700 Subject: mm/page_owner: move page_owner specific function to page_owner.c There is no reason that page_owner specific function resides on vmstat.c. 
Link: http://lkml.kernel.org/r/1471315879-32294-4-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Joonsoo Kim Reviewed-by: Sergey Senozhatsky Acked-by: Vlastimil Babka Cc: Minchan Kim Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_owner.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index 30583ab0ffb1..2be728d156b5 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -14,6 +14,8 @@ extern void __split_page_owner(struct page *page, unsigned int order); extern void __copy_page_owner(struct page *oldpage, struct page *newpage); extern void __set_page_owner_migrate_reason(struct page *page, int reason); extern void __dump_page_owner(struct page *page); +extern void pagetypeinfo_showmixedcount_print(struct seq_file *m, + pg_data_t *pgdat, struct zone *zone); static inline void reset_page_owner(struct page *page, unsigned int order) { -- cgit v1.2.3 From 980ac1672e7edaa927557a5186f1967cd45afcf5 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Fri, 7 Oct 2016 16:58:27 -0700 Subject: mm/page_ext: support extra space allocation by page_ext user Until now, if some page_ext users want to use it's own field on page_ext, it should be defined in struct page_ext by hard-coding. It has a problem that wastes memory in following situation. struct page_ext { #ifdef CONFIG_A int a; #endif #ifdef CONFIG_B int b; #endif }; Assume that kernel is built with both CONFIG_A and CONFIG_B. Even if we enable feature A and doesn't enable feature B at runtime, each entry of struct page_ext takes two int rather than one int. It's undesirable result so this patch tries to fix it. To solve above problem, this patch implements to support extra space allocation at runtime. When need() callback returns true, it's extra memory requirement is summed to entry size of page_ext. Also, offset for each user's extra memory space is returned. 
With this offset, user can use this extra space and there is no need to define needed field on page_ext by hard-coding. This patch only implements an infrastructure. Following patch will use it for page_owner which is only user having it's own fields on page_ext. Link: http://lkml.kernel.org/r/1471315879-32294-6-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Joonsoo Kim Acked-by: Vlastimil Babka Cc: Minchan Kim Cc: Michal Hocko Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_ext.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index 03f2a3e7d76d..179bdc4a470c 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -7,6 +7,8 @@ struct pglist_data; struct page_ext_operations { + size_t offset; + size_t size; bool (*need)(void); void (*init)(void); }; -- cgit v1.2.3 From 9300d8dfd282bd1473395c5c4c76bfdc90b05978 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Fri, 7 Oct 2016 16:58:30 -0700 Subject: mm/page_owner: don't define fields on struct page_ext by hard-coding There is a memory waste problem if we define field on struct page_ext by hard-coding. Entry size of struct page_ext includes the size of those fields even if it is disabled at runtime. Now, extra memory request at runtime is possible so page_owner don't need to define it's own fields by hard-coding. This patch removes hard-coded define and uses extra memory for storing page_owner information in page_owner. Most of code are just mechanical changes. 
Link: http://lkml.kernel.org/r/1471315879-32294-7-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Joonsoo Kim Acked-by: Vlastimil Babka Cc: Minchan Kim Cc: Michal Hocko Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_ext.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index 179bdc4a470c..9298c393ddaa 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -44,12 +44,6 @@ enum page_ext_flags { */ struct page_ext { unsigned long flags; -#ifdef CONFIG_PAGE_OWNER - unsigned int order; - gfp_t gfp_mask; - int last_migrate_reason; - depot_stack_handle_t handle; -#endif }; extern void pgdat_page_ext_init(struct pglist_data *pgdat); -- cgit v1.2.3 From f7e2355f0f8635ddcfd26858f58732b7bf85f9f4 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 7 Oct 2016 16:58:36 -0700 Subject: mm: pagewalk: fix the comment for test_walk Modify the comment describing struct mm_walk->test_walk()s behaviour to match the comment on walk_page_test() and the behaviour of walk_page_vma(). Fixes: fafaa4264eba4 ("pagewalk: improve vma handling") Link: http://lkml.kernel.org/r/1471622518-21980-1-git-send-email-james.morse@arm.com Signed-off-by: James Morse Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5f14534f0c90..0a063b4e4456 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1197,10 +1197,10 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, * @pte_hole: if set, called for each hole at all levels * @hugetlb_entry: if set, called for each hugetlb entry * @test_walk: caller specific callback function to determine whether - * we walk over the current vma or not. 
A positive returned + * we walk over the current vma or not. Returning 0 * value means "do page table walk over the current vma," * and a negative one means "abort current page table walk - * right now." 0 means "skip the current vma." + * right now." 1 means "skip the current vma." * @mm: mm_struct representing the target process of page table walk * @vma: vma currently walked (NULL if walking outside vmas) * @private: private data for callbacks' usage -- cgit v1.2.3 From 6b53491598a4d9694318e6e2b11d8c9988a483d4 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 7 Oct 2016 16:58:42 -0700 Subject: mm, swap: add swap_cluster_list This is a code clean up patch without functionality changes. The swap_cluster_list data structure and its operations are introduced to provide some better encapsulation for the free cluster and discard cluster list operations. This avoid some code duplication, improved the code readability, and reduced the total line number. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/1472067356-16004-1-git-send-email-ying.huang@intel.com Signed-off-by: "Huang, Ying" Acked-by: Minchan Kim Acked-by: Rik van Riel Cc: Tim Chen Cc: Hugh Dickins Cc: Shaohua Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index e1d761463243..a56523cefb9b 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -191,6 +191,11 @@ struct percpu_cluster { unsigned int next; /* Likely next allocation offset */ }; +struct swap_cluster_list { + struct swap_cluster_info head; + struct swap_cluster_info tail; +}; + /* * The in-memory structure used to track swap areas. 
*/ @@ -203,8 +208,7 @@ struct swap_info_struct { unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ - struct swap_cluster_info free_cluster_head; /* free cluster list head */ - struct swap_cluster_info free_cluster_tail; /* free cluster list tail */ + struct swap_cluster_list free_clusters; /* free clusters list */ unsigned int lowest_bit; /* index of first free in swap_map */ unsigned int highest_bit; /* index of last free in swap_map */ unsigned int pages; /* total of usable pages of swap */ @@ -235,8 +239,7 @@ struct swap_info_struct { * first. */ struct work_struct discard_work; /* discard worker */ - struct swap_cluster_info discard_cluster_head; /* list head of discard clusters */ - struct swap_cluster_info discard_cluster_tail; /* list tail of discard clusters */ + struct swap_cluster_list discard_clusters; /* discard clusters list */ }; /* linux/mm/workingset.c */ -- cgit v1.2.3 From 8496afaba93ece80a83cbd096f0675a1020ddfc4 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 7 Oct 2016 16:58:48 -0700 Subject: mm,oom_reaper: do not attempt to reap a task twice "mm, oom_reaper: do not attempt to reap a task twice" tried to give the OOM reaper one more chance to retry using MMF_OOM_NOT_REAPABLE flag. But the usefulness of the flag is rather limited and actually never shown in practice. If the flag is set, it means that the holder of mm->mmap_sem cannot call up_write() due to presumably being blocked at unkillable wait waiting for other thread's memory allocation. But since one of threads sharing that mm will queue that mm immediately via task_will_free_mem() shortcut (otherwise, oom_badness() will select the same mm again due to oom_score_adj value unchanged), retrying MMF_OOM_NOT_REAPABLE mm is unlikely helpful. Let's always set MMF_OOM_REAPED. 
Link: http://lkml.kernel.org/r/1472119394-11342-3-git-send-email-mhocko@kernel.org Signed-off-by: Tetsuo Handa Signed-off-by: Michal Hocko Cc: Oleg Nesterov Cc: David Rientjes Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 7543a476178b..b48cd32be445 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -525,7 +525,6 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_HAS_UPROBES 19 /* has uprobes */ #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ #define MMF_OOM_REAPED 21 /* mm has been already reaped */ -#define MMF_OOM_NOT_REAPABLE 22 /* mm couldn't be reaped */ #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) -- cgit v1.2.3 From 26db62f179d112d345031e14926a4cda9cd40d6e Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 7 Oct 2016 16:58:51 -0700 Subject: oom: keep mm of the killed task available oom_reap_task has to call exit_oom_victim in order to make sure that the oom vicim will not block the oom killer for ever. This is, however, opening new problems (e.g oom_killer_disable exclusion - see commit 74070542099c ("oom, suspend: fix oom_reaper vs. oom_killer_disable race")). exit_oom_victim should be only called from the victim's context ideally. One way to achieve this would be to rely on per mm_struct flags. We already have MMF_OOM_REAPED to hide a task from the oom killer since "mm, oom: hide mm which is shared with kthread or global init". The problem is that the exit path: do_exit exit_mm tsk->mm = NULL; mmput __mmput exit_oom_victim doesn't guarantee that exit_oom_victim will get called in a bounded amount of time. At least exit_aio depends on IO which might get blocked due to lack of memory and who knows what else is lurking there. This patch takes a different approach. 
We remember tsk->mm into the signal_struct and bind it to the signal struct life time for all oom victims. __oom_reap_task_mm as well as oom_scan_process_thread do not have to rely on find_lock_task_mm anymore and they will have a reliable reference to the mm struct. As a result all the oom specific communication inside the OOM killer can be done via tsk->signal->oom_mm. Increasing the signal_struct for something as unlikely as the oom killer is far from ideal but this approach will make the code much more reasonable and long term we even might want to move task->mm into the signal_struct anyway. In the next step we might want to make the oom killer exclusion and access to memory reserves completely independent which would be also nice. Link: http://lkml.kernel.org/r/1472119394-11342-4-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Tetsuo Handa Cc: Oleg Nesterov Cc: David Rientjes Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b48cd32be445..67ea79610e67 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -805,6 +805,8 @@ struct signal_struct { short oom_score_adj; /* OOM kill score adjustment */ short oom_score_adj_min; /* OOM kill score adjustment min value. * Only settable by CAP_SYS_RESOURCE. 
*/ + struct mm_struct *oom_mm; /* recorded mm when the thread group got + * killed by the oom killer */ struct mutex cred_guard_mutex; /* guard against foreign influences on * credential calculations -- cgit v1.2.3 From 7283094ec3db318e87ec9e31cf75f136ac2a4dd3 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 7 Oct 2016 16:58:54 -0700 Subject: kernel, oom: fix potential pgd_lock deadlock from __mmdrop Lockdep complains that __mmdrop is not safe from the softirq context: ================================= [ INFO: inconsistent lock state ] 4.6.0-oomfortification2-00011-geeb3eadeab96-dirty #949 Tainted: G W --------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. swapper/1/0 [HC0[0]:SC1[1]:HE1:SE0] takes: (pgd_lock){+.?...}, at: pgd_free+0x19/0x6b {SOFTIRQ-ON-W} state was registered at: __lock_acquire+0xa06/0x196e lock_acquire+0x139/0x1e1 _raw_spin_lock+0x32/0x41 __change_page_attr_set_clr+0x2a5/0xacd change_page_attr_set_clr+0x16f/0x32c set_memory_nx+0x37/0x3a free_init_pages+0x9e/0xc7 alternative_instructions+0xa2/0xb3 check_bugs+0xe/0x2d start_kernel+0x3ce/0x3ea x86_64_start_reservations+0x2a/0x2c x86_64_start_kernel+0x17a/0x18d irq event stamp: 105916 hardirqs last enabled at (105916): free_hot_cold_page+0x37e/0x390 hardirqs last disabled at (105915): free_hot_cold_page+0x2c1/0x390 softirqs last enabled at (105878): _local_bh_enable+0x42/0x44 softirqs last disabled at (105879): irq_exit+0x6f/0xd1 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(pgd_lock); lock(pgd_lock); *** DEADLOCK *** 1 lock held by swapper/1/0: #0: (rcu_callback){......}, at: rcu_process_callbacks+0x390/0x800 stack backtrace: CPU: 1 PID: 0 Comm: swapper/1 Tainted: G W 4.6.0-oomfortification2-00011-geeb3eadeab96-dirty #949 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Debian-1.8.2-1 04/01/2014 Call Trace: print_usage_bug.part.25+0x259/0x268 mark_lock+0x381/0x567 __lock_acquire+0x993/0x196e 
lock_acquire+0x139/0x1e1 _raw_spin_lock+0x32/0x41 pgd_free+0x19/0x6b __mmdrop+0x25/0xb9 __put_task_struct+0x103/0x11e delayed_put_task_struct+0x157/0x15e rcu_process_callbacks+0x660/0x800 __do_softirq+0x1ec/0x4d5 irq_exit+0x6f/0xd1 smp_apic_timer_interrupt+0x42/0x4d apic_timer_interrupt+0x8e/0xa0 arch_cpu_idle+0xf/0x11 default_idle_call+0x32/0x34 cpu_startup_entry+0x20c/0x399 start_secondary+0xfe/0x101 More over commit a79e53d85683 ("x86/mm: Fix pgd_lock deadlock") was explicit about pgd_lock not to be called from the irq context. This means that __mmdrop called from free_signal_struct has to be postponed to a user context. We already have a similar mechanism for mmput_async so we can use it here as well. This is safe because mm_count is pinned by mm_users. This fixes bug introduced by "oom: keep mm of the killed task available" Link: http://lkml.kernel.org/r/1472119394-11342-5-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Tetsuo Handa Cc: Oleg Nesterov Cc: David Rientjes Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 -- include/linux/sched.h | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 903200f4ec41..4a8acedf4b7d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -515,9 +515,7 @@ struct mm_struct { #ifdef CONFIG_HUGETLB_PAGE atomic_long_t hugetlb_usage; #endif -#ifdef CONFIG_MMU struct work_struct async_put_work; -#endif }; static inline void mm_init_cpumask(struct mm_struct *mm) diff --git a/include/linux/sched.h b/include/linux/sched.h index 67ea79610e67..c4b588358296 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2877,6 +2877,20 @@ static inline void mmdrop(struct mm_struct *mm) __mmdrop(mm); } +static inline void mmdrop_async_fn(struct work_struct *work) +{ + struct mm_struct *mm = container_of(work, struct 
mm_struct, async_put_work); + __mmdrop(mm); +} + +static inline void mmdrop_async(struct mm_struct *mm) +{ + if (unlikely(atomic_dec_and_test(&mm->mm_count))) { + INIT_WORK(&mm->async_put_work, mmdrop_async_fn); + schedule_work(&mm->async_put_work); + } +} + static inline bool mmget_not_zero(struct mm_struct *mm) { return atomic_inc_not_zero(&mm->mm_users); -- cgit v1.2.3 From 862e3073b3eed13f17bd6be6ca6052db15c0b728 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 7 Oct 2016 16:58:57 -0700 Subject: mm, oom: get rid of signal_struct::oom_victims After "oom: keep mm of the killed task available" we can safely detect an oom victim by checking task->signal->oom_mm so we do not need the signal_struct counter anymore so let's get rid of it. This alone wouldn't be sufficient for nommu archs because exit_oom_victim doesn't hide the process from the oom killer anymore. We can, however, mark the mm with a MMF flag in __mmput. We can reuse MMF_OOM_REAPED and rename it to a more generic MMF_OOM_SKIP. 
Link: http://lkml.kernel.org/r/1472119394-11342-6-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Tetsuo Handa Cc: Oleg Nesterov Cc: David Rientjes Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 5 +++++ include/linux/sched.h | 3 +-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index 17946e5121b6..b61357d07170 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -58,6 +58,11 @@ static inline bool oom_task_origin(const struct task_struct *p) return p->signal->oom_flag_origin; } +static inline bool tsk_is_oom_victim(struct task_struct * tsk) +{ + return tsk->signal->oom_mm; +} + extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages); diff --git a/include/linux/sched.h b/include/linux/sched.h index c4b588358296..af0721364788 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -524,7 +524,7 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_HAS_UPROBES 19 /* has uprobes */ #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ -#define MMF_OOM_REAPED 21 /* mm has been already reaped */ +#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) @@ -672,7 +672,6 @@ struct signal_struct { atomic_t sigcnt; atomic_t live; int nr_threads; - atomic_t oom_victims; /* # of TIF_MEDIE threads in this thread group */ struct list_head thread_head; wait_queue_head_t wait_chldexit; /* for wait4() */ -- cgit v1.2.3 From 7d2e7a22cf27e7569e6816ccc05dd74248048b30 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 7 Oct 2016 16:59:00 -0700 Subject: oom, suspend: fix oom_killer_disable vs. pm suspend properly Commit 74070542099c ("oom, suspend: fix oom_reaper vs. 
oom_killer_disable race") has worked around an existing race between oom_killer_disable and oom_reaper by adding another round of try_to_freeze_tasks after the oom killer was disabled. This was the easiest thing to do for a late 4.7 fix. Let's fix it properly now. After "oom: keep mm of the killed task available" we no longer have to call exit_oom_victim from the oom reaper because we have stable mm available and hide the oom_reaped mm by MMF_OOM_SKIP flag. So let's remove exit_oom_victim and the race described in the above commit doesn't exist anymore. Unfortunately this alone is not sufficient for the oom_killer_disable usecase because now we do not have any reliable way to reach exit_oom_victim (the victim might get stuck on a way to exit for an unbounded amount of time). OOM killer can cope with that by checking mm flags and moving on to another victim but we cannot do the same for oom_killer_disable as we would lose the guarantee of no further interference of the victim with the rest of the system. What we can do instead is to cap the maximum time the oom_killer_disable waits for victims. The only current user of this function (pm suspend) already has a concept of timeout for back off so we can reuse the same value there. Let's drop set_freezable for the oom_reaper kthread because it is no longer needed as the reaper doesn't wake or thaw any processes.
Link: http://lkml.kernel.org/r/1472119394-11342-7-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Tetsuo Handa Cc: Oleg Nesterov Cc: David Rientjes Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index b61357d07170..0f1b9da108e4 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -74,7 +74,7 @@ extern void exit_oom_victim(struct task_struct *tsk); extern int register_oom_notifier(struct notifier_block *nb); extern int unregister_oom_notifier(struct notifier_block *nb); -extern bool oom_killer_disable(void); +extern bool oom_killer_disable(signed long timeout); extern void oom_killer_enable(void); extern struct task_struct *find_lock_task_mm(struct task_struct *p); -- cgit v1.2.3 From 38531201c12144cd7d96abfdfe7449c2b01375e8 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 7 Oct 2016 16:59:03 -0700 Subject: mm, oom: enforce exit_oom_victim on current task There are no users of exit_oom_victim on !current task anymore so enforce the API to always work on the current. 
Link: http://lkml.kernel.org/r/1472119394-11342-8-git-send-email-mhocko@kernel.org Signed-off-by: Tetsuo Handa Signed-off-by: Michal Hocko Cc: Oleg Nesterov Cc: David Rientjes Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index 0f1b9da108e4..b4e36e92bc87 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -69,7 +69,7 @@ extern unsigned long oom_badness(struct task_struct *p, extern bool out_of_memory(struct oom_control *oc); -extern void exit_oom_victim(struct task_struct *tsk); +extern void exit_oom_victim(void); extern int register_oom_notifier(struct notifier_block *nb); extern int unregister_oom_notifier(struct notifier_block *nb); -- cgit v1.2.3 From 3f70dc38cec2ad6e5355f80c4c7a15a3f7e97a19 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 7 Oct 2016 16:59:06 -0700 Subject: mm: make sure that kthreads will not refault oom reaped memory There are only few use_mm() users in the kernel right now. Most of them write to the target memory but vhost driver relies on copy_from_user/get_user from a kernel thread context. This makes it impossible to reap the memory of an oom victim which shares the mm with the vhost kernel thread because it could see a zero page unexpectedly and theoretically make an incorrect decision visible outside of the killed task context. To quote Michael S. Tsirkin: : Getting an error from __get_user and friends is handled gracefully. : Getting zero instead of a real value will cause userspace : memory corruption. The vhost kernel thread is bound to an open fd of the vhost device which is not tight to the mm owner life cycle in general. 
The device fd can be inherited or passed over to another process which means that we really have to be careful about unexpected memory corruption because unlike for normal oom victims the result will be visible outside of the oom victim context. Make sure that no kthread context (users of use_mm) can ever see corrupted data because of the oom reaper and hook into the page fault path by checking MMF_UNSTABLE mm flag. __oom_reap_task_mm will set the flag before it starts unmapping the address space while the flag is checked after the page fault has been handled. If the flag is set then SIGBUS is triggered so any g-u-p user will get an error code. Regular tasks do not need this protection because all which share the mm are killed when the mm is reaped and so the corruption will not outlive them. This patch shouldn't have any visible effect at this moment because the OOM killer doesn't invoke oom reaper for tasks with mm shared with kthreads yet. Link: http://lkml.kernel.org/r/1472119394-11342-9-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: "Michael S.
Tsirkin" Cc: Tetsuo Handa Cc: Oleg Nesterov Cc: David Rientjes Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index af0721364788..6bee6f988912 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -525,6 +525,7 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_HAS_UPROBES 19 /* has uprobes */ #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ #define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ +#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) -- cgit v1.2.3 From f6f34b4387d9e18304451a131b35d7c4f27a0b5a Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Fri, 7 Oct 2016 16:59:15 -0700 Subject: mm: introduce arch_reserved_kernel_pages() Currently arch specific code can reserve memory blocks but alloc_large_system_hash() may not take it into consideration when sizing the hashes. This can lead to bigger hash than required and lead to no available memory for other purposes. This is specifically true for systems with CONFIG_DEFERRED_STRUCT_PAGE_INIT enabled. One approach to solve this problem would be to walk through the memblock regions and calculate the available memory and base the size of hash system on the available memory. The other approach would be to depend on the architecture to provide the number of pages that are reserved. This change provides hooks to allow the architecture to provide the required info. 
Link: http://lkml.kernel.org/r/1472476010-4709-2-git-send-email-srikar@linux.vnet.ibm.com Signed-off-by: Srikar Dronamraju Suggested-by: Mel Gorman Cc: Vlastimil Babka Cc: Michal Hocko Cc: Michael Ellerman Cc: Mahesh Salgaonkar Cc: Hari Bathini Cc: Dave Hansen Cc: Balbir Singh Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0a063b4e4456..046077b4209d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1924,6 +1924,9 @@ extern void show_mem(unsigned int flags); extern long si_mem_available(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); +#ifdef __HAVE_ARCH_RESERVED_KERNEL_PAGES +extern unsigned long arch_reserved_kernel_pages(void); +#endif extern __printf(3, 4) void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, -- cgit v1.2.3 From 8907de5dc6e9d5925cf3b0a698cc3a4272fda073 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Fri, 7 Oct 2016 16:59:18 -0700 Subject: mm/memblock.c: expose total reserved memory The total reserved memory in a system is accounted but not available for use outside mm/memblock.c. By exposing the total reserved memory, systems can better calculate the size of large hashes. 
Link: http://lkml.kernel.org/r/1472476010-4709-3-git-send-email-srikar@linux.vnet.ibm.com Signed-off-by: Srikar Dronamraju Suggested-by: Mel Gorman Cc: Vlastimil Babka Cc: Michal Hocko Cc: Michael Ellerman Cc: Mahesh Salgaonkar Cc: Hari Bathini Cc: Dave Hansen Cc: Balbir Singh Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 2925da23505d..5b759c9acf97 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -328,6 +328,7 @@ phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr); phys_addr_t memblock_phys_mem_size(void); +phys_addr_t memblock_reserved_size(void); phys_addr_t memblock_mem_size(unsigned long limit_pfn); phys_addr_t memblock_start_of_DRAM(void); phys_addr_t memblock_end_of_DRAM(void); -- cgit v1.2.3 From 2382705f22c1436a153800cf6051b08f0ea14838 Mon Sep 17 00:00:00 2001 From: zijun_hu Date: Fri, 7 Oct 2016 16:59:24 -0700 Subject: mm/nobootmem.c: remove duplicate macro ARCH_LOW_ADDRESS_LIMIT statements Fix the following bugs: - the same ARCH_LOW_ADDRESS_LIMIT statements are duplicated between header and relevant source - don't ensure ARCH_LOW_ADDRESS_LIMIT perhaps defined by ARCH in asm/processor.h is preferred over default in linux/bootmem.h completely since the former header isn't included by the latter Link: http://lkml.kernel.org/r/e046aeaa-e160-6d9e-dc1b-e084c2fd999f@zoho.com Signed-off-by: zijun_hu Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index f9be32691718..962164d36506 100644 --- a/include/linux/bootmem.h +++ 
b/include/linux/bootmem.h @@ -7,6 +7,7 @@ #include #include #include +#include /* * simple boot-time physical memory area allocator. @@ -119,6 +120,10 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, #define BOOTMEM_LOW_LIMIT __pa(MAX_DMA_ADDRESS) #endif +#ifndef ARCH_LOW_ADDRESS_LIMIT +#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL +#endif + #define alloc_bootmem(x) \ __alloc_bootmem(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_align(x, align) \ @@ -180,10 +185,6 @@ static inline void * __init memblock_virt_alloc_nopanic( NUMA_NO_NODE); } -#ifndef ARCH_LOW_ADDRESS_LIMIT -#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL -#endif - static inline void * __init memblock_virt_alloc_low( phys_addr_t size, phys_addr_t align) { -- cgit v1.2.3 From 371a096edf43a8c71844cf71c20765c8b21d07d9 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 7 Oct 2016 16:59:30 -0700 Subject: mm: don't use radix tree writeback tags for pages in swap cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit File pages use a set of radix tree tags (DIRTY, TOWRITE, WRITEBACK, etc.) to accelerate finding the pages with a specific tag in the radix tree during inode writeback. But for anonymous pages in the swap cache, there is no inode writeback. So there is no need to find the pages with some writeback tags in the radix tree. It is not necessary to touch radix tree writeback tags for pages in the swap cache. Per Rik van Riel's suggestion, a new flag AS_NO_WRITEBACK_TAGS is introduced for address spaces which don't need to update the writeback tags. The flag is set for swap caches. It may be used for DAX file systems, etc. With this patch, the swap out bandwidth improved 22.3% (from ~1.2GB/s to ~1.48GBps) in the vm-scalability swap-w-seq test case with 8 processes. The test is done on a Xeon E5 v3 system. The swap device used is a RAM simulated PMEM (persistent memory) device. 
The improvement comes from the reduced contention on the swap cache radix tree lock. To test sequential swapping out, the test case uses 8 processes, which sequentially allocate and write to the anonymous pages until RAM and part of the swap device is used up. Details of comparison is as follow, base base+patch ---------------- -------------------------- %stddev %change %stddev \ | \ 2506952 ± 2% +28.1% 3212076 ± 7% vm-scalability.throughput 1207402 ± 7% +22.3% 1476578 ± 6% vmstat.swap.so 10.86 ± 12% -23.4% 8.31 ± 16% perf-profile.cycles-pp._raw_spin_lock_irq.__add_to_swap_cache.add_to_swap_cache.add_to_swap.shrink_page_list 10.82 ± 13% -33.1% 7.24 ± 14% perf-profile.cycles-pp._raw_spin_lock_irqsave.__remove_mapping.shrink_page_list.shrink_inactive_list.shrink_zone_memcg 10.36 ± 11% -100.0% 0.00 ± -1% perf-profile.cycles-pp._raw_spin_lock_irqsave.__test_set_page_writeback.bdev_write_page.__swap_writepage.swap_writepage 10.52 ± 12% -100.0% 0.00 ± -1% perf-profile.cycles-pp._raw_spin_lock_irqsave.test_clear_page_writeback.end_page_writeback.page_endio.pmem_rw_page Link: http://lkml.kernel.org/r/1472578089-5560-1-git-send-email-ying.huang@intel.com Signed-off-by: "Huang, Ying" Acked-by: Rik van Riel Cc: Hugh Dickins Cc: Shaohua Li Cc: Minchan Kim Cc: Mel Gorman Cc: Tejun Heo Cc: Wu Fengguang Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 01e84436cddf..48d9cf04337c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -25,6 +25,8 @@ enum mapping_flags { AS_MM_ALL_LOCKS = __GFP_BITS_SHIFT + 2, /* under mm_take_all_locks() */ AS_UNEVICTABLE = __GFP_BITS_SHIFT + 3, /* e.g., ramdisk, SHM_LOCK */ AS_EXITING = __GFP_BITS_SHIFT + 4, /* final truncate in progress */ + /* writeback related tags are not used */ + AS_NO_WRITEBACK_TAGS = __GFP_BITS_SHIFT + 5, }; 
static inline void mapping_set_error(struct address_space *mapping, int error) @@ -64,6 +66,16 @@ static inline int mapping_exiting(struct address_space *mapping) return test_bit(AS_EXITING, &mapping->flags); } +static inline void mapping_set_no_writeback_tags(struct address_space *mapping) +{ + set_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags); +} + +static inline int mapping_use_writeback_tags(struct address_space *mapping) +{ + return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags); +} + static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { return (__force gfp_t)mapping->flags & __GFP_BITS_MASK; -- cgit v1.2.3 From 74d2fad1334d12bac8fe017aba598dd66c86628b Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Fri, 7 Oct 2016 16:59:56 -0700 Subject: thp, dax: add thp_get_unmapped_area for pmd mappings When CONFIG_FS_DAX_PMD is set, DAX supports mmap() using pmd page size. This feature relies on both mmap virtual address and FS block (i.e. physical address) to be aligned by the pmd page size. Users can use mkfs options to specify FS to align block allocations. However, aligning mmap address requires code changes to existing applications for providing a pmd-aligned address to mmap(). For instance, fio with "ioengine=mmap" performs I/Os with mmap() [1]. It calls mmap() with a NULL address, which needs to be changed to provide a pmd-aligned address for testing with DAX pmd mappings. Changing all applications that call mmap() with NULL is undesirable. Add thp_get_unmapped_area(), which can be called by filesystem's get_unmapped_area to align an mmap address by the pmd size for a DAX file. It calls the default handler, mm->get_unmapped_area(), to find a range and then aligns it for a DAX file. The patch is based on Matthew Wilcox's change that allows adding support of the pud page size easily. 
[1]: https://github.com/axboe/fio/blob/master/engines/mmap.c Link: http://lkml.kernel.org/r/1472497881-9323-2-git-send-email-toshi.kani@hpe.com Signed-off-by: Toshi Kani Reviewed-by: Dan Williams Cc: Matthew Wilcox Cc: Ross Zwisler Cc: Kirill A. Shutemov Cc: Dave Chinner Cc: Jan Kara Cc: Theodore Ts'o Cc: Andreas Dilger Cc: Mike Kravetz Cc: "Kirill A. Shutemov" Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 6f14de45b5ce..4fca5263fd42 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -87,6 +87,10 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma); extern unsigned long transparent_hugepage_flags; +extern unsigned long thp_get_unmapped_area(struct file *filp, + unsigned long addr, unsigned long len, unsigned long pgoff, + unsigned long flags); + extern void prep_transhuge_page(struct page *page); extern void free_transhuge_page(struct page *page); @@ -169,6 +173,9 @@ void put_huge_zero_page(void); static inline void prep_transhuge_page(struct page *page) {} #define transparent_hugepage_flags 0UL + +#define thp_get_unmapped_area NULL + static inline int split_huge_page_to_list(struct page *page, struct list_head *list) { -- cgit v1.2.3 From 6fcb52a56ff60d240f06296b12827e7f20d45f63 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Fri, 7 Oct 2016 17:00:08 -0700 Subject: thp: reduce usage of huge zero page's atomic counter The global zero page is used to satisfy an anonymous read fault. If THP(Transparent HugePage) is enabled then the global huge zero page is used. The global huge zero page uses an atomic counter for reference counting and is allocated/freed dynamically according to its counter value. CPU time spent on that counter will greatly increase if there are a lot of processes doing anonymous read faults. 
This patch proposes a way to reduce the access to the global counter so that the CPU load can be reduced accordingly. To do this, a new flag of the mm_struct is introduced: MMF_USED_HUGE_ZERO_PAGE. With this flag, the process only need to touch the global counter in two cases: 1 The first time it uses the global huge zero page; 2 The time when mm_user of its mm_struct reaches zero. Note that right now, the huge zero page is eligible to be freed as soon as its last use goes away. With this patch, the page will not be eligible to be freed until the exit of the last process from which it was ever used. And with the use of mm_user, the kthread is not eligible to use huge zero page either. Since no kthread is using huge zero page today, there is no difference after applying this patch. But if that is not desired, I can change it to when mm_count reaches zero. Case used for test on Haswell EP: usemem -n 72 --readonly -j 0x200000 100G Which spawns 72 processes and each will mmap 100G anonymous space and then do read only access to that space sequentially with a step of 2MB. CPU cycles from perf report for base commit: 54.03% usemem [kernel.kallsyms] [k] get_huge_zero_page CPU cycles from perf report for this commit: 0.11% usemem [kernel.kallsyms] [k] mm_get_huge_zero_page Performance(throughput) of the workload for base commit: 1784430792 Performance(throughput) of the workload for this commit: 4726928591 164% increase. Runtime of the workload for base commit: 707592 us Runtime of the workload for this commit: 303970 us 50% drop. Link: http://lkml.kernel.org/r/fe51a88f-446a-4622-1363-ad1282d71385@intel.com Signed-off-by: Aaron Lu Cc: Sergey Senozhatsky Cc: "Kirill A. 
Shutemov" Cc: Dave Hansen Cc: Tim Chen Cc: Huang Ying Cc: Vlastimil Babka Cc: Jerome Marchand Cc: Andrea Arcangeli Cc: Mel Gorman Cc: Ebru Akagunduz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 8 ++++---- include/linux/sched.h | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 4fca5263fd42..9b9f65d99873 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -156,8 +156,8 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) return is_huge_zero_page(pmd_page(pmd)); } -struct page *get_huge_zero_page(void); -void put_huge_zero_page(void); +struct page *mm_get_huge_zero_page(struct mm_struct *mm); +void mm_put_huge_zero_page(struct mm_struct *mm); #define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot)) @@ -220,9 +220,9 @@ static inline bool is_huge_zero_page(struct page *page) return false; } -static inline void put_huge_zero_page(void) +static inline void mm_put_huge_zero_page(struct mm_struct *mm) { - BUILD_BUG(); + return; } static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma, diff --git a/include/linux/sched.h b/include/linux/sched.h index 6bee6f988912..348f51b0ec92 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -526,6 +526,7 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ #define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ +#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) -- cgit v1.2.3 From f6ab1f7f6b2d8e48c5fc47746a67363b20d79a1d Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 7 Oct 2016 17:00:21 -0700 Subject: mm, swap: use offset of swap entry as key of swap cache This patch is to improve the performance of 
swap cache operations when the type of the swap device is not 0. Originally, the whole swap entry value is used as the key of the swap cache, even though there is one radix tree for each swap device. If the type of the swap device is not 0, the height of the radix tree of the swap cache will be increased unnecessarily, especially on 64bit architecture. For example, for a 1GB swap device on the x86_64 architecture, the height of the radix tree of the swap cache is 11. But if the offset of the swap entry is used as the key of the swap cache, the height of the radix tree of the swap cache is 4. The increased height causes unnecessary radix tree descending and increased cache footprint. This patch reduces the height of the radix tree of the swap cache via using the offset of the swap entry instead of the whole swap entry value as the key of the swap cache. In 32 processes sequential swap out test case on a Xeon E5 v3 system with RAM disk as swap, the lock contention for the spinlock of the swap cache is reduced from 20.15% to 12.19%, when the type of the swap device is 1. Use the whole swap entry as key, perf-profile.calltrace.cycles-pp._raw_spin_lock_irq.__add_to_swap_cache.add_to_swap_cache.add_to_swap.shrink_page_list: 10.37, perf-profile.calltrace.cycles-pp._raw_spin_lock_irqsave.__remove_mapping.shrink_page_list.shrink_inactive_list.shrink_node_memcg: 9.78, Use the swap offset as key, perf-profile.calltrace.cycles-pp._raw_spin_lock_irq.__add_to_swap_cache.add_to_swap_cache.add_to_swap.shrink_page_list: 6.25, perf-profile.calltrace.cycles-pp._raw_spin_lock_irqsave.__remove_mapping.shrink_page_list.shrink_inactive_list.shrink_node_memcg: 5.94, Link: http://lkml.kernel.org/r/1473270649-27229-1-git-send-email-ying.huang@intel.com Signed-off-by: "Huang, Ying" Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Cc: "Kirill A. 
Shutemov" Cc: Dave Hansen Cc: Dan Williams Cc: Joonsoo Kim Cc: Hugh Dickins Cc: Mel Gorman Cc: Minchan Kim Cc: Aaron Lu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 046077b4209d..028e84e2ab42 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1048,19 +1048,19 @@ struct address_space *page_file_mapping(struct page *page) return page->mapping; } +extern pgoff_t __page_file_index(struct page *page); + /* * Return the pagecache index of the passed page. Regular pagecache pages - * use ->index whereas swapcache pages use ->private + * use ->index whereas swapcache pages use swp_offset(->private) */ static inline pgoff_t page_index(struct page *page) { if (unlikely(PageSwapCache(page))) - return page_private(page); + return __page_file_index(page); return page->index; } -extern pgoff_t __page_file_index(struct page *page); - /* * Return the file index of the page. Regular pagecache pages use ->index * whereas swapcache pages use swp_offset(->private) -- cgit v1.2.3 From 8cd797887ae0a73313ba248e027e59c0a597d693 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 7 Oct 2016 17:00:24 -0700 Subject: mm: remove page_file_index After using the offset of the swap entry as the key of the swap cache, the page_index() becomes exactly same as page_file_index(). So the page_file_index() is removed and the callers are changed to use page_index() instead. Link: http://lkml.kernel.org/r/1473270649-27229-2-git-send-email-ying.huang@intel.com Signed-off-by: "Huang, Ying" Cc: Trond Myklebust Cc: Anna Schumaker Cc: "Kirill A. 
Shutemov" Cc: Michal Hocko Cc: Dave Hansen Cc: Johannes Weiner Cc: Dan Williams Cc: Joonsoo Kim Cc: Ross Zwisler Cc: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 12 ------------ include/linux/pagemap.h | 2 +- 2 files changed, 1 insertion(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 028e84e2ab42..3e8807e0b9d2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1061,18 +1061,6 @@ static inline pgoff_t page_index(struct page *page) return page->index; } -/* - * Return the file index of the page. Regular pagecache pages use ->index - * whereas swapcache pages use swp_offset(->private) - */ -static inline pgoff_t page_file_index(struct page *page) -{ - if (unlikely(PageSwapCache(page))) - return __page_file_index(page); - - return page->index; -} - bool page_mapped(struct page *page); struct address_space *page_mapping(struct page *page); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 48d9cf04337c..794dbcb91084 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -408,7 +408,7 @@ static inline loff_t page_offset(struct page *page) static inline loff_t page_file_offset(struct page *page) { - return ((loff_t)page_file_index(page)) << PAGE_SHIFT; + return ((loff_t)page_index(page)) << PAGE_SHIFT; } extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma, -- cgit v1.2.3 From c2033b00dbe856909fcaccf038e4e0d3dcfb85af Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 7 Oct 2016 17:00:34 -0700 Subject: mm, compaction: restrict full priority to non-costly orders The new ultimate compaction priority disables some heuristics, which may result in excessive cost. This is fine for non-costly orders where we want to try hard before resorting to OOM, but might be disruptive for costly orders which do not trigger OOM and should generally have some fallback. Thus, we disable the full priority for costly orders. 
Suggested-by: Michal Hocko Link: http://lkml.kernel.org/r/20160906135258.18335-4-vbabka@suse.cz Signed-off-by: Vlastimil Babka Cc: Michal Hocko Cc: Mel Gorman Cc: Joonsoo Kim Cc: David Rientjes Cc: Rik van Riel Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 585d55cb0dc0..0d8415820fc3 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -9,6 +9,7 @@ enum compact_priority { COMPACT_PRIO_SYNC_FULL, MIN_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_FULL, COMPACT_PRIO_SYNC_LIGHT, + MIN_COMPACT_COSTLY_PRIORITY = COMPACT_PRIO_SYNC_LIGHT, DEF_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_LIGHT, COMPACT_PRIO_ASYNC, INIT_COMPACT_PRIORITY = COMPACT_PRIO_ASYNC -- cgit v1.2.3 From 2d75807383459c04d457bf2d295fa6ad858507d2 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 7 Oct 2016 17:00:58 -0700 Subject: mm: memcontrol: consolidate cgroup socket tracking The cgroup core and the memory controller need to track socket ownership for different purposes, but the tracking sites being entirely different is kind of ugly. Be a better citizen and rename the memory controller callbacks to match the cgroup core callbacks, then move them to the same place. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20160914194846.11153-3-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Tejun Heo Cc: "David S. 
Miller" Cc: Michal Hocko Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0710143723bc..61d20c17f3b7 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -773,13 +773,13 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, #endif /* CONFIG_CGROUP_WRITEBACK */ struct sock; -void sock_update_memcg(struct sock *sk); -void sock_release_memcg(struct sock *sk); bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); #ifdef CONFIG_MEMCG extern struct static_key_false memcg_sockets_enabled_key; #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) +void mem_cgroup_sk_alloc(struct sock *sk); +void mem_cgroup_sk_free(struct sock *sk); static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->tcpmem_pressure) @@ -792,6 +792,8 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) } #else #define mem_cgroup_sockets_enabled 0 +static inline void mem_cgroup_sk_alloc(struct sock *sk) { }; +static inline void mem_cgroup_sk_free(struct sock *sk) { }; static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { return false; -- cgit v1.2.3 From 082d5b6b60e9f25e1511557fcfcb21eedd267446 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Fri, 7 Oct 2016 17:01:10 -0700 Subject: mm/hugetlb: check for reserved hugepages during memory offline In dissolve_free_huge_pages(), free hugepages will be dissolved without making sure that there are enough of them left to satisfy hugepage reservations. 
Fix this by adding a return value to dissolve_free_huge_pages() and checking h->free_huge_pages vs. h->resv_huge_pages. Note that this may lead to the situation where dissolve_free_huge_page() returns an error and all free hugepages that were dissolved before that error are lost, while the memory block still cannot be set offline. Fixes: c8721bbb ("mm: memory-hotplug: enable memory hotplug to handle hugepage") Link: http://lkml.kernel.org/r/20160926172811.94033-3-gerald.schaefer@de.ibm.com Signed-off-by: Gerald Schaefer Acked-by: Michal Hocko Acked-by: Naoya Horiguchi Cc: "Kirill A . Shutemov" Cc: Vlastimil Babka Cc: Mike Kravetz Cc: "Aneesh Kumar K . V" Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Rui Teng Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c26d4638f665..fe99e6f956e2 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -450,8 +450,8 @@ static inline pgoff_t basepage_index(struct page *page) return __basepage_index(page); } -extern void dissolve_free_huge_pages(unsigned long start_pfn, - unsigned long end_pfn); +extern int dissolve_free_huge_pages(unsigned long start_pfn, + unsigned long end_pfn); static inline bool hugepage_migration_supported(struct hstate *h) { #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION @@ -518,7 +518,7 @@ static inline pgoff_t basepage_index(struct page *page) { return page->index; } -#define dissolve_free_huge_pages(s, e) do {} while (0) +#define dissolve_free_huge_pages(s, e) 0 #define hugepage_migration_supported(h) false static inline spinlock_t *huge_pte_lockptr(struct hstate *h, -- cgit v1.2.3 From 6d2329f8872f23e46a19d240930571510ce525eb Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 7 Oct 2016 17:01:22 -0700 Subject: mm: vm_page_prot: update with WRITE_ONCE/READ_ONCE vma->vm_page_prot 
is read lockless from the rmap_walk, it may be updated concurrently and this prevents the risk of reading intermediate values. Link: http://lkml.kernel.org/r/1474660305-19222-1-git-send-email-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Cc: Rik van Riel Cc: Hugh Dickins Cc: Mel Gorman Cc: Jan Vorlicek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3e8807e0b9d2..040a04a88996 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1517,7 +1517,7 @@ static inline int pte_devmap(pte_t pte) } #endif -int vma_wants_writenotify(struct vm_area_struct *vma); +int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot); extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl); -- cgit v1.2.3 From e86f15ee64d8ee46255d964d55f74f5ba9af8c36 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 7 Oct 2016 17:01:28 -0700 Subject: mm: vma_merge: fix vm_page_prot SMP race condition against rmap_walk The rmap_walk can access vm_page_prot (and potentially vm_flags in the pte/pmd manipulations). So it's not safe to wait for the caller to update the vm_page_prot/vm_flags after vma_merge returned potentially removing the "next" vma and extending the "current" vma over the next->vm_start,vm_end range, but still with the "current" vma vm_page_prot, after releasing the rmap locks. The vm_page_prot/vm_flags must be transferred from the "next" vma to the current vma while vma_merge still holds the rmap locks. The side effect of this race condition is pte corruption during migrate as remove_migration_ptes when run on a address of the "next" vma that got removed, used the vm_page_prot of the current vma. 
migrate mprotect ------------ ------------- migrating in "next" vma vma_merge() # removes "next" vma and # extends "current" vma # current vma is not with # vm_page_prot updated remove_migration_ptes read vm_page_prot of current "vma" establish pte with wrong permissions vm_set_page_prot(vma) # too late! change_protection in the old vma range only, next range is not updated This caused segmentation faults and potentially memory corruption in heavy mprotect loads with some light page migration caused by compaction in the background. Hugh Dickins pointed out the comment about the Odd case 8 in vma_merge which confirms the case 8 is only buggy one where the race can trigger, in all other vma_merge cases the above cannot happen. This fix removes the oddness factor from case 8 and it converts it from: AAAA PPPPNNNNXXXX -> PPPPNNNNNNNN to: AAAA PPPPNNNNXXXX -> PPPPXXXXXXXX XXXX has the right vma properties for the whole merged vma returned by vma_adjust, so it solves the problem fully. It has the added benefits that the callers could stop updating vma properties when vma_merge succeeds however the callers are not updated by this patch (there are bits like VM_SOFTDIRTY that still need special care for the whole range, as the vma merging ignores them, but as long as they're not processed by rmap walks and instead they're accessed with the mmap_sem at least for reading, they are fine not to be updated within vma_adjust before releasing the rmap_locks). 
Link: http://lkml.kernel.org/r/1474309513-20313-1-git-send-email-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Reported-by: Aditya Mandaleeka Cc: Rik van Riel Cc: Hugh Dickins Cc: Mel Gorman Cc: Jan Vorlicek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 040a04a88996..2c8ed8a894c8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1968,8 +1968,14 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node); /* mmap.c */ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); -extern int vma_adjust(struct vm_area_struct *vma, unsigned long start, - unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert); +extern int __vma_adjust(struct vm_area_struct *vma, unsigned long start, + unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert, + struct vm_area_struct *expand); +static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start, + unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert) +{ + return __vma_adjust(vma, start, end, pgoff, insert, NULL); +} extern struct vm_area_struct *vma_merge(struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, -- cgit v1.2.3 From 7877cdcc3893c1bd9a833b2f0398e7320794c6e6 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 7 Oct 2016 17:01:55 -0700 Subject: mm: consolidate warn_alloc_failed users warn_alloc_failed is currently used from the page and vmalloc allocators. This is a good reuse of the code except that vmalloc would appreciate a slightly different warning message. This is already handled by the fmt parameter except that "%s: page allocation failure: order:%u, mode:%#x(%pGg)" is printed anyway. 
This might be quite misleading because it might be a vmalloc failure which leads to the warning while the page allocator is not the culprit here. Fix this by always using the fmt string and only print the context that makes sense for the particular context (e.g. order makes only very little sense for the vmalloc context). Rename the function to not miss any user and also because a later patch will reuse it also for !failure cases. Link: http://lkml.kernel.org/r/20160929084407.7004-2-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Tetsuo Handa Cc: Johannes Weiner Cc: Mel Gorman Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2c8ed8a894c8..f7231411ad5a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1916,9 +1916,8 @@ extern void si_meminfo_node(struct sysinfo *val, int nid); extern unsigned long arch_reserved_kernel_pages(void); #endif -extern __printf(3, 4) -void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, - const char *fmt, ...); +extern __printf(2, 3) +void warn_alloc(gfp_t gfp_mask, const char *fmt, ...); extern void setup_per_cpu_pageset(void); -- cgit v1.2.3 From 72e2936c04f7d2a4bf87d7f72d3bf11cf91ebb47 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Fri, 7 Oct 2016 17:02:01 -0700 Subject: mm: remove unnecessary condition in remove_inode_hugepages When the huge page is added to the page cache (huge_add_to_page_cache), the page private flag will be cleared. Since this code (remove_inode_hugepages) will only be called for pages in the page cache, PagePrivate(page) will always be false. The patch removes the code without any functional change. 
Link: http://lkml.kernel.org/r/1475113323-29368-1-git-send-email-zhongjiang@huawei.com Signed-off-by: zhong jiang Reviewed-by: Naoya Horiguchi Reviewed-by: Mike Kravetz Tested-by: Mike Kravetz Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index fe99e6f956e2..48c76d612d40 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -90,7 +90,7 @@ int dequeue_hwpoisoned_huge_page(struct page *page); bool isolate_huge_page(struct page *page, struct list_head *list); void putback_active_hugepage(struct page *page); void free_huge_page(struct page *page); -void hugetlb_fix_reserve_counts(struct inode *inode, bool restore_reserve); +void hugetlb_fix_reserve_counts(struct inode *inode); extern struct mutex *hugetlb_fault_mutex_table; u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm, struct vm_area_struct *vma, -- cgit v1.2.3 From 1061b0d21e16550e7d7893a5deee2e49ea3990ad Mon Sep 17 00:00:00 2001 From: zijun_hu Date: Fri, 7 Oct 2016 17:02:04 -0700 Subject: linux/mm.h: canonicalize macro PAGE_ALIGNED() definition The macro PAGE_ALIGNED() is prone to cause error because it doesn't follow convention to parenthesize parameter @addr within macro body, for example unsigned long *ptr = kmalloc(...); PAGE_ALIGNED(ptr + 16); for the left parameter of macro IS_ALIGNED(), (unsigned long)(ptr + 16) is desired but the actual one is (unsigned long)ptr + 16. It is fixed by simply canonicalizing macro PAGE_ALIGNED() definition. 
Link: http://lkml.kernel.org/r/57EA6AE7.7090807@zoho.com Signed-off-by: zijun_hu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index f7231411ad5a..e9caec6a51e9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -126,7 +126,7 @@ extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *, #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE) /* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */ -#define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)addr, PAGE_SIZE) +#define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE) /* * Linux kernel virtual memory manager primitives. -- cgit v1.2.3 From 75ba1d07fd6a494851db5132612944a9d4773f9c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 7 Oct 2016 17:02:20 -0700 Subject: seq/proc: modify seq_put_decimal_[u]ll to take a const char *, not char Allow some seq_puts removals by taking a string instead of a single char. 
[akpm@linux-foundation.org: update vmstat_show(), per Joe] Link: http://lkml.kernel.org/r/667e1cf3d436de91a5698170a1e98d882905e956.1470704995.git.joe@perches.com Signed-off-by: Joe Perches Cc: Joe Perches Cc: Andi Kleen Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/seq_file.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index f3d45dd42695..e305b66a9fb9 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -117,9 +117,9 @@ __printf(2, 3) void seq_printf(struct seq_file *m, const char *fmt, ...); void seq_putc(struct seq_file *m, char c); void seq_puts(struct seq_file *m, const char *s); -void seq_put_decimal_ull(struct seq_file *m, char delimiter, +void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, unsigned long long num); -void seq_put_decimal_ll(struct seq_file *m, char delimiter, long long num); +void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num); void seq_escape(struct seq_file *m, const char *s, const char *esc); void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type, -- cgit v1.2.3 From 589a9785ee3a7cb85f1dedc3dad1c9754c691880 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 7 Oct 2016 17:02:42 -0700 Subject: min/max: remove sparse warnings when they're nested Currently, when min/max are nested within themselves, sparse will warn: warning: symbol '_min1' shadows an earlier one originally declared here warning: symbol '_min1' shadows an earlier one originally declared here warning: symbol '_min2' shadows an earlier one originally declared here This also immediately happens when min3() or max3() are used. Since sparse implements __COUNTER__, we can use __UNIQUE_ID() to generate unique variable names, avoiding this. 
Link: http://lkml.kernel.org/r/1471519773-29882-1-git-send-email-johannes@sipsolutions.net Signed-off-by: Johannes Berg Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 48 ++++++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 74fd6f05bc5b..bc6ed52a39b9 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -733,17 +733,25 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * strict type-checking.. See the * "unnecessary" pointer comparison. */ -#define min(x, y) ({ \ - typeof(x) _min1 = (x); \ - typeof(y) _min2 = (y); \ - (void) (&_min1 == &_min2); \ - _min1 < _min2 ? _min1 : _min2; }) - -#define max(x, y) ({ \ - typeof(x) _max1 = (x); \ - typeof(y) _max2 = (y); \ - (void) (&_max1 == &_max2); \ - _max1 > _max2 ? _max1 : _max2; }) +#define __min(t1, t2, min1, min2, x, y) ({ \ + t1 min1 = (x); \ + t2 min2 = (y); \ + (void) (&min1 == &min2); \ + min1 < min2 ? min1 : min2; }) +#define min(x, y) \ + __min(typeof(x), typeof(y), \ + __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ + x, y) + +#define __max(t1, t2, max1, max2, x, y) ({ \ + t1 max1 = (x); \ + t2 max2 = (y); \ + (void) (&max1 == &max2); \ + max1 > max2 ? max1 : max2; }) +#define max(x, y) \ + __max(typeof(x), typeof(y), \ + __UNIQUE_ID(max1_), __UNIQUE_ID(max2_), \ + x, y) #define min3(x, y, z) min((typeof(x))min(x, y), z) #define max3(x, y, z) max((typeof(x))max(x, y), z) @@ -775,15 +783,15 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * * Or not use min/max/clamp at all, of course. */ -#define min_t(type, x, y) ({ \ - type __min1 = (x); \ - type __min2 = (y); \ - __min1 < __min2 ? __min1: __min2; }) - -#define max_t(type, x, y) ({ \ - type __max1 = (x); \ - type __max2 = (y); \ - __max1 > __max2 ? 
__max1: __max2; }) +#define min_t(type, x, y) \ + __min(type, type, \ + __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ + x, y) + +#define max_t(type, x, y) \ + __max(type, type, \ + __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ + x, y) /** * clamp_t - return a value clamped to a given range using a given type -- cgit v1.2.3 From 9a01c3ed5cdb35d9004eb92510ee6ea11b4a5f16 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 7 Oct 2016 17:02:45 -0700 Subject: nmi_backtrace: add more trigger_*_cpu_backtrace() methods Patch series "improvements to the nmi_backtrace code" v9. This patch series modifies the trigger_xxx_backtrace() NMI-based remote backtracing code to make it more flexible, and makes a few small improvements along the way. The motivation comes from the task isolation code, where there are scenarios where we want to be able to diagnose a case where some cpu is about to interrupt a task-isolated cpu. It can be helpful to see both where the interrupting cpu is, and also an approximation of where the cpu that is being interrupted is. The nmi_backtrace framework allows us to discover the stack of the interrupted cpu. I've tested that the change works as desired on tile, and build-tested x86, arm, mips, and sparc64. For x86 I confirmed that the generic cpuidle stuff as well as the architecture-specific routines are in the new cpuidle section. For arm, mips, and sparc I just build-tested it and made sure the generic cpuidle routines were in the new cpuidle section, but I didn't attempt to figure out which the platform-specific idle routines might be. That might be more usefully done by someone with platform experience in follow-up patches. This patch (of 4): Currently you can only request a backtrace of either all cpus, or all cpus but yourself. It can also be helpful to request a remote backtrace of a single cpu, and since we want that, the logical extension is to support a cpumask as the underlying primitive. 
This change modifies the existing lib/nmi_backtrace.c code to take a cpumask as its basic primitive, and modifies the linux/nmi.h code to use the new "cpumask" method instead. The existing clients of nmi_backtrace (arm and x86) are converted to using the new cpumask approach in this change. The other users of the backtracing API (sparc64 and mips) are converted to use the cpumask approach rather than the all/allbutself approach. The mips code ignored the "include_self" boolean but with this change it will now also dump a local backtrace if requested. Link: http://lkml.kernel.org/r/1472487169-14923-2-git-send-email-cmetcalf@mellanox.com Signed-off-by: Chris Metcalf Tested-by: Daniel Thompson [arm] Reviewed-by: Aaron Tomlin Reviewed-by: Petr Mladek Cc: "Rafael J. Wysocki" Cc: Russell King Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Ralf Baechle Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nmi.h | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 4630eeae18e0..a78c35cff1ae 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -35,21 +35,34 @@ static inline void hardlockup_detector_disable(void) {} * base function. 
Return whether such support was available, * to allow calling code to fall back to some other mechanism: */ -#ifdef arch_trigger_all_cpu_backtrace +#ifdef arch_trigger_cpumask_backtrace static inline bool trigger_all_cpu_backtrace(void) { - arch_trigger_all_cpu_backtrace(true); - + arch_trigger_cpumask_backtrace(cpu_online_mask, false); return true; } + static inline bool trigger_allbutself_cpu_backtrace(void) { - arch_trigger_all_cpu_backtrace(false); + arch_trigger_cpumask_backtrace(cpu_online_mask, true); + return true; +} + +static inline bool trigger_cpumask_backtrace(struct cpumask *mask) +{ + arch_trigger_cpumask_backtrace(mask, false); + return true; +} + +static inline bool trigger_single_cpu_backtrace(int cpu) +{ + arch_trigger_cpumask_backtrace(cpumask_of(cpu), false); return true; } /* generic implementation */ -void nmi_trigger_all_cpu_backtrace(bool include_self, +void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, + bool exclude_self, void (*raise)(cpumask_t *mask)); bool nmi_cpu_backtrace(struct pt_regs *regs); @@ -62,6 +75,14 @@ static inline bool trigger_allbutself_cpu_backtrace(void) { return false; } +static inline bool trigger_cpumask_backtrace(struct cpumask *mask) +{ + return false; +} +static inline bool trigger_single_cpu_backtrace(int cpu) +{ + return false; +} #endif #ifdef CONFIG_LOCKUP_DETECTOR -- cgit v1.2.3 From 6727ad9e206cc08b80d8000a4d67f8417e53539d Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 7 Oct 2016 17:02:55 -0700 Subject: nmi_backtrace: generate one-line reports for idle cpus When doing an nmi backtrace of many cores, most of which are idle, the output is a little overwhelming and very uninformative. Suppress messages for cpus that are idling when they are interrupted and just emit one line, "NMI backtrace for N skipped: idling at pc 0xNNN". 
We do this by grouping all the cpuidle code together into a new .cpuidle.text section, and then checking the address of the interrupted PC to see if it lies within that section. This commit suitably tags x86 and tile idle routines, and only adds in the minimal framework for other architectures. Link: http://lkml.kernel.org/r/1472487169-14923-5-git-send-email-cmetcalf@mellanox.com Signed-off-by: Chris Metcalf Acked-by: Peter Zijlstra (Intel) Tested-by: Peter Zijlstra (Intel) Tested-by: Daniel Thompson [arm] Tested-by: Petr Mladek Cc: Aaron Tomlin Cc: Peter Zijlstra (Intel) Cc: "Rafael J. Wysocki" Cc: Russell King Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpu.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 7572d9e9dced..b886dc17f2f3 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -231,6 +231,11 @@ void cpu_startup_entry(enum cpuhp_state state); void cpu_idle_poll_ctrl(bool enable); +/* Attach to any functions which should be considered cpuidle. */ +#define __cpuidle __attribute__((__section__(".cpuidle.text"))) + +bool cpu_in_idle(unsigned long pc); + void arch_cpu_idle(void); void arch_cpu_idle_prepare(void); void arch_cpu_idle_enter(void); -- cgit v1.2.3 From 81243eacfa400f5f7b89f4c2323d0de9982bb0fb Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 7 Oct 2016 17:03:12 -0700 Subject: cred: simpler, 1D supplementary groups Current supplementary groups code can massively overallocate memory and is implemented in a way so that access to individual gid is done via 2D array. If number of gids is <= 32, memory allocation is more or less tolerable (140/148 bytes). But if it is not, code allocates full page (!) regardless and, what's even more fun, doesn't reuse small 32-entry array. 
2D array means dependent shifts, loads and LEAs without possibility to optimize them (gid is never known at compile time). All of the above is unnecessary. Switch to the usual trailing-zero-len-array scheme. Memory is allocated with kmalloc/vmalloc() and only as much as needed. Accesses become simpler (LEA 8(gi,idx,4) or even without displacement). Maximum number of gids is 65536 which translates to 256KB+8 bytes. I think kernel can handle such allocation. On my usual desktop system with whole 9 (nine) aux groups, struct group_info shrinks from 148 bytes to 44 bytes, yay! Nice side effects: - "gi->gid[i]" is shorter than "GROUP_AT(gi, i)", less typing, - fix little mess in net/ipv4/ping.c should have been using GROUP_AT macro but this point becomes moot, - aux group allocation is persistent and should be accounted as such. Link: http://lkml.kernel.org/r/20160817201927.GA2096@p183.telecom.by Signed-off-by: Alexey Dobriyan Cc: Vasily Kulikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cred.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index 257db64562e5..f0e70a1bb3ac 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -26,15 +26,10 @@ struct inode; /* * COW Supplementary groups list */ -#define NGROUPS_SMALL 32 -#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(kgid_t))) - struct group_info { atomic_t usage; int ngroups; - int nblocks; - kgid_t small_block[NGROUPS_SMALL]; - kgid_t *blocks[0]; + kgid_t gid[0]; }; /** @@ -88,10 +83,6 @@ extern void set_groups(struct cred *, struct group_info *); extern int groups_search(const struct group_info *, kgid_t); extern bool may_setgroups(void); -/* access the groups "array" with this macro */ -#define GROUP_AT(gi, i) \ - ((gi)->blocks[(i) / NGROUPS_PER_BLOCK][(i) % NGROUPS_PER_BLOCK]) - /* * The security context of a task * -- cgit v1.2.3 From 
05fd007e46296afb24d15c7d589d535e5a5b9d5c Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 7 Oct 2016 17:03:15 -0700 Subject: console: don't prefer first registered if DT specifies stdout-path If a device tree specifies a preferred device for kernel console output via the stdout-path or linux,stdout-path chosen node properties or the stdout alias then the kernel ought to honor it & output the kernel console to that device. As it stands, this isn't the case. Whilst we parse the stdout-path properties & set an of_stdout variable from of_alias_scan(), and use that from of_console_check() to determine whether to add a console device as a preferred console whilst registering it, we also prefer the first registered console if no other has been selected at the time of its registration. This means that if a console other than the one the device tree selects via stdout-path is registered first, we will switch to using it & when the stdout-path console is later registered the call to add_preferred_console() via of_console_check() is too late to do anything useful. In practice this seems to mean that we switch to the dummy console device fairly early & see no further console output: Console: colour dummy device 80x25 console [tty0] enabled bootconsole [ns16550a0] disabled Fix this by not automatically preferring the first registered console if one is specified by the device tree. This allows consoles to be registered but not enabled, and once the driver for the console selected by stdout-path calls of_console_check() the driver will be added to the list of preferred consoles before any other console has been enabled. When that console is then registered via register_console() it will be enabled as expected. 
Link: http://lkml.kernel.org/r/20160809151937.26118-1-paul.burton@imgtec.com Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: Paul Burton Cc: Tejun Heo Cc: Sergey Senozhatsky Cc: Jiri Slaby Cc: Daniel Vetter Cc: Ivan Delalande Cc: Thierry Reding Cc: Borislav Petkov Cc: Jan Kara Cc: Petr Mladek Cc: Joe Perches Cc: Greg Kroah-Hartman Cc: Rob Herring Cc: Frank Rowand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/console.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index d530c4627e54..3672809234a7 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -173,6 +173,12 @@ static inline void console_sysfs_notify(void) #endif extern bool console_suspend_enabled; +#ifdef CONFIG_OF +extern void console_set_by_of(void); +#else +static inline void console_set_by_of(void) {} +#endif + /* Suspend and resume console messages over PM events */ extern void suspend_console(void); extern void resume_console(void); -- cgit v1.2.3 From fd50ecaddf8372a1d96e0daeaac0f93cf04e4d42 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 29 Sep 2016 17:48:45 +0200 Subject: vfs: Remove {get,set,remove}xattr inode operations These inode operations are no longer used; remove them. 
Signed-off-by: Andreas Gruenbacher Signed-off-by: Al Viro --- include/linux/fs.h | 5 ----- include/linux/xattr.h | 4 ---- 2 files changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 91a7245e58c7..788261b74d45 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1739,12 +1739,7 @@ struct inode_operations { struct inode *, struct dentry *, unsigned int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); - int (*setxattr) (struct dentry *, struct inode *, - const char *, const void *, size_t, int); - ssize_t (*getxattr) (struct dentry *, struct inode *, - const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); - int (*removexattr) (struct dentry *, const char *); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); int (*update_time)(struct inode *, struct timespec *, int); diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 6ae6b2e68efb..e77605a0c8da 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -55,11 +55,7 @@ int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int); int __vfs_removexattr(struct dentry *, const char *); int vfs_removexattr(struct dentry *, const char *); -ssize_t generic_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size); ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); -int generic_setxattr(struct dentry *dentry, struct inode *inode, - const char *name, const void *value, size_t size, int flags); -int generic_removexattr(struct dentry *dentry, const char *name); ssize_t vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value, size_t size, gfp_t flags); -- cgit v1.2.3 From 21d9629a7abd87512d062fbe57b04a1baedf93d2 Mon Sep 17 00:00:00 2001 From: Alex Sidorenko Date: Fri, 7 Oct 2016 09:02:33 -0400 
Subject: Fixing a bug in team driver due to incorrect 'unsigned int' to 'int' conversion Roundrobin runner of team driver uses 'unsigned int' variable to count the number of sent_packets. Later it is passed to a subroutine team_num_to_port_index(struct team *team, int num) as 'num' and when we reach MAXINT (2**31-1), 'num' becomes negative. This leads to using incorrect hash-bucket for port lookup and as a result, packets are dropped. The fix consists of changing 'int num' to 'unsigned int num'. Testing of a fixed kernel shows that there is no packet drop anymore. Signed-off-by: Alex Sidorenko Signed-off-by: David S. Miller --- include/linux/if_team.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 174f43f43aff..c05216a8fbac 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -245,7 +245,7 @@ static inline struct team_port *team_get_port_by_index(struct team *team, return NULL; } -static inline int team_num_to_port_index(struct team *team, int num) +static inline int team_num_to_port_index(struct team *team, unsigned int num) { int en_port_count = ACCESS_ONCE(team->en_port_count); -- cgit v1.2.3 From ff84136cb6a4943f489ad037fe93f43be0573c23 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Fri, 7 Oct 2016 15:39:54 +0300 Subject: watchdog: add watchdog pretimeout governor framework The change adds a simple watchdog pretimeout framework infrastructure, its purpose is to allow users to select a desired handling of watchdog pretimeout events, which may be generated by some watchdog devices. A user selects a default watchdog pretimeout governor during compilation stage. Watchdogs with WDIOF_PRETIMEOUT capability now have one more device attribute in sysfs, pretimeout_governor attribute is intended to display the selected watchdog pretimeout governor. 
The framework has no impact at runtime on watchdog devices with no WDIOF_PRETIMEOUT capability set. Signed-off-by: Vladimir Zapolskiy Reviewed-by: Guenter Roeck Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- include/linux/watchdog.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 4035df7ec023..35a4d8185b51 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -19,6 +19,7 @@ struct watchdog_ops; struct watchdog_device; struct watchdog_core_data; +struct watchdog_governor; /** struct watchdog_ops - The watchdog-devices operations * @@ -61,6 +62,7 @@ struct watchdog_ops { * watchdog device. * @info: Pointer to a watchdog_info structure. * @ops: Pointer to the list of watchdog operations. + * @gov: Pointer to watchdog pretimeout governor. * @bootstatus: Status of the watchdog device at boot. * @timeout: The watchdog devices timeout value (in seconds). * @pretimeout: The watchdog devices pre_timeout value. 
@@ -97,6 +99,7 @@ struct watchdog_device { const struct attribute_group **groups; const struct watchdog_info *info; const struct watchdog_ops *ops; + const struct watchdog_governor *gov; unsigned int bootstatus; unsigned int timeout; unsigned int pretimeout; @@ -185,6 +188,16 @@ static inline void *watchdog_get_drvdata(struct watchdog_device *wdd) return wdd->driver_data; } +/* Use the following functions to report watchdog pretimeout event */ +#if IS_ENABLED(CONFIG_WATCHDOG_PRETIMEOUT_GOV) +void watchdog_notify_pretimeout(struct watchdog_device *wdd); +#else +static inline void watchdog_notify_pretimeout(struct watchdog_device *wdd) +{ + pr_alert("watchdog%d: pretimeout event\n", wdd->id); +} +#endif + /* drivers/watchdog/watchdog_core.c */ void watchdog_set_restart_priority(struct watchdog_device *wdd, int priority); extern int watchdog_init_timeout(struct watchdog_device *wdd, -- cgit v1.2.3 From 4bcc595ccd80decb4245096e3d1258989c50ed41 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 8 Oct 2016 20:32:40 -0700 Subject: printk: reinstate KERN_CONT for printing continuation lines Long long ago the kernel log buffer was a buffered stream of bytes, very much like stdio in user space. It supported log levels by scanning the stream and noticing the log level markers at the beginning of each line, but if you wanted to print a partial line in multiple chunks, you just did multiple printk() calls, and it just automatically worked. Except when it didn't, and you had very confusing output when different lines got all mixed up with each other. Then you got fragment lines mixing with each other, or with non-fragment lines, because it was traditionally impossible to tell whether a printk() call was a continuation or not. To at least help clarify the issue of continuation lines, we added a KERN_CONT marker back in 2007 to mark continuation lines: 474925277671 ("printk: add KERN_CONT annotation"). 
That continuation marker was initially an empty string, and didn't actually make any semantic difference. But it at least made it possible to annotate the source code, and have check-patch notice that a printk() didn't need or want a log level marker, because it was a continuation of a previous line. To avoid the ambiguity between a continuation line that had that KERN_CONT marker, and a printk with no level information at all, we then in 2009 made KERN_CONT be a real log level marker which meant that we could now reliably tell the difference between the two cases. 5fd29d6ccbc9 ("printk: clean up handling of log-levels and newlines") and we could take advantage of that to make sure we didn't mix up continuation lines with lines that just didn't have any loglevel at all. Then, in 2012, the kernel log buffer was changed to be a "record" based log, where each line was a record that has a loglevel and a timestamp. You can see the beginning of that conversion in commits e11fea92e13f ("kmsg: export printk records to the /dev/kmsg interface") 7ff9554bb578 ("printk: convert byte-buffer to variable-length record buffer") with a number of follow-up commits to fix some painful fallout from that conversion. Over all, it took a couple of months to sort out most of it. But the upside was that you could have concurrent readers (and writers) of the kernel log and not have lines with mixed output in them. And one particular pain-point for the record-based kernel logging was exactly the fragmentary lines that are generated in smaller chunks. In order to still log them as one record, the continuation lines need to be attached to the previous record properly. However the explicit continuation record marker that is actually useful for this exact case was actually removed at around the same time by commit 61e99ab8e35a ("printk: remove the now unnecessary "C" annotation for KERN_CONT") due to the incorrect belief that KERN_CONT wasn't meaningful. 
The ambiguity between "is this a continuation line" or "is this a plain printk with no log level information" was reintroduced, and in fact became an even bigger pain point because there was now the whole record-level merging of kernel messages going on. This patch reinstates the KERN_CONT as a real non-empty string marker, so that the ambiguity is fixed once again. But it's not a plain revert of that original removal: in the four years since we made KERN_CONT an empty string again, not only has the format of the log level markers changed, we've also had some usage changes in this area. For example, some ACPI code seems to use KERN_CONT _together_ with a log level, and now uses both the KERN_CONT marker and (for example) a KERN_INFO marker to show that it's an informational continuation of a line. Which is actually not a bad idea - if the continuation line cannot be attached to its predecessor, without the log level information we don't know what log level to assign to it (and we traditionally just assigned it the default loglevel). So having both a log level and the KERN_CONT marker is not necessarily a bad idea, but it does mean that we need to actually iterate over potentially multiple markers, rather than just a single one. Also, since KERN_CONT was still conceptually needed, and encouraged, but didn't actually _do_ anything, we've also had the reverse problem: rather than having too many annotations it has too few, and there is bit rot with code that no longer marks the continuation lines with the KERN_CONT marker. So this patch not only re-instates the non-empty KERN_CONT marker, it also fixes up the cases of bit-rot I noticed in my own logs. There are probably other cases where KERN_CONT will be needed to be added, either because it is new code that never dealt with the need for KERN_CONT, or old code that has bitrotted without anybody noticing. That said, we should strive to avoid the need for KERN_CONT. 
It does result in real problems for logging, and should generally not be seen as a good feature. If we some day can get rid of the feature entirely, because nobody does any fragmented printk calls, that would be lovely. But until that point, let's at least mark the code that relies on the hacky multi-fragment kernel printk's. Not only does it avoid the ambiguity, it also annotates code as "maybe this would be good to fix some day". (That said, particularly during single-threaded bootup, the downsides of KERN_CONT are very limited. Things get much hairier when you have multiple threads going on and user level reading and writing logs too). Signed-off-by: Linus Torvalds --- include/linux/kern_levels.h | 2 +- include/linux/printk.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kern_levels.h b/include/linux/kern_levels.h index c2ce155d83cc..f282d4e87258 100644 --- a/include/linux/kern_levels.h +++ b/include/linux/kern_levels.h @@ -20,7 +20,7 @@ * line that had no enclosing \n). Only to be used by core/arch code * during early bootup (a continued line is not SMP-safe otherwise). */ -#define KERN_CONT "" +#define KERN_CONT KERN_SOH "c" /* integer equivalents of KERN_ */ #define LOGLEVEL_SCHED -2 /* Deferred messages from sched code diff --git a/include/linux/printk.h b/include/linux/printk.h index 696a56be7d3e..eac1af8502bb 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -16,6 +16,7 @@ static inline int printk_get_level(const char *buffer) switch (buffer[1]) { case '0' ... 
'7': case 'd': /* KERN_DEFAULT */ + case 'c': /* KERN_CONT */ return buffer[1]; } } -- cgit v1.2.3 From b57332b4105abf1d518d93886e547ee2f98cd414 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 10 Oct 2016 13:57:37 -0400 Subject: constify iov_iter_count() and iter_is_iovec() Signed-off-by: Al Viro --- include/linux/uio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index b5ebe6dca404..544c03552b99 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -110,12 +110,12 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages); const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags); -static inline size_t iov_iter_count(struct iov_iter *i) +static inline size_t iov_iter_count(const struct iov_iter *i) { return i->count; } -static inline bool iter_is_iovec(struct iov_iter *i) +static inline bool iter_is_iovec(const struct iov_iter *i) { return !(i->type & (ITER_BVEC | ITER_KVEC | ITER_PIPE)); } -- cgit v1.2.3 From 38addce8b600ca335dc86fa3d48c890f1c6fa1f4 Mon Sep 17 00:00:00 2001 From: Emese Revfy Date: Mon, 20 Jun 2016 20:41:19 +0200 Subject: gcc-plugins: Add latent_entropy plugin This adds a new gcc plugin named "latent_entropy". It is designed to extract as much possible uncertainty from a running system at boot time as possible, hoping to capitalize on any possible variation in CPU operation (due to runtime data differences, hardware differences, SMP ordering, thermal timing variation, cache behavior, etc). At the very least, this plugin is a much more comprehensive example for how to manipulate kernel code using the gcc plugin internals. The need for very-early boot entropy tends to be very architecture or system design specific, so this plugin is more suited for those sorts of special cases. 
The existing kernel RNG already attempts to extract entropy from reliable runtime variation, but this plugin takes the idea to a logical extreme by permuting a global variable based on any variation in code execution (e.g. a different value (and permutation function) is used to permute the global based on loop count, case statement, if/then/else branching, etc). To do this, the plugin starts by inserting a local variable in every marked function. The plugin then adds logic so that the value of this variable is modified by randomly chosen operations (add, xor and rol) and random values (gcc generates separate static values for each location at compile time and also injects the stack pointer at runtime). The resulting value depends on the control flow path (e.g., loops and branches taken). Before the function returns, the plugin mixes this local variable into the latent_entropy global variable. The value of this global variable is added to the kernel entropy pool in do_one_initcall() and _do_fork(), though it does not credit any bytes of entropy to the pool; the contents of the global are just used to mix the pool. Additionally, the plugin can pre-initialize arrays with build-time random contents, so that two different kernel builds running on identical hardware will not have the same starting values. 
Signed-off-by: Emese Revfy [kees: expanded commit message and code comments] Signed-off-by: Kees Cook --- include/linux/random.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index 3d6e9815cd85..a59c74cdb1eb 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -18,6 +18,17 @@ struct random_ready_callback { }; extern void add_device_randomness(const void *, unsigned int); + +#if defined(CONFIG_GCC_PLUGIN_LATENT_ENTROPY) && !defined(__CHECKER__) +static inline void add_latent_entropy(void) +{ + add_device_randomness((const void *)&latent_entropy, + sizeof(latent_entropy)); +} +#else +static inline void add_latent_entropy(void) {} +#endif + extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value); extern void add_interrupt_randomness(int irq, int irq_flags); -- cgit v1.2.3 From 0766f788eb727e2e330d55d30545db65bcf2623f Mon Sep 17 00:00:00 2001 From: Emese Revfy Date: Mon, 20 Jun 2016 20:42:34 +0200 Subject: latent_entropy: Mark functions with __latent_entropy The __latent_entropy gcc attribute can be used only on functions and variables. If it is on a function then the plugin will instrument it for gathering control-flow entropy. If the attribute is on a variable then the plugin will initialize it with random contents. The variable must be an integer, an integer array type or a structure with integer fields. These specific functions have been selected because they are init functions (to help gather boot-time entropy), are called at unpredictable times, or they have variable loops, each of which provide some level of latent entropy. 
Signed-off-by: Emese Revfy [kees: expanded commit message] Signed-off-by: Kees Cook --- include/linux/compiler-gcc.h | 7 +++++++ include/linux/compiler.h | 4 ++++ include/linux/fdtable.h | 2 +- include/linux/genhd.h | 2 +- include/linux/init.h | 5 +++-- include/linux/random.h | 4 ++-- 6 files changed, 18 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 573c5a18908f..432f5c97e18f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -188,6 +188,13 @@ #endif /* GCC_VERSION >= 40300 */ #if GCC_VERSION >= 40500 + +#ifndef __CHECKER__ +#ifdef LATENT_ENTROPY_PLUGIN +#define __latent_entropy __attribute__((latent_entropy)) +#endif +#endif + /* * Mark a position in code as unreachable. This can be used to * suppress control flow warnings after asm blocks that transfer diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 668569844d37..ceaddaf76ff1 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -406,6 +406,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s # define __attribute_const__ /* unimplemented */ #endif +#ifndef __latent_entropy +# define __latent_entropy +#endif + /* * Tell gcc if a function is cold. The compiler will assume any path * directly leading to the call is unlikely. 
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 5295535b60c6..9852c7e33466 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -105,7 +105,7 @@ struct files_struct *get_files_struct(struct task_struct *); void put_files_struct(struct files_struct *fs); void reset_files_struct(struct files_struct *); int unshare_files(struct files_struct **); -struct files_struct *dup_fd(struct files_struct *, int *); +struct files_struct *dup_fd(struct files_struct *, int *) __latent_entropy; void do_close_on_exec(struct files_struct *); int iterate_fd(struct files_struct *, unsigned, int (*)(const void *, struct file *, unsigned), diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 1dbf52f9c24b..e0341af6950e 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -437,7 +437,7 @@ extern void disk_flush_events(struct gendisk *disk, unsigned int mask); extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask); /* drivers/char/random.c */ -extern void add_disk_randomness(struct gendisk *disk); +extern void add_disk_randomness(struct gendisk *disk) __latent_entropy; extern void rand_initialize_disk(struct gendisk *disk); static inline sector_t get_start_sect(struct block_device *bdev) diff --git a/include/linux/init.h b/include/linux/init.h index 6935d02474aa..1e5c131d5c9a 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -39,7 +39,7 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(.init.text) __cold notrace +#define __init __section(.init.text) __cold notrace __latent_entropy #define __initdata __section(.init.data) #define __initconst __constsection(.init.rodata) #define __exitdata __section(.exit.data) @@ -86,7 +86,8 @@ #define __exit __section(.exit.text) __exitused __cold notrace /* Used for MEMORY_HOTPLUG */ -#define __meminit __section(.meminit.text) __cold notrace +#define __meminit __section(.meminit.text) 
__cold notrace \ + __latent_entropy #define __meminitdata __section(.meminit.data) #define __meminitconst __constsection(.meminit.rodata) #define __memexit __section(.memexit.text) __exitused __cold notrace diff --git a/include/linux/random.h b/include/linux/random.h index a59c74cdb1eb..d80a4388a4fd 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -30,8 +30,8 @@ static inline void add_latent_entropy(void) {} #endif extern void add_input_randomness(unsigned int type, unsigned int code, - unsigned int value); -extern void add_interrupt_randomness(int irq, int irq_flags); + unsigned int value) __latent_entropy; +extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy; extern void get_random_bytes(void *buf, int nbytes); extern int add_random_ready_callback(struct random_ready_callback *rdy); -- cgit v1.2.3 From b60e4ea4a400bde8a4811f94b84a9bb65f81b677 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 29 Sep 2016 16:39:41 +0300 Subject: ACPI / property: Allow holes in reference properties DT allows holes or empty phandles for references. This is used for example in SPI subsystem where some chip selects are native and others are regular GPIOs. In ACPI _DSD we currently do not support this but instead the preceding reference consumes all following integer arguments. For example we would like to support something like the below ASL fragment for SPI: Package () { "cs-gpios", Package () { ^GPIO, 19, 0, 0, // GPIO CS0 0, // Native CS ^GPIO, 20, 0, 0, // GPIO CS1 } } The zero in the middle means "no entry" or NULL reference. To support this we change acpi_data_get_property_reference() to take firmware node and num_args as argument and rename it to __acpi_node_get_property_reference(). The function returns -ENOENT if the given index resolves to "no entry" reference and -ENODATA when there are no more entries in the property. 
We then add static inline wrapper acpi_node_get_property_reference() that passes MAX_ACPI_REFERENCE_ARGS as num_args to support the existing behaviour which some drivers have been relying on. Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4d8452c2384b..632ec16a855e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -927,9 +927,17 @@ struct acpi_reference_args { #ifdef CONFIG_ACPI int acpi_dev_get_property(struct acpi_device *adev, const char *name, acpi_object_type type, const union acpi_object **obj); -int acpi_node_get_property_reference(struct fwnode_handle *fwnode, - const char *name, size_t index, - struct acpi_reference_args *args); +int __acpi_node_get_property_reference(struct fwnode_handle *fwnode, + const char *name, size_t index, size_t num_args, + struct acpi_reference_args *args); + +static inline int acpi_node_get_property_reference(struct fwnode_handle *fwnode, + const char *name, size_t index, + struct acpi_reference_args *args) +{ + return __acpi_node_get_property_reference(fwnode, name, index, + MAX_ACPI_REFERENCE_ARGS, args); +} int acpi_node_prop_get(struct fwnode_handle *fwnode, const char *propname, void **valptr); @@ -1005,6 +1013,14 @@ static inline int acpi_dev_get_property(struct acpi_device *adev, return -ENXIO; } +static inline int +__acpi_node_get_property_reference(struct fwnode_handle *fwnode, + const char *name, size_t index, size_t num_args, + struct acpi_reference_args *args) +{ + return -ENXIO; +} + static inline int acpi_node_get_property_reference(struct fwnode_handle *fwnode, const char *name, size_t index, struct acpi_reference_args *args) -- cgit v1.2.3 From 915045fe15a5fc376f263d594aee4fca4fba5323 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Tue, 11 Oct 2016 13:51:18 
-0700 Subject: radix-tree: 'slot' can be NULL in radix_tree_next_slot() There are four cases I can see where we could end up with a NULL 'slot' in radix_tree_next_slot(). Yet radix_tree_next_slot() never actually checks whether 'slot' is NULL. It just happens that for the cases where 'slot' is NULL, some other combination of factors prevents us from dereferencing it. It would be very easy for someone to unwittingly change one of these factors without realizing that we are implicitly depending on it to save us from a NULL pointer dereference. Add a comment documenting the things that allow 'slot' to be safely passed as NULL to radix_tree_next_slot(). Here are details on the four cases: 1) radix_tree_iter_retry() via a non-tagged iteration like radix_tree_for_each_slot(). In this case we currently aren't seeing a bug because radix_tree_iter_retry() sets iter->next_index = iter->index; which means that in the else case in radix_tree_next_slot(), 'count' is zero, so we skip over the while() loop and effectively just return NULL without ever dereferencing 'slot'. 2) radix_tree_iter_retry() via tagged iteration like radix_tree_for_each_tagged(). This case was giving us NULL pointer dereferences in testing, and was fixed with this commit: commit 3cb9185c6730 ("radix-tree: fix radix_tree_iter_retry() for tagged iterators.") This fix doesn't explicitly check for 'slot' being NULL, though, it works around the NULL pointer dereference by instead zeroing iter->tags in radix_tree_iter_retry(), which makes us bail out of the if() case in radix_tree_next_slot() before we dereference 'slot'. 3) radix_tree_iter_next() via a non-tagged iteration like radix_tree_for_each_slot(). This currently happens in shmem_tag_pins() and shmem_partial_swap_usage(). As with non-tagged iteration, 'count' in the else case of radix_tree_next_slot() is zero, so we skip over the while() loop and effectively just return NULL without ever dereferencing 'slot'.
4) radix_tree_iter_next() via tagged iteration like radix_tree_for_each_tagged(). This happens in shmem_wait_for_pins(). radix_tree_iter_next() zeros out iter->tags, so we end up exiting radix_tree_next_slot() here: if (flags & RADIX_TREE_ITER_TAGGED) { void *canon = slot; iter->tags >>= 1; if (unlikely(!iter->tags)) return NULL; Link: http://lkml.kernel.org/r/20160815194237.25967-2-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 52b97db93830..af3581b8a451 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -461,6 +461,14 @@ static inline struct radix_tree_node *entry_to_node(void *ptr) * * This function updates @iter->index in the case of a successful lookup. * For tagged lookup it also eats @iter->tags. + * + * There are several cases where 'slot' can be passed in as NULL to this + * function. These cases result from the use of radix_tree_iter_next() or + * radix_tree_iter_retry(). In these cases we don't end up dereferencing + * 'slot' because either: + * a) we are doing tagged iteration and iter->tags has been set to 0, or + * b) we are doing non-tagged iteration, and iter->index and iter->next_index + * have been set up so that radix_tree_chunk_size() returns 1 or 0. 
*/ static __always_inline void ** radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) -- cgit v1.2.3 From 1204c77f9b6ab8ba8cc6cfe00342f5e64a740cdf Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 11 Oct 2016 13:51:30 -0700 Subject: include/linux/ctype.h: make isdigit() table lookupless Make isdigit into a simple range checking inline function: return '0' <= c && c <= '9'; This code is 1 branch, not 2 because any reasonable compiler can optimize this code into SUB+CMP, so the code while (isdigit((c = *s++))) ... remains 1 branch per iteration HOWEVER it suddenly doesn't do table lookup priming cacheline nobody cares about. Link: http://lkml.kernel.org/r/20160826190047.GA12536@p183.telecom.by Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ctype.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ctype.h b/include/linux/ctype.h index 653589e3e30e..f13e4ff6835a 100644 --- a/include/linux/ctype.h +++ b/include/linux/ctype.h @@ -22,7 +22,10 @@ extern const unsigned char _ctype[]; #define isalnum(c) ((__ismask(c)&(_U|_L|_D)) != 0) #define isalpha(c) ((__ismask(c)&(_U|_L)) != 0) #define iscntrl(c) ((__ismask(c)&(_C)) != 0) -#define isdigit(c) ((__ismask(c)&(_D)) != 0) +static inline int isdigit(int c) +{ + return '0' <= c && c <= '9'; +} #define isgraph(c) ((__ismask(c)&(_P|_U|_L|_D)) != 0) #define islower(c) ((__ismask(c)&(_L)) != 0) #define isprint(c) ((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0) -- cgit v1.2.3 From 72063e01eda7e7562702bbf790380104bf704379 Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Tue, 11 Oct 2016 13:52:51 -0700 Subject: autofs: remove AUTOFS_DEVID_LEN This macro was never used by either kernel or userspace, and also doesn't represent "devid length" in bytes. (unless it was added to mean something else).
Link: http://lkml.kernel.org/r/20160812024820.12352.21210.stgit@pluto.themaw.net Signed-off-by: Tomohiro Kusumi Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auto_dev-ioctl.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h index 7caaf298f539..bf82e3a758e5 100644 --- a/include/linux/auto_dev-ioctl.h +++ b/include/linux/auto_dev-ioctl.h @@ -18,8 +18,6 @@ #define AUTOFS_DEV_IOCTL_VERSION_MAJOR 1 #define AUTOFS_DEV_IOCTL_VERSION_MINOR 0 -#define AUTOFS_DEVID_LEN 16 - #define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl) /* -- cgit v1.2.3 From f58b3c91f6786c66483fc18fd8b82a74cbf96d19 Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Tue, 11 Oct 2016 13:53:10 -0700 Subject: autofs: move inclusion of linux/limits.h to uapi linux/limits.h should be included by uapi instead of linux/auto_fs.h so as not to cause compile error in userspace. 
# cat << EOF > ./test1.c > #include > #include > int main(void) { > return 0; > } > EOF # gcc -Wall -g ./test1.c In file included from ./test1.c:2:0: /usr/include/linux/auto_fs.h:54:12: error: 'NAME_MAX' undeclared here (not in a function) char name[NAME_MAX+1]; ^ Link: http://lkml.kernel.org/r/20160812024856.12352.24092.stgit@pluto.themaw.net Signed-off-by: Tomohiro Kusumi Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auto_fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/auto_fs.h b/include/linux/auto_fs.h index b4066bb89083..b8f814c95cf5 100644 --- a/include/linux/auto_fs.h +++ b/include/linux/auto_fs.h @@ -10,7 +10,6 @@ #define _LINUX_AUTO_FS_H #include -#include #include #include #endif /* _LINUX_AUTO_FS_H */ -- cgit v1.2.3 From 9b88ee0f3bb4c5b1e721bdcee93601b501d72f0a Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Tue, 11 Oct 2016 13:53:13 -0700 Subject: autofs4: move linux/auto_dev-ioctl.h to uapi/linux Since linux/auto_dev-ioctl.h wasn't included in include/linux/Kbuild it wasn't moved to uapi/linux as part of the uapi series. 
Link: http://lkml.kernel.org/r/20160812024901.12352.10984.stgit@pluto.themaw.net Signed-off-by: Ian Kent Cc: Tomohiro Kusumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auto_dev-ioctl.h | 209 +---------------------------------------- 1 file changed, 1 insertion(+), 208 deletions(-) (limited to 'include/linux') diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h index bf82e3a758e5..28c15050ebe6 100644 --- a/include/linux/auto_dev-ioctl.h +++ b/include/linux/auto_dev-ioctl.h @@ -10,212 +10,5 @@ #ifndef _LINUX_AUTO_DEV_IOCTL_H #define _LINUX_AUTO_DEV_IOCTL_H -#include -#include - -#define AUTOFS_DEVICE_NAME "autofs" - -#define AUTOFS_DEV_IOCTL_VERSION_MAJOR 1 -#define AUTOFS_DEV_IOCTL_VERSION_MINOR 0 - -#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl) - -/* - * An ioctl interface for autofs mount point control. - */ - -struct args_protover { - __u32 version; -}; - -struct args_protosubver { - __u32 sub_version; -}; - -struct args_openmount { - __u32 devid; -}; - -struct args_ready { - __u32 token; -}; - -struct args_fail { - __u32 token; - __s32 status; -}; - -struct args_setpipefd { - __s32 pipefd; -}; - -struct args_timeout { - __u64 timeout; -}; - -struct args_requester { - __u32 uid; - __u32 gid; -}; - -struct args_expire { - __u32 how; -}; - -struct args_askumount { - __u32 may_umount; -}; - -struct args_ismountpoint { - union { - struct args_in { - __u32 type; - } in; - struct args_out { - __u32 devid; - __u32 magic; - } out; - }; -}; - -/* - * All the ioctls use this structure. - * When sending a path size must account for the total length - * of the chunk of memory otherwise is is the size of the - * structure. 
- */ - -struct autofs_dev_ioctl { - __u32 ver_major; - __u32 ver_minor; - __u32 size; /* total size of data passed in - * including this struct */ - __s32 ioctlfd; /* automount command fd */ - - /* Command parameters */ - - union { - struct args_protover protover; - struct args_protosubver protosubver; - struct args_openmount openmount; - struct args_ready ready; - struct args_fail fail; - struct args_setpipefd setpipefd; - struct args_timeout timeout; - struct args_requester requester; - struct args_expire expire; - struct args_askumount askumount; - struct args_ismountpoint ismountpoint; - }; - - char path[0]; -}; - -static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in) -{ - memset(in, 0, sizeof(struct autofs_dev_ioctl)); - in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR; - in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; - in->size = sizeof(struct autofs_dev_ioctl); - in->ioctlfd = -1; -} - -/* - * If you change this make sure you make the corresponding change - * to autofs-dev-ioctl.c:lookup_ioctl() - */ -enum { - /* Get various version info */ - AUTOFS_DEV_IOCTL_VERSION_CMD = 0x71, - AUTOFS_DEV_IOCTL_PROTOVER_CMD, - AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD, - - /* Open mount ioctl fd */ - AUTOFS_DEV_IOCTL_OPENMOUNT_CMD, - - /* Close mount ioctl fd */ - AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD, - - /* Mount/expire status returns */ - AUTOFS_DEV_IOCTL_READY_CMD, - AUTOFS_DEV_IOCTL_FAIL_CMD, - - /* Activate/deactivate autofs mount */ - AUTOFS_DEV_IOCTL_SETPIPEFD_CMD, - AUTOFS_DEV_IOCTL_CATATONIC_CMD, - - /* Expiry timeout */ - AUTOFS_DEV_IOCTL_TIMEOUT_CMD, - - /* Get mount last requesting uid and gid */ - AUTOFS_DEV_IOCTL_REQUESTER_CMD, - - /* Check for eligible expire candidates */ - AUTOFS_DEV_IOCTL_EXPIRE_CMD, - - /* Request busy status */ - AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD, - - /* Check if path is a mountpoint */ - AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD, -}; - -#define AUTOFS_IOCTL 0x93 - -#define AUTOFS_DEV_IOCTL_VERSION \ - _IOWR(AUTOFS_IOCTL, \ - 
AUTOFS_DEV_IOCTL_VERSION_CMD, struct autofs_dev_ioctl) - -#define AUTOFS_DEV_IOCTL_PROTOVER \ - _IOWR(AUTOFS_IOCTL, \ - AUTOFS_DEV_IOCTL_PROTOVER_CMD, struct autofs_dev_ioctl) - -#define AUTOFS_DEV_IOCTL_PROTOSUBVER \ - _IOWR(AUTOFS_IOCTL, \ - AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD, struct autofs_dev_ioctl) - -#define AUTOFS_DEV_IOCTL_OPENMOUNT \ - _IOWR(AUTOFS_IOCTL, \ - AUTOFS_DEV_IOCTL_OPENMOUNT_CMD, struct autofs_dev_ioctl) - -#define AUTOFS_DEV_IOCTL_CLOSEMOUNT \ - _IOWR(AUTOFS_IOCTL, \ - AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD, struct autofs_dev_ioctl) - -#define AUTOFS_DEV_IOCTL_READY \ - _IOWR(AUTOFS_IOCTL, \ - AUTOFS_DEV_IOCTL_READY_CMD, struct autofs_dev_ioctl) - -#define AUTOFS_DEV_IOCTL_FAIL \ - _IOWR(AUTOFS_IOCTL, \ - AUTOFS_DEV_IOCTL_FAIL_CMD, struct autofs_dev_ioctl) - -#define AUTOFS_DEV_IOCTL_SETPIPEFD \ - _IOWR(AUTOFS_IOCTL, \ - AUTOFS_DEV_IOCTL_SETPIPEFD_CMD, struct autofs_dev_ioctl) - -#define AUTOFS_DEV_IOCTL_CATATONIC \ - _IOWR(AUTOFS_IOCTL, \ - AUTOFS_DEV_IOCTL_CATATONIC_CMD, struct autofs_dev_ioctl) - -#define AUTOFS_DEV_IOCTL_TIMEOUT \ - _IOWR(AUTOFS_IOCTL, \ - AUTOFS_DEV_IOCTL_TIMEOUT_CMD, struct autofs_dev_ioctl) - -#define AUTOFS_DEV_IOCTL_REQUESTER \ - _IOWR(AUTOFS_IOCTL, \ - AUTOFS_DEV_IOCTL_REQUESTER_CMD, struct autofs_dev_ioctl) - -#define AUTOFS_DEV_IOCTL_EXPIRE \ - _IOWR(AUTOFS_IOCTL, \ - AUTOFS_DEV_IOCTL_EXPIRE_CMD, struct autofs_dev_ioctl) - -#define AUTOFS_DEV_IOCTL_ASKUMOUNT \ - _IOWR(AUTOFS_IOCTL, \ - AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD, struct autofs_dev_ioctl) - -#define AUTOFS_DEV_IOCTL_ISMOUNTPOINT \ - _IOWR(AUTOFS_IOCTL, \ - AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD, struct autofs_dev_ioctl) - +#include #endif /* _LINUX_AUTO_DEV_IOCTL_H */ -- cgit v1.2.3 From 99fdafdeacfa99ca9047641b684fa2aaf094a661 Mon Sep 17 00:00:00 2001 From: Jason Cooper Date: Tue, 11 Oct 2016 13:53:52 -0700 Subject: random: simplify API for random address requests To date, all callers of randomize_range() have set the length to 0, and check for a zero return value. 
For the current callers, the only way to get zero returned is if end <= start. Since they are all adding a constant to the start address, this is unnecessary. We can remove a bunch of needless checks by simplifying the API to do just what everyone wants, return an address between [start, start + range). While we're here, s/get_random_int/get_random_long/. No current call site is adversely affected by get_random_int(), since all current range requests are < UINT_MAX. However, we should match caller expectations to avoid coming up short (ha!) in the future. All current callers to randomize_range() chose to use the start address if randomize_range() failed. Therefore, we simplify things by just returning the start address on error. randomize_range() will be removed once all callers have been converted over to randomize_addr(). Link: http://lkml.kernel.org/r/20160803233913.32511-2-jason@lakedaemon.net Signed-off-by: Jason Cooper Acked-by: Kees Cook Cc: Michael Ellerman Cc: "Roberts, William C" Cc: Yann Droneaud Cc: Russell King Cc: "Theodore Ts'o" Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Catalin Marinas Cc: Will Deacon Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "David S. Miller" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H . 
Peter Anvin" Cc: Nick Kralevich Cc: Jeffrey Vander Stoep Cc: Daniel Cashman Cc: Chris Metcalf Cc: Guan Xuetao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/random.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index 3d6e9815cd85..26741892828a 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -35,6 +35,7 @@ extern const struct file_operations random_fops, urandom_fops; unsigned int get_random_int(void); unsigned long get_random_long(void); unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len); +unsigned long randomize_page(unsigned long start, unsigned long range); u32 prandom_u32(void); void prandom_bytes(void *buf, size_t nbytes); -- cgit v1.2.3 From 7425154d3bbf5fcc7554738cab6dfac559ffbdda Mon Sep 17 00:00:00 2001 From: Jason Cooper Date: Tue, 11 Oct 2016 13:54:11 -0700 Subject: random: remove unused randomize_range() All call sites for randomize_range have been updated to use the much simpler and more robust randomize_addr(). Remove the now unnecessary code. 
Link: http://lkml.kernel.org/r/20160803233913.32511-8-jason@lakedaemon.net Signed-off-by: Jason Cooper Acked-by: Kees Cook Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/random.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index 26741892828a..f7bb7a355cf7 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -34,7 +34,6 @@ extern const struct file_operations random_fops, urandom_fops; unsigned int get_random_int(void); unsigned long get_random_long(void); -unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len); unsigned long randomize_page(unsigned long start, unsigned long range); u32 prandom_u32(void); -- cgit v1.2.3 From a9a62c9384417545620aee1b5ad1d9357350c17a Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Tue, 11 Oct 2016 13:54:14 -0700 Subject: dma-mapping: introduce the DMA_ATTR_NO_WARN attribute Introduce the DMA_ATTR_NO_WARN attribute, and document it. Link: http://lkml.kernel.org/r/1470092390-25451-2-git-send-email-mauricfo@linux.vnet.ibm.com Signed-off-by: Mauricio Faria de Oliveira Cc: Keith Busch Cc: Jens Axboe Cc: Benjamin Herrenschmidt Cc: Michael Ellerman Cc: Krzysztof Kozlowski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dma-mapping.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 0f90eb5e3c6b..08528afdf58b 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -56,6 +56,11 @@ * that gives better TLB efficiency. */ #define DMA_ATTR_ALLOC_SINGLE_PAGES (1UL << 7) +/* + * DMA_ATTR_NO_WARN: This tells the DMA-mapping subsystem to suppress + * allocation failure reports (similarly to __GFP_NOWARN). 
+ */ +#define DMA_ATTR_NO_WARN (1UL << 8) /* * A dma_addr_t can hold any valid DMA or bus address for the platform. -- cgit v1.2.3 From 26b5679e437ef4f83db66437981c7c0d569973b1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 11 Oct 2016 13:54:33 -0700 Subject: relay: Use irq_work instead of plain timer for deferred wakeup Relay avoids calling wake_up_interruptible() for doing the wakeup of readers/consumers, waiting for the generation of new data, from the context of a process which produced the data. This is apparently done to prevent the possibility of a deadlock in case Scheduler itself is generating data for the relay, after acquiring rq->lock. The following patch used a timer (to be scheduled at next jiffy), for delegating the wakeup to another context. commit 7c9cb38302e78d24e37f7d8a2ea7eed4ae5f2fa7 Author: Tom Zanussi Date: Wed May 9 02:34:01 2007 -0700 relay: use plain timer instead of delayed work relay doesn't need to use schedule_delayed_work() for waking readers when a simple timer will do. Scheduling a plain timer, at next jiffies boundary, to do the wakeup causes a significant wakeup latency for the Userspace client, which makes relay less suitable for the high-frequency low-payload use cases where the data gets generated at a very high rate, like multiple sub buffers getting filled within a millisecond. Moreover the timer is re-scheduled on every newly produced sub buffer so the timer keeps getting pushed out if sub buffers are filled in a very quick succession (less than a jiffy gap between filling of 2 sub buffers). As a result relay runs out of sub buffers to store the new data. By using irq_work it is ensured that wakeup of userspace client, blocked in the poll call, is done at earliest (through self IPI or next timer tick) enabling it to always consume the data in time. Also this makes relay consistent with printk & ring buffers (trace), as they too use irq_work for deferred wake up of readers.
[arnd@arndb.de: select CONFIG_IRQ_WORK] Link: http://lkml.kernel.org/r/20160912154035.3222156-1-arnd@arndb.de [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/1472906487-1559-1-git-send-email-akash.goel@intel.com Signed-off-by: Peter Zijlstra Signed-off-by: Akash Goel Cc: Tom Zanussi Cc: Chris Wilson Cc: Tvrtko Ursulin Signed-off-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/relay.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/relay.h b/include/linux/relay.h index ecbb34a382b8..68c1448e56bb 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -38,7 +39,7 @@ struct rchan_buf size_t subbufs_consumed; /* count of sub-buffers consumed */ struct rchan *chan; /* associated channel */ wait_queue_head_t read_wait; /* reader wait queue */ - struct timer_list timer; /* reader wake-up timer */ + struct irq_work wakeup_work; /* reader wakeup */ struct dentry *dentry; /* channel file dentry */ struct kref kref; /* channel buffer refcount */ struct page **page_array; /* array of current buffer pages */ -- cgit v1.2.3 From 5864a2fd3088db73d47942370d0f7210a807b9bc Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Tue, 11 Oct 2016 13:54:50 -0700 Subject: ipc/sem.c: fix complex_count vs. simple op race Commit 6d07b68ce16a ("ipc/sem.c: optimize sem_lock()") introduced a race: sem_lock has a fast path that allows parallel simple operations. There are two reasons why a simple operation cannot run in parallel: - a non-simple operation is ongoing (sma->sem_perm.lock held) - a complex operation is sleeping (sma->complex_count != 0) As both facts are stored independently, a thread can bypass the current checks by sleeping in the right positions. See below for more details (or kernel bugzilla 105651).
The patch fixes that by creating one variable (complex_mode) that tracks both reasons why parallel operations are not possible. The patch also updates stale documentation regarding the locking. With regards to stable kernels: The patch is required for all kernels that include the commit 6d07b68ce16a ("ipc/sem.c: optimize sem_lock()") (3.10?) The alternative is to revert the patch that introduced the race. The patch is safe for backporting, i.e. it makes no assumptions about memory barriers in spin_unlock_wait(). Background: Here is the race of the current implementation: Thread A: (simple op) - does the first "sma->complex_count == 0" test Thread B: (complex op) - does sem_lock(): This includes an array scan. But the scan can't find Thread A, because Thread A does not own sem->lock yet. - the thread does the operation, increases complex_count, drops sem_lock, sleeps Thread A: - spin_lock(&sem->lock), spin_is_locked(sma->sem_perm.lock) - sleeps before the complex_count test Thread C: (complex op) - does sem_lock (no array scan, complex_count==1) - wakes up Thread B. - decrements complex_count Thread A: - does the complex_count test Bug: Now both thread A and thread C operate on the same array, without any synchronization. Fixes: 6d07b68ce16a ("ipc/sem.c: optimize sem_lock()") Link: http://lkml.kernel.org/r/1469123695-5661-1-git-send-email-manfred@colorfullife.com Reported-by: Cc: "H. Peter Anvin" Cc: Peter Zijlstra Cc: Davidlohr Bueso Cc: Thomas Gleixner Cc: Ingo Molnar Cc: <1vier1@web.de> Cc: [3.10+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sem.h b/include/linux/sem.h index 976ce3a19f1b..d0efd6e6c20a 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -21,6 +21,7 @@ struct sem_array { struct list_head list_id; /* undo requests on this array */ int sem_nsems; /* no. 
of semaphores in array */ int complex_count; /* pending complex operations */ + bool complex_mode; /* no parallel simple ops */ }; #ifdef CONFIG_SYSVIPC -- cgit v1.2.3 From 0549a3c02efb350776bc869685a361045efd3a29 Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Tue, 11 Oct 2016 13:55:08 -0700 Subject: kdump, vmcoreinfo: report memory sections virtual addresses KASLR memory randomization can randomize the base of the physical memory mapping (PAGE_OFFSET), vmalloc (VMALLOC_START) and vmemmap (VMEMMAP_START). Adding these variables on VMCOREINFO so tools can easily identify the base of each memory section. Link: http://lkml.kernel.org/r/1471531632-23003-1-git-send-email-thgarnie@google.com Signed-off-by: Thomas Garnier Acked-by: Baoquan He Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H . Peter Anvin" Cc: Eric Biederman Cc: Xunlei Pang Cc: HATAYAMA Daisuke Cc: Kees Cook Cc: Eugene Surovegin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index d7437777baaa..406c33dcae13 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -259,6 +259,12 @@ phys_addr_t paddr_vmcoreinfo_note(void); vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name) #define VMCOREINFO_CONFIG(name) \ vmcoreinfo_append_str("CONFIG_%s=y\n", #name) +#define VMCOREINFO_PAGE_OFFSET(value) \ + vmcoreinfo_append_str("PAGE_OFFSET=%lx\n", (unsigned long)value) +#define VMCOREINFO_VMALLOC_START(value) \ + vmcoreinfo_append_str("VMALLOC_START=%lx\n", (unsigned long)value) +#define VMCOREINFO_VMEMMAP_START(value) \ + vmcoreinfo_append_str("VMEMMAP_START=%lx\n", (unsigned long)value) extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; -- cgit v1.2.3 From 9099daed9c6991a512c1f74b92ec49daf9408cda Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 11 Oct 2016 13:55:11 -0700 Subject: mm: kmemleak: avoid 
using __va() on addresses that don't have a lowmem mapping Some of the kmemleak_*() callbacks in memblock, bootmem, CMA convert a physical address to a virtual one using __va(). However, such physical addresses may sometimes be located in highmem and using __va() is incorrect, leading to inconsistent object tracking in kmemleak. The following functions have been added to the kmemleak API and they take a physical address as the object pointer. They only perform the corresponding action if the address has a lowmem mapping: kmemleak_alloc_phys kmemleak_free_part_phys kmemleak_not_leak_phys kmemleak_ignore_phys The affected calling places have been updated to use the new kmemleak API. Link: http://lkml.kernel.org/r/1471531432-16503-1-git-send-email-catalin.marinas@arm.com Signed-off-by: Catalin Marinas Reported-by: Vignesh R Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmemleak.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 4894c6888bc6..1c2a32829620 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -38,6 +38,11 @@ extern void kmemleak_not_leak(const void *ptr) __ref; extern void kmemleak_ignore(const void *ptr) __ref; extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref; extern void kmemleak_no_scan(const void *ptr) __ref; +extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count, + gfp_t gfp) __ref; +extern void kmemleak_free_part_phys(phys_addr_t phys, size_t size) __ref; +extern void kmemleak_not_leak_phys(phys_addr_t phys) __ref; +extern void kmemleak_ignore_phys(phys_addr_t phys) __ref; static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, int min_count, unsigned long flags, @@ -106,6 +111,19 @@ static inline void kmemleak_erase(void **ptr) static inline void kmemleak_no_scan(const void *ptr) { } +static inline void 
kmemleak_alloc_phys(phys_addr_t phys, size_t size, + int min_count, gfp_t gfp) +{ +} +static inline void kmemleak_free_part_phys(phys_addr_t phys, size_t size) +{ +} +static inline void kmemleak_not_leak_phys(phys_addr_t phys) +{ +} +static inline void kmemleak_ignore_phys(phys_addr_t phys) +{ +} #endif /* CONFIG_DEBUG_KMEMLEAK */ -- cgit v1.2.3 From e700591ae03896c16974d4e1ab58eb296aaa5f59 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Tue, 11 Oct 2016 13:55:17 -0700 Subject: kthread: rename probe_kthread_data() to kthread_probe_data() Patch series "kthread: Kthread worker API improvements" The intention of this patchset is to make it easier to manipulate and maintain kthreads. Especially, I want to replace all the custom main cycles with a generic one. Also I want to make the kthreads sleep in a consistent state in a common place when there is no work. This patch (of 11): A good practice is to prefix the names of functions by the name of the subsystem. This patch fixes the name of probe_kthread_data(). The other wrong functions names are part of the kthread worker API and will be fixed separately. Link: http://lkml.kernel.org/r/1470754545-17632-2-git-send-email-pmladek@suse.com Signed-off-by: Petr Mladek Suggested-by: Andrew Morton Acked-by: Tejun Heo Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Cc: "Paul E. 
McKenney" Cc: Josh Triplett Cc: Thomas Gleixner Cc: Jiri Kosina Cc: Borislav Petkov Cc: Michal Hocko Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kthread.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index e691b6a23f72..c792ee1628d0 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -44,7 +44,7 @@ bool kthread_should_stop(void); bool kthread_should_park(void); bool kthread_freezable_should_stop(bool *was_frozen); void *kthread_data(struct task_struct *k); -void *probe_kthread_data(struct task_struct *k); +void *kthread_probe_data(struct task_struct *k); int kthread_park(struct task_struct *k); void kthread_unpark(struct task_struct *k); void kthread_parkme(void); -- cgit v1.2.3 From 3989144f863ac576e6efba298d24b0b02a10d4bb Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Tue, 11 Oct 2016 13:55:20 -0700 Subject: kthread: kthread worker API cleanup A good practice is to prefix the names of functions by the name of the subsystem. The kthread worker API is a mix of classic kthreads and workqueues. Each worker has a dedicated kthread. It runs a generic function that process queued works. It is implemented as part of the kthread subsystem. This patch renames the existing kthread worker API to use the corresponding name from the workqueues API prefixed by kthread_: __init_kthread_worker() -> __kthread_init_worker() init_kthread_worker() -> kthread_init_worker() init_kthread_work() -> kthread_init_work() insert_kthread_work() -> kthread_insert_work() queue_kthread_work() -> kthread_queue_work() flush_kthread_work() -> kthread_flush_work() flush_kthread_worker() -> kthread_flush_worker() Note that the names of DEFINE_KTHREAD_WORK*() macros stay as they are. It is common that the "DEFINE_" prefix has precedence over the subsystem names. 
Note that INIT() macros and init() functions use different naming scheme. There is no good solution. There are several reasons for this solution: + "init" in the function names stands for the verb "initialize" aka "initialize worker". While "INIT" in the macro names stands for the noun "INITIALIZER" aka "worker initializer". + INIT() macros are used only in DEFINE() macros + init() functions are used close to the other kthread() functions. It looks much better if all the functions use the same scheme. + There will be also kthread_destroy_worker() that will be used close to kthread_cancel_work(). It is related to the init() function. Again it looks better if all functions use the same naming scheme. + there are several precedents for such init() function names, e.g. amd_iommu_init_device(), free_area_init_node(), jump_label_init_type(), regmap_init_mmio_clk(), + It is not an argument but it was inconsistent even before. [arnd@arndb.de: fix linux-next merge conflict] Link: http://lkml.kernel.org/r/20160908135724.1311726-1-arnd@arndb.de Link: http://lkml.kernel.org/r/1470754545-17632-3-git-send-email-pmladek@suse.com Suggested-by: Andrew Morton Signed-off-by: Petr Mladek Cc: Oleg Nesterov Cc: Tejun Heo Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Cc: "Paul E. McKenney" Cc: Josh Triplett Cc: Thomas Gleixner Cc: Jiri Kosina Cc: Borislav Petkov Cc: Michal Hocko Cc: Vlastimil Babka Signed-off-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kthread.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index c792ee1628d0..e2b095b8ca47 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -57,7 +57,7 @@ extern int tsk_fork_get_node(struct task_struct *tsk); * Simple work processor based on kthread. * * This provides easier way to make use of kthreads. 
A kthread_work - * can be queued and flushed using queue/flush_kthread_work() + * can be queued and flushed using queue/kthread_flush_work() * respectively. Queued kthread_works are processed by a kthread * running kthread_worker_fn(). */ @@ -99,23 +99,23 @@ struct kthread_work { */ #ifdef CONFIG_LOCKDEP # define KTHREAD_WORKER_INIT_ONSTACK(worker) \ - ({ init_kthread_worker(&worker); worker; }) + ({ kthread_init_worker(&worker); worker; }) # define DEFINE_KTHREAD_WORKER_ONSTACK(worker) \ struct kthread_worker worker = KTHREAD_WORKER_INIT_ONSTACK(worker) #else # define DEFINE_KTHREAD_WORKER_ONSTACK(worker) DEFINE_KTHREAD_WORKER(worker) #endif -extern void __init_kthread_worker(struct kthread_worker *worker, +extern void __kthread_init_worker(struct kthread_worker *worker, const char *name, struct lock_class_key *key); -#define init_kthread_worker(worker) \ +#define kthread_init_worker(worker) \ do { \ static struct lock_class_key __key; \ - __init_kthread_worker((worker), "("#worker")->lock", &__key); \ + __kthread_init_worker((worker), "("#worker")->lock", &__key); \ } while (0) -#define init_kthread_work(work, fn) \ +#define kthread_init_work(work, fn) \ do { \ memset((work), 0, sizeof(struct kthread_work)); \ INIT_LIST_HEAD(&(work)->node); \ @@ -124,9 +124,9 @@ extern void __init_kthread_worker(struct kthread_worker *worker, int kthread_worker_fn(void *worker_ptr); -bool queue_kthread_work(struct kthread_worker *worker, +bool kthread_queue_work(struct kthread_worker *worker, struct kthread_work *work); -void flush_kthread_work(struct kthread_work *work); -void flush_kthread_worker(struct kthread_worker *worker); +void kthread_flush_work(struct kthread_work *work); +void kthread_flush_worker(struct kthread_worker *worker); #endif /* _LINUX_KTHREAD_H */ -- cgit v1.2.3 From fbae2d44aa1df72d0154be77eb4d71e1e34c0f8f Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Tue, 11 Oct 2016 13:55:30 -0700 Subject: kthread: add kthread_create_worker*() Kthread workers are 
currently created using the classic kthread API, namely kthread_run(). kthread_worker_fn() is passed as the @threadfn parameter. This patch defines kthread_create_worker() and kthread_create_worker_on_cpu() functions that hide implementation details. They enforce using kthread_worker_fn() for the main thread. But I doubt that there are any plans to create any alternative. In fact, I think that we do not want any alternative main thread because it would be hard to support consistency with the rest of the kthread worker API. The naming and function of kthread_create_worker() is inspired by the workqueues API like the rest of the kthread worker API. The kthread_create_worker_on_cpu() variant is motivated by the original kthread_create_on_cpu(). Note that we need to bind per-CPU kthread workers already when they are created. It makes life easier. kthread_bind() could not be used later for an already running worker. This patch does _not_ convert existing kthread workers. The kthread worker API needs more improvements first, e.g. a function to destroy the worker. IMPORTANT: kthread_create_worker_on_cpu() allows any format of the worker name to be used, in contrast to kthread_create_on_cpu(). The good thing is that it is more generic. The bad thing is that most users will need to pass the cpu number in two parameters, e.g. kthread_create_worker_on_cpu(cpu, "helper/%d", cpu). To be honest, the main motivation was to avoid the need for an empty va_list. The only legal way was to create a helper function that would be called with an empty list. Other attempts caused compilation warnings or even errors on different architectures. There were also other alternatives, for example, using #define or splitting __kthread_create_worker(). The used solution looked like the least ugly.
Link: http://lkml.kernel.org/r/1470754545-17632-6-git-send-email-pmladek@suse.com Signed-off-by: Petr Mladek Acked-by: Tejun Heo Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Cc: "Paul E. McKenney" Cc: Josh Triplett Cc: Thomas Gleixner Cc: Jiri Kosina Cc: Borislav Petkov Cc: Michal Hocko Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kthread.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index e2b095b8ca47..daeb2befbabf 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -124,6 +124,13 @@ extern void __kthread_init_worker(struct kthread_worker *worker, int kthread_worker_fn(void *worker_ptr); +__printf(1, 2) +struct kthread_worker * +kthread_create_worker(const char namefmt[], ...); + +struct kthread_worker * +kthread_create_worker_on_cpu(int cpu, const char namefmt[], ...); + bool kthread_queue_work(struct kthread_worker *worker, struct kthread_work *work); void kthread_flush_work(struct kthread_work *work); -- cgit v1.2.3 From 35033fe9cbbf18415dfeb7e27f0d4228dfc7458a Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Tue, 11 Oct 2016 13:55:33 -0700 Subject: kthread: add kthread_destroy_worker() The current kthread worker users call flush() and stop() explicitly. This function does the same plus it frees the kthread_worker struct in one call. It is supposed to be used together with kthread_create_worker*() that allocates struct kthread_worker. Link: http://lkml.kernel.org/r/1470754545-17632-7-git-send-email-pmladek@suse.com Signed-off-by: Petr Mladek Cc: Oleg Nesterov Cc: Tejun Heo Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Cc: "Paul E. 
McKenney" Cc: Josh Triplett Cc: Thomas Gleixner Cc: Jiri Kosina Cc: Borislav Petkov Cc: Michal Hocko Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kthread.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index daeb2befbabf..afc8939da861 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -136,4 +136,6 @@ bool kthread_queue_work(struct kthread_worker *worker, void kthread_flush_work(struct kthread_work *work); void kthread_flush_worker(struct kthread_worker *worker); +void kthread_destroy_worker(struct kthread_worker *worker); + #endif /* _LINUX_KTHREAD_H */ -- cgit v1.2.3 From 22597dc3d97b1ead2aca201397415a1a84bf2b26 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Tue, 11 Oct 2016 13:55:40 -0700 Subject: kthread: initial support for delayed kthread work We are going to use kthread_worker more widely and delayed works will be pretty useful. The implementation is inspired by workqueues. It uses a timer to queue the work after the requested delay. If the delay is zero, the work is queued immediately. In compare with workqueues, each work is associated with a single worker (kthread). Therefore the implementation could be much easier. In particular, we use the worker->lock to synchronize all the operations with the work. We do not need any atomic operation with a flags variable. In fact, we do not need any state variable at all. Instead, we add a list of delayed works into the worker. Then the pending work is listed either in the list of queued or delayed works. And the existing check of pending works is the same even for the delayed ones. A work must not be assigned to another worker unless reinitialized. Therefore the timer handler might expect that dwork->work->worker is valid and it could simply take the lock. We just add some sanity checks to help with debugging a potential misuse. 
Link: http://lkml.kernel.org/r/1470754545-17632-9-git-send-email-pmladek@suse.com Signed-off-by: Petr Mladek Acked-by: Tejun Heo Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Cc: "Paul E. McKenney" Cc: Josh Triplett Cc: Thomas Gleixner Cc: Jiri Kosina Cc: Borislav Petkov Cc: Michal Hocko Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kthread.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index afc8939da861..4acde1ae2228 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -63,10 +63,12 @@ extern int tsk_fork_get_node(struct task_struct *tsk); */ struct kthread_work; typedef void (*kthread_work_func_t)(struct kthread_work *work); +void kthread_delayed_work_timer_fn(unsigned long __data); struct kthread_worker { spinlock_t lock; struct list_head work_list; + struct list_head delayed_work_list; struct task_struct *task; struct kthread_work *current_work; }; @@ -77,9 +79,15 @@ struct kthread_work { struct kthread_worker *worker; }; +struct kthread_delayed_work { + struct kthread_work work; + struct timer_list timer; +}; + #define KTHREAD_WORKER_INIT(worker) { \ .lock = __SPIN_LOCK_UNLOCKED((worker).lock), \ .work_list = LIST_HEAD_INIT((worker).work_list), \ + .delayed_work_list = LIST_HEAD_INIT((worker).delayed_work_list),\ } #define KTHREAD_WORK_INIT(work, fn) { \ @@ -87,12 +95,23 @@ struct kthread_work { .func = (fn), \ } +#define KTHREAD_DELAYED_WORK_INIT(dwork, fn) { \ + .work = KTHREAD_WORK_INIT((dwork).work, (fn)), \ + .timer = __TIMER_INITIALIZER(kthread_delayed_work_timer_fn, \ + 0, (unsigned long)&(dwork), \ + TIMER_IRQSAFE), \ + } + #define DEFINE_KTHREAD_WORKER(worker) \ struct kthread_worker worker = KTHREAD_WORKER_INIT(worker) #define DEFINE_KTHREAD_WORK(work, fn) \ struct kthread_work work = KTHREAD_WORK_INIT(work, fn) +#define 
DEFINE_KTHREAD_DELAYED_WORK(dwork, fn) \ + struct kthread_delayed_work dwork = \ + KTHREAD_DELAYED_WORK_INIT(dwork, fn) + /* * kthread_worker.lock needs its own lockdep class key when defined on * stack with lockdep enabled. Use the following macros in such cases. @@ -122,6 +141,15 @@ extern void __kthread_init_worker(struct kthread_worker *worker, (work)->func = (fn); \ } while (0) +#define kthread_init_delayed_work(dwork, fn) \ + do { \ + kthread_init_work(&(dwork)->work, (fn)); \ + __setup_timer(&(dwork)->timer, \ + kthread_delayed_work_timer_fn, \ + (unsigned long)(dwork), \ + TIMER_IRQSAFE); \ + } while (0) + int kthread_worker_fn(void *worker_ptr); __printf(1, 2) @@ -133,6 +161,11 @@ kthread_create_worker_on_cpu(int cpu, const char namefmt[], ...); bool kthread_queue_work(struct kthread_worker *worker, struct kthread_work *work); + +bool kthread_queue_delayed_work(struct kthread_worker *worker, + struct kthread_delayed_work *dwork, + unsigned long delay); + void kthread_flush_work(struct kthread_work *work); void kthread_flush_worker(struct kthread_worker *worker); -- cgit v1.2.3 From 37be45d49dec2a411e29d50c9597cfe8184b5645 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Tue, 11 Oct 2016 13:55:43 -0700 Subject: kthread: allow to cancel kthread work We are going to use kthread workers more widely and sometimes we will need to make sure that the work is neither pending nor running. This patch implements cancel_*_sync() operations as inspired by workqueues. Well, we are synchronized against the other operations via the worker lock, we use del_timer_sync() and a counter to count parallel cancel operations. Therefore the implementation might be easier. First, we check if a worker is assigned. If not, the work has never been queued after it was initialized. Second, we take the worker lock. It must be the right one. The work must not be assigned to another worker unless it is initialized in between. Third, we try to cancel the timer when it exists.
The timer is deleted synchronously to make sure that the timer call back is not running. We need to temporarily release the worker->lock to avoid a possible deadlock with the callback. In the meantime, we set work->canceling counter to avoid any queuing. Fourth, we try to remove the work from a worker list. It might be the list of either normal or delayed works. Fifth, if the work is running, we call kthread_flush_work(). It might take an arbitrary time. We need to release the worker->lock again. In the meantime, we again block any queuing by the canceling counter. As already mentioned, the check for a pending kthread work is done under a lock. In comparison with workqueues, we do not need to fight for a single PENDING bit to block other operations. Therefore we do not suffer from the thundering storm problem and all parallel canceling jobs might use kthread_flush_work(). Any queuing is blocked until the counter reaches zero. Link: http://lkml.kernel.org/r/1470754545-17632-10-git-send-email-pmladek@suse.com Signed-off-by: Petr Mladek Acked-by: Tejun Heo Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Cc: "Paul E. McKenney" Cc: Josh Triplett Cc: Thomas Gleixner Cc: Jiri Kosina Cc: Borislav Petkov Cc: Michal Hocko Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kthread.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 4acde1ae2228..77435dcde707 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -77,6 +77,8 @@ struct kthread_work { struct list_head node; kthread_work_func_t func; struct kthread_worker *worker; + /* Number of canceling calls that are running at the moment.
*/ + int canceling; }; struct kthread_delayed_work { @@ -169,6 +171,9 @@ bool kthread_queue_delayed_work(struct kthread_worker *worker, void kthread_flush_work(struct kthread_work *work); void kthread_flush_worker(struct kthread_worker *worker); +bool kthread_cancel_work_sync(struct kthread_work *work); +bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *work); + void kthread_destroy_worker(struct kthread_worker *worker); #endif /* _LINUX_KTHREAD_H */ -- cgit v1.2.3 From 9a6b06c8d9a220860468aadb2f1c726570813bf9 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Tue, 11 Oct 2016 13:55:46 -0700 Subject: kthread: allow to modify delayed kthread work There are situations when we need to modify the delay of a delayed kthread work. For example, when the work depends on an event and the initial delay means a timeout. Then we want to queue the work immediately when the event happens. This patch implements kthread_mod_delayed_work() as inspired by workqueues. It cancels the timer, removes the work from any worker list and queues it again with the given timeout. A very special case is when the work is being canceled at the same time. It might happen because of the regular kthread_cancel_delayed_work_sync() or by another kthread_mod_delayed_work(). In this case, we do nothing and let the other operation win. This should not normally happen as the caller is supposed to synchronize these operations in a reasonable way. Link: http://lkml.kernel.org/r/1470754545-17632-11-git-send-email-pmladek@suse.com Signed-off-by: Petr Mladek Acked-by: Tejun Heo Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Cc: "Paul E.
McKenney" Cc: Josh Triplett Cc: Thomas Gleixner Cc: Jiri Kosina Cc: Borislav Petkov Cc: Michal Hocko Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kthread.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 77435dcde707..5c2ec2c4eb22 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -168,6 +168,10 @@ bool kthread_queue_delayed_work(struct kthread_worker *worker, struct kthread_delayed_work *dwork, unsigned long delay); +bool kthread_mod_delayed_work(struct kthread_worker *worker, + struct kthread_delayed_work *dwork, + unsigned long delay); + void kthread_flush_work(struct kthread_work *work); void kthread_flush_worker(struct kthread_worker *worker); -- cgit v1.2.3 From dbf52682cb02863d22b15e3742988c7c6e3f1710 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Tue, 11 Oct 2016 13:55:50 -0700 Subject: kthread: better support freezable kthread workers This patch allows to make kthread worker freezable via a new @flags parameter. It will allow to avoid an init work in some kthreads. It currently does not affect the function of kthread_worker_fn() but it might help to do some optimization or fixes eventually. I currently do not know about any other use for the @flags parameter but I believe that we will want more flags in the future. Finally, I hope that it will not cause confusion with @flags member in struct kthread. Well, I guess that we will want to rework the basic kthreads implementation once all kthreads are converted into kthread workers or workqueues. It is possible that we will merge the two structures. Link: http://lkml.kernel.org/r/1470754545-17632-12-git-send-email-pmladek@suse.com Signed-off-by: Petr Mladek Acked-by: Tejun Heo Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Steven Rostedt Cc: "Paul E. 
McKenney" Cc: Josh Triplett Cc: Thomas Gleixner Cc: Jiri Kosina Cc: Borislav Petkov Cc: Michal Hocko Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kthread.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 5c2ec2c4eb22..4f5235cb13bb 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -65,7 +65,12 @@ struct kthread_work; typedef void (*kthread_work_func_t)(struct kthread_work *work); void kthread_delayed_work_timer_fn(unsigned long __data); +enum { + KTW_FREEZABLE = 1 << 0, /* freeze during suspend */ +}; + struct kthread_worker { + unsigned int flags; spinlock_t lock; struct list_head work_list; struct list_head delayed_work_list; @@ -154,12 +159,13 @@ extern void __kthread_init_worker(struct kthread_worker *worker, int kthread_worker_fn(void *worker_ptr); -__printf(1, 2) +__printf(2, 3) struct kthread_worker * -kthread_create_worker(const char namefmt[], ...); +kthread_create_worker(unsigned int flags, const char namefmt[], ...); struct kthread_worker * -kthread_create_worker_on_cpu(int cpu, const char namefmt[], ...); +kthread_create_worker_on_cpu(int cpu, unsigned int flags, + const char namefmt[], ...); bool kthread_queue_work(struct kthread_worker *worker, struct kthread_work *work); -- cgit v1.2.3 From e154ccc831b5b52a9aa3fe881090bdaf1d80f062 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Tue, 11 Oct 2016 13:55:53 -0700 Subject: kthread: add kerneldoc for kthread_create() This macro is referenced in other kerneldoc comments, but lacks one of its own; fix that. 
Link: http://lkml.kernel.org/r/20160826072313.726a3485@lwn.net Signed-off-by: Jonathan Corbet Reported-by: Mauro Carvalho Chehab Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kthread.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 4f5235cb13bb..a6e82a69c363 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -10,6 +10,17 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), int node, const char namefmt[], ...); +/** + * kthread_create - create a kthread on the current node + * @threadfn: the function to run in the thread + * @data: data pointer for @threadfn() + * @namefmt: printf-style format string for the thread name + * @...: arguments for @namefmt. + * + * This macro will create a kthread on the current node, leaving it in + * the stopped state. This is just a helper for kthread_create_on_node(); + * see the documentation there for more details. + */ #define kthread_create(threadfn, data, namefmt, arg...) \ kthread_create_on_node(threadfn, data, NUMA_NO_NODE, namefmt, ##arg) -- cgit v1.2.3 From 97139d4a6f26445de47b378cddd5192c0278f863 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 11 Oct 2016 13:55:58 -0700 Subject: treewide: remove redundant #include Kernel source files need not include explicitly because the top Makefile forces to include it with: -include $(srctree)/include/linux/kconfig.h This commit removes explicit includes except the following: * arch/s390/include/asm/facilities_src.h * tools/testing/radix-tree/linux/kernel.h These two are used for host programs. 
Link: http://lkml.kernel.org/r/1473656164-11929-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/export.h | 1 - include/linux/gpio/driver.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/export.h b/include/linux/export.h index c565f87f005e..d7df4922da1d 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -78,7 +78,6 @@ extern struct module __this_module; #elif defined(CONFIG_TRIM_UNUSED_KSYMS) -#include #include #define __EXPORT_SYMBOL(sym, sec) \ diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 1f0be7213e6d..24e2cc56beb1 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -8,7 +8,6 @@ #include #include #include -#include struct gpio_desc; struct of_phandle_args; -- cgit v1.2.3 From 9c5d760b8d229b94c5030863a5edaee5f1a9d7b7 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 11 Oct 2016 13:56:04 -0700 Subject: mm: split gfp_mask and mapping flags into separate fields mapping->flags currently encodes two different things into a single flag. It contains sticky gfp_mask for page cache allocations and AS_ codes used to report errors/enospace and other states which are mapping specific. Condensing the two semantically unrelated things saves few bytes but it also complicates other things. For one thing the gfp flags space is reduced and in fact we are already running out of available bits. It can be assumed that more gfp flags will be necessary later on. To not introduce the address_space grow (at least on x86_64) we can stick it right after private_lock because we have a hole there. 
struct address_space { struct inode * host; /* 0 8 */ struct radix_tree_root page_tree; /* 8 16 */ spinlock_t tree_lock; /* 24 4 */ atomic_t i_mmap_writable; /* 28 4 */ struct rb_root i_mmap; /* 32 8 */ struct rw_semaphore i_mmap_rwsem; /* 40 40 */ /* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */ long unsigned int nrpages; /* 80 8 */ long unsigned int nrexceptional; /* 88 8 */ long unsigned int writeback_index; /* 96 8 */ const struct address_space_operations * a_ops; /* 104 8 */ long unsigned int flags; /* 112 8 */ spinlock_t private_lock; /* 120 4 */ /* XXX 4 bytes hole, try to pack */ /* --- cacheline 2 boundary (128 bytes) --- */ struct list_head private_list; /* 128 16 */ void * private_data; /* 144 8 */ /* size: 152, cachelines: 3, members: 14 */ /* sum members: 148, holes: 1, sum holes: 4 */ /* last cacheline: 24 bytes */ }; Link: http://lkml.kernel.org/r/20160912114852.GI14524@dhcp22.suse.cz Signed-off-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 3 ++- include/linux/pagemap.h | 20 +++++++++----------- 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c145219286a8..bc65d5918140 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -440,8 +440,9 @@ struct address_space { unsigned long nrexceptional; pgoff_t writeback_index;/* writeback starts here */ const struct address_space_operations *a_ops; /* methods */ - unsigned long flags; /* error bits/gfp mask */ + unsigned long flags; /* error bits */ spinlock_t private_lock; /* for use by the address_space */ + gfp_t gfp_mask; /* implicit gfp mask for allocations */ struct list_head private_list; /* ditto */ void *private_data; /* ditto */ } __attribute__((aligned(sizeof(long)))); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 747f401cc312..dd15d39e1985 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -16,17 
+16,16 @@ #include /* - * Bits in mapping->flags. The lower __GFP_BITS_SHIFT bits are the page - * allocation mode flags. + * Bits in mapping->flags. */ enum mapping_flags { - AS_EIO = __GFP_BITS_SHIFT + 0, /* IO error on async write */ - AS_ENOSPC = __GFP_BITS_SHIFT + 1, /* ENOSPC on async write */ - AS_MM_ALL_LOCKS = __GFP_BITS_SHIFT + 2, /* under mm_take_all_locks() */ - AS_UNEVICTABLE = __GFP_BITS_SHIFT + 3, /* e.g., ramdisk, SHM_LOCK */ - AS_EXITING = __GFP_BITS_SHIFT + 4, /* final truncate in progress */ + AS_EIO = 0, /* IO error on async write */ + AS_ENOSPC = 1, /* ENOSPC on async write */ + AS_MM_ALL_LOCKS = 2, /* under mm_take_all_locks() */ + AS_UNEVICTABLE = 3, /* e.g., ramdisk, SHM_LOCK */ + AS_EXITING = 4, /* final truncate in progress */ /* writeback related tags are not used */ - AS_NO_WRITEBACK_TAGS = __GFP_BITS_SHIFT + 5, + AS_NO_WRITEBACK_TAGS = 5, }; static inline void mapping_set_error(struct address_space *mapping, int error) @@ -78,7 +77,7 @@ static inline int mapping_use_writeback_tags(struct address_space *mapping) static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { - return (__force gfp_t)mapping->flags & __GFP_BITS_MASK; + return mapping->gfp_mask; } /* Restricts the given gfp_mask to what the mapping allows. */ @@ -94,8 +93,7 @@ static inline gfp_t mapping_gfp_constraint(struct address_space *mapping, */ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) { - m->flags = (m->flags & ~(__force unsigned long)__GFP_BITS_MASK) | - (__force unsigned long)mask; + m->gfp_mask = mask; } void release_pages(struct page **pages, int nr, bool cold); -- cgit v1.2.3 From 899bb6642f2a2f2cd3f77abd6c5a14550e3b37e6 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 12 Oct 2016 08:45:05 +0530 Subject: cpufreq: skip invalid entries when searching the frequency Skip invalid entries when searching the frequency. This fixes cpufreq at least on loongson2 MIPS board. 
Fixes: da0c6dc00c69 (cpufreq: Handle sorted frequency tables more efficiently) Signed-off-by: Aaro Koskinen Signed-off-by: Viresh Kumar Cc: 4.8+ # 4.8+ Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 104 ++++++++++++++++++++++++------------------------ 1 file changed, 52 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 631ba33bbe9f..5fa55fc56e18 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -639,19 +639,19 @@ static inline int cpufreq_table_find_index_al(struct cpufreq_policy *policy, unsigned int target_freq) { struct cpufreq_frequency_table *table = policy->freq_table; + struct cpufreq_frequency_table *pos, *best = table - 1; unsigned int freq; - int i, best = -1; - for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { - freq = table[i].frequency; + cpufreq_for_each_valid_entry(pos, table) { + freq = pos->frequency; if (freq >= target_freq) - return i; + return pos - table; - best = i; + best = pos; } - return best; + return best - table; } /* Find lowest freq at or above target in a table in descending order */ @@ -659,28 +659,28 @@ static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy, unsigned int target_freq) { struct cpufreq_frequency_table *table = policy->freq_table; + struct cpufreq_frequency_table *pos, *best = table - 1; unsigned int freq; - int i, best = -1; - for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { - freq = table[i].frequency; + cpufreq_for_each_valid_entry(pos, table) { + freq = pos->frequency; if (freq == target_freq) - return i; + return pos - table; if (freq > target_freq) { - best = i; + best = pos; continue; } /* No freq found above target_freq */ - if (best == -1) - return i; + if (best == table - 1) + return pos - table; - return best; + return best - pos; } - return best; + return best - pos; } /* Works only on sorted freq-tables */ @@ -700,28 +700,28 @@ static inline int 
cpufreq_table_find_index_ah(struct cpufreq_policy *policy, unsigned int target_freq) { struct cpufreq_frequency_table *table = policy->freq_table; + struct cpufreq_frequency_table *pos, *best = table - 1; unsigned int freq; - int i, best = -1; - for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { - freq = table[i].frequency; + cpufreq_for_each_valid_entry(pos, table) { + freq = pos->frequency; if (freq == target_freq) - return i; + return pos - table; if (freq < target_freq) { - best = i; + best = pos; continue; } /* No freq found below target_freq */ - if (best == -1) - return i; + if (best == table - 1) + return pos - table; - return best; + return best - table; } - return best; + return best - table; } /* Find highest freq at or below target in a table in descending order */ @@ -729,19 +729,19 @@ static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy, unsigned int target_freq) { struct cpufreq_frequency_table *table = policy->freq_table; + struct cpufreq_frequency_table *pos, *best = table - 1; unsigned int freq; - int i, best = -1; - for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { - freq = table[i].frequency; + cpufreq_for_each_valid_entry(pos, table) { + freq = pos->frequency; if (freq <= target_freq) - return i; + return pos - table; - best = i; + best = pos; } - return best; + return best - table; } /* Works only on sorted freq-tables */ @@ -761,32 +761,32 @@ static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy, unsigned int target_freq) { struct cpufreq_frequency_table *table = policy->freq_table; + struct cpufreq_frequency_table *pos, *best = table - 1; unsigned int freq; - int i, best = -1; - for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { - freq = table[i].frequency; + cpufreq_for_each_valid_entry(pos, table) { + freq = pos->frequency; if (freq == target_freq) - return i; + return pos - table; if (freq < target_freq) { - best = i; + best = pos; continue; } /* No freq found below 
target_freq */ - if (best == -1) - return i; + if (best == table - 1) + return pos - table; /* Choose the closest freq */ - if (target_freq - table[best].frequency > freq - target_freq) - return i; + if (target_freq - best->frequency > freq - target_freq) + return pos - table; - return best; + return best - table; } - return best; + return best - table; } /* Find closest freq to target in a table in descending order */ @@ -794,32 +794,32 @@ static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy, unsigned int target_freq) { struct cpufreq_frequency_table *table = policy->freq_table; + struct cpufreq_frequency_table *pos, *best = table - 1; unsigned int freq; - int i, best = -1; - for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { - freq = table[i].frequency; + cpufreq_for_each_valid_entry(pos, table) { + freq = pos->frequency; if (freq == target_freq) - return i; + return pos - table; if (freq > target_freq) { - best = i; + best = pos; continue; } /* No freq found above target_freq */ - if (best == -1) - return i; + if (best == table - 1) + return pos - table; /* Choose the closest freq */ - if (table[best].frequency - target_freq > target_freq - freq) - return i; + if (best->frequency - target_freq > target_freq - freq) + return pos - table; - return best; + return best - table; } - return best; + return best - table; } /* Works only on sorted freq-tables */ -- cgit v1.2.3 From b8a4ddb2e8f44f872fb93bbda2d541b27079fd2b Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 12 Oct 2016 04:57:10 +0300 Subject: net/mlx5: Add MLX5_ARRAY_SET64 to fix BUILD_BUG_ON I am hitting this in mlx5: drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c: In function reclaim_pages_cmd.clone.0: drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c:346: error: call to __compiletime_assert_346 declared with attribute error: BUILD_BUG_ON failed: __mlx5_bit_off(manage_pages_out, pas[i]) % 64 drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c: In function 
give_pages: drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c:291: error: call to __compiletime_assert_291 declared with attribute error: BUILD_BUG_ON failed: __mlx5_bit_off(manage_pages_in, pas[i]) % 64 Problem is that this is doing a BUILD_BUG_ON on a non-constant expression because of trying to take offset of pas[i] in the structure. Fix is to create MLX5_ARRAY_SET64 that takes an additional argument that is the field index to separate between BUILD_BUG_ON on the array constant field and the indexed field to assign the value to. There are two callers of MLX5_SET64 that are trying to get a variable offset, change those to call MLX5_ARRAY_SET64 passing 'pas' and 'i' as the arguments to use in the offset check and the indexed value assignment. Fixes: a533ed5e179cd ("net/mlx5: Pages management commands via mlx5 ifc") Signed-off-by: Tom Herbert Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 77c141797152..58276144ba81 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -92,12 +92,21 @@ __mlx5_mask(typ, fld)) ___t; \ }) -#define MLX5_SET64(typ, p, fld, v) do { \ +#define __MLX5_SET64(typ, p, fld, v) do { \ BUILD_BUG_ON(__mlx5_bit_sz(typ, fld) != 64); \ - BUILD_BUG_ON(__mlx5_bit_off(typ, fld) % 64); \ *((__be64 *)(p) + __mlx5_64_off(typ, fld)) = cpu_to_be64(v); \ } while (0) +#define MLX5_SET64(typ, p, fld, v) do { \ + BUILD_BUG_ON(__mlx5_bit_off(typ, fld) % 64); \ + __MLX5_SET64(typ, p, fld, v); \ +} while (0) + +#define MLX5_ARRAY_SET64(typ, p, fld, idx, v) do { \ + BUILD_BUG_ON(__mlx5_bit_off(typ, fld) % 64); \ + __MLX5_SET64(typ, p, fld[idx], v); \ +} while (0) + #define MLX5_GET64(typ, p, fld) be64_to_cpu(*((__be64 *)(p) + __mlx5_64_off(typ, fld))) #define MLX5_GET64_PR(typ, p, fld) ({ \ -- cgit v1.2.3 From 
d60874cd58fcb21372f2df698c20f8cf2f78fdcb Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 4 Oct 2016 14:40:45 +0200 Subject: vfs: add vfs_get_link() helper This helper is for filesystems that want to read the symlink and are better off with the get_link() interface (returning a char *) rather than the readlink() interface (copy into a userspace buffer). Also call the LSM hook for readlink (not get_link) since this is for symlink reading not following. Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 901e25d495cc..bc8ac5108368 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2919,6 +2919,7 @@ extern int vfs_stat(const char __user *, struct kstat *); extern int vfs_lstat(const char __user *, struct kstat *); extern int vfs_fstat(unsigned int, struct kstat *); extern int vfs_fstatat(int , const char __user *, struct kstat *, int); +extern const char *vfs_get_link(struct dentry *, struct delayed_call *); extern int __generic_block_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, -- cgit v1.2.3 From 496063426dece3f47e21f9f3387205d6ca03bd2a Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 14 Oct 2016 14:26:24 -0400 Subject: pkeys: Remove easily triggered WARN This easy-to-trigger warning shows up instantly when running Trinity on a kernel with CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS disabled. At most this should have been a printk, but the -EINVAL alone should be more than adequate indicator that something isn't available. 
Signed-off-by: Dave Jones Signed-off-by: Linus Torvalds --- include/linux/pkeys.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h index e4c08c1ff0c5..a1bacf1150b2 100644 --- a/include/linux/pkeys.h +++ b/include/linux/pkeys.h @@ -25,7 +25,6 @@ static inline int mm_pkey_alloc(struct mm_struct *mm) static inline int mm_pkey_free(struct mm_struct *mm, int pkey) { - WARN_ONCE(1, "free of protection key when disabled"); return -EINVAL; } -- cgit v1.2.3 From 9f7d416c36124667c406978bcb39746589c35d7f Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Fri, 14 Oct 2016 16:07:23 +0200 Subject: kprobes: Unpoison stack in jprobe_return() for KASAN I observed false KASAN positives in the sctp code, when sctp uses jprobe_return() in jsctp_sf_eat_sack(). The stray 0xf4 in shadow memory are stack redzones: [ ] ================================================================== [ ] BUG: KASAN: stack-out-of-bounds in memcmp+0xe9/0x150 at addr ffff88005e48f480 [ ] Read of size 1 by task syz-executor/18535 [ ] page:ffffea00017923c0 count:0 mapcount:0 mapping: (null) index:0x0 [ ] flags: 0x1fffc0000000000() [ ] page dumped because: kasan: bad access detected [ ] CPU: 1 PID: 18535 Comm: syz-executor Not tainted 4.8.0+ #28 [ ] Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 [ ] ffff88005e48f2d0 ffffffff82d2b849 ffffffff0bc91e90 fffffbfff10971e8 [ ] ffffed000bc91e90 ffffed000bc91e90 0000000000000001 0000000000000000 [ ] ffff88005e48f480 ffff88005e48f350 ffffffff817d3169 ffff88005e48f370 [ ] Call Trace: [ ] [] dump_stack+0x12e/0x185 [ ] [] kasan_report+0x489/0x4b0 [ ] [] __asan_report_load1_noabort+0x19/0x20 [ ] [] memcmp+0xe9/0x150 [ ] [] depot_save_stack+0x176/0x5c0 [ ] [] save_stack+0xb1/0xd0 [ ] [] kasan_slab_free+0x72/0xc0 [ ] [] kfree+0xc8/0x2a0 [ ] [] skb_free_head+0x79/0xb0 [ ] [] skb_release_data+0x37a/0x420 [ ] [] skb_release_all+0x4f/0x60 [ ] [] 
consume_skb+0x138/0x370 [ ] [] sctp_chunk_put+0xcb/0x180 [ ] [] sctp_chunk_free+0x58/0x70 [ ] [] sctp_inq_pop+0x68f/0xef0 [ ] [] sctp_assoc_bh_rcv+0xd6/0x4b0 [ ] [] sctp_inq_push+0x131/0x190 [ ] [] sctp_backlog_rcv+0xe9/0xa20 [ ... ] [ ] Memory state around the buggy address: [ ] ffff88005e48f380: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ ] ffff88005e48f400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ ] >ffff88005e48f480: f4 f4 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ ] ^ [ ] ffff88005e48f500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ ] ffff88005e48f580: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ ] ================================================================== KASAN stack instrumentation poisons stack redzones on function entry and unpoisons them on function exit. If a function exits abnormally (e.g. with a longjmp like jprobe_return()), stack redzones are left poisoned. Later this leads to random KASAN false reports. Unpoison stack redzones in the frames we are going to jump over before doing actual longjmp in jprobe_return(). Signed-off-by: Dmitry Vyukov Acked-by: Masami Hiramatsu Reviewed-by: Mark Rutland Cc: Mark Rutland Cc: Catalin Marinas Cc: Andrey Ryabinin Cc: Lorenzo Pieralisi Cc: Alexander Potapenko Cc: Will Deacon Cc: Andrew Morton Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: "David S. 
Miller" Cc: Masami Hiramatsu Cc: kasan-dev@googlegroups.com Cc: surovegin@google.com Cc: rostedt@goodmis.org Link: http://lkml.kernel.org/r/1476454043-101898-1-git-send-email-dvyukov@google.com Signed-off-by: Ingo Molnar --- include/linux/kasan.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d600303306eb..820c0ad54a01 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -44,6 +44,7 @@ static inline void kasan_disable_current(void) void kasan_unpoison_shadow(const void *address, size_t size); void kasan_unpoison_task_stack(struct task_struct *task); +void kasan_unpoison_stack_above_sp_to(const void *watermark); void kasan_alloc_pages(struct page *page, unsigned int order); void kasan_free_pages(struct page *page, unsigned int order); @@ -85,6 +86,7 @@ size_t kasan_metadata_size(struct kmem_cache *cache); static inline void kasan_unpoison_shadow(const void *address, size_t size) {} static inline void kasan_unpoison_task_stack(struct task_struct *task) {} +static inline void kasan_unpoison_stack_above_sp_to(const void *watermark) {} static inline void kasan_enable_current(void) {} static inline void kasan_disable_current(void) {} -- cgit v1.2.3 From 9224eb77e63f70f16c0b6b7a20ca7d395f3bc077 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 17 Oct 2016 16:00:46 +0100 Subject: irqchip/gic-v3-its: Fix entry size mask for GITS_BASER Entry Size in GITS_BASER occupies 5 bits [52:48], but we mask out 8 bits. 
Fixes: cc2d3216f53c ("irqchip: GICv3: ITS command queue") Cc: stable@vger.kernel.org Signed-off-by: Vladimir Murzin Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 8361c8d3edd1..b7e34313cdfe 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -290,7 +290,7 @@ #define GITS_BASER_TYPE_SHIFT (56) #define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7) #define GITS_BASER_ENTRY_SIZE_SHIFT (48) -#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0xff) + 1) +#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1) #define GITS_BASER_SHAREABILITY_SHIFT (10) #define GITS_BASER_InnerShareable \ GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) -- cgit v1.2.3 From 71757904efadefdf5505712f675218ce59483c5d Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 17 Oct 2016 08:18:15 -0700 Subject: generic syscalls: kill cruft from removed pkey syscalls pkey_set() and pkey_get() were syscalls present in older versions of the protection keys patches. They were fully excised from the x86 code, but some cruft was left in the generic syscall code. The C++ comments were intended to help to make it more glaring to me to fix them before actually submitting them. That technique worked, but later than I would have liked. I test-compiled this for arm64. 
Fixes: a60f7b69d92c0 ("generic syscalls: Wire up memory protection keys syscalls") Signed-off-by: Dave Hansen Acked-by: Arnd Bergmann Cc: Thomas Gleixner Cc: x86@kernel.org Cc: linux-arch@vger.kernel.org Cc: mgorman@techsingularity.net Cc: linux-api@vger.kernel.org Cc: linux-mm@kvack.org Cc: luto@kernel.org Cc: akpm@linux-foundation.org Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0d7abb8b7315..91a740f6b884 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -902,8 +902,5 @@ asmlinkage long sys_pkey_mprotect(unsigned long start, size_t len, unsigned long prot, int pkey); asmlinkage long sys_pkey_alloc(unsigned long flags, unsigned long init_val); asmlinkage long sys_pkey_free(int pkey); -//asmlinkage long sys_pkey_get(int pkey, unsigned long flags); -//asmlinkage long sys_pkey_set(int pkey, unsigned long access_rights, -// unsigned long flags); #endif -- cgit v1.2.3 From 19be0eaffa3ac7d8eb6784ad9bdbc7d67ed8e619 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 13 Oct 2016 13:07:36 -0700 Subject: mm: remove gup_flags FOLL_WRITE games from __get_user_pages() This is an ancient bug that was actually attempted to be fixed once (badly) by me eleven years ago in commit 4ceb5db9757a ("Fix get_user_pages() race for write access") but that was then undone due to problems on s390 by commit f33ea7f404e5 ("fix get_user_pages bug"). In the meantime, the s390 situation has long been fixed, and we can now fix it by checking the pte_dirty() bit properly (and do it better). The s390 dirty bit was implemented in abf09bed3cce ("s390/mm: implement software dirty bits") which made it into v3.9. Earlier kernels will have to look at the page state itself. Also, the VM has become more scalable, and what used to be a purely theoretical race back then has become easier to trigger. 
To fix it, we introduce a new internal FOLL_COW flag to mark the "yes, we already did a COW" rather than play racy games with FOLL_WRITE that is very fundamental, and then use the pte dirty flag to validate that the FOLL_COW flag is still valid. Reported-and-tested-by: Phil "not Paul" Oester Acked-by: Hugh Dickins Reviewed-by: Michal Hocko Cc: Andy Lutomirski Cc: Kees Cook Cc: Oleg Nesterov Cc: Willy Tarreau Cc: Nick Piggin Cc: Greg Thelen Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index e9caec6a51e9..ed85879f47f5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2232,6 +2232,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ #define FOLL_MLOCK 0x1000 /* lock present pages */ #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ +#define FOLL_COW 0x4000 /* internal GUP flag */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); -- cgit v1.2.3 From d4944b0ecec0af882483fe44b66729316e575208 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Oct 2016 01:20:12 +0100 Subject: mm: remove write/force parameters from __get_user_pages_unlocked() This removes the redundant 'write' and 'force' parameters from __get_user_pages_unlocked() to make the use of FOLL_FORCE explicit in callers as use of this flag can result in surprising behaviour (and hence bugs) within the mm subsystem. 
Signed-off-by: Lorenzo Stoakes Acked-by: Paolo Bonzini Reviewed-by: Jan Kara Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ed85879f47f5..bcdea1f4e98c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1285,8 +1285,7 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, int *locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - unsigned int gup_flags); + struct page **pages, unsigned int gup_flags); long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages); int get_user_pages_fast(unsigned long start, int nr_pages, int write, -- cgit v1.2.3 From c164154f66f0c9b02673f07aa4f044f1d9c70274 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Oct 2016 01:20:13 +0100 Subject: mm: replace get_user_pages_unlocked() write/force parameters with gup_flags This removes the 'write' and 'force' use from get_user_pages_unlocked() and replaces them with 'gup_flags' to make the use of FOLL_FORCE explicit in callers as use of this flag can result in surprising behaviour (and hence bugs) within the mm subsystem. 
Signed-off-by: Lorenzo Stoakes Reviewed-by: Jan Kara Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index bcdea1f4e98c..abd53f2eb74e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1287,7 +1287,7 @@ long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages); + struct page **pages, unsigned int gup_flags); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); -- cgit v1.2.3 From 3b913179c3fa89dd0e304193fa0c746fc0481447 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Oct 2016 01:20:14 +0100 Subject: mm: replace get_user_pages_locked() write/force parameters with gup_flags This removes the 'write' and 'force' use from get_user_pages_locked() and replaces them with 'gup_flags' to make the use of FOLL_FORCE explicit in callers as use of this flag can result in surprising behaviour (and hence bugs) within the mm subsystem. 
Signed-off-by: Lorenzo Stoakes Acked-by: Michal Hocko Reviewed-by: Jan Kara Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index abd53f2eb74e..9fe9b0438169 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1282,7 +1282,7 @@ long get_user_pages(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); long get_user_pages_locked(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, int *locked); + unsigned int gup_flags, struct page **pages, int *locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); -- cgit v1.2.3 From 7f23b3504a0df63b724180262c5f3f117f21bcae Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Oct 2016 01:20:15 +0100 Subject: mm: replace get_vaddr_frames() write/force parameters with gup_flags This removes the 'write' and 'force' from get_vaddr_frames() and replaces them with 'gup_flags' to make the use of FOLL_FORCE explicit in callers as use of this flag can result in surprising behaviour (and hence bugs) within the mm subsystem. 
Signed-off-by: Lorenzo Stoakes Acked-by: Michal Hocko Reviewed-by: Jan Kara Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9fe9b0438169..91cc923ce985 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1305,7 +1305,7 @@ struct frame_vector { struct frame_vector *frame_vector_create(unsigned int nr_frames); void frame_vector_destroy(struct frame_vector *vec); int get_vaddr_frames(unsigned long start, unsigned int nr_pfns, - bool write, bool force, struct frame_vector *vec); + unsigned int gup_flags, struct frame_vector *vec); void put_vaddr_frames(struct frame_vector *vec); int frame_vector_to_pages(struct frame_vector *vec); void frame_vector_to_pfns(struct frame_vector *vec); -- cgit v1.2.3 From 768ae309a96103ed02eb1e111e838c87854d8b51 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Oct 2016 01:20:16 +0100 Subject: mm: replace get_user_pages() write/force parameters with gup_flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This removes the 'write' and 'force' from get_user_pages() and replaces them with 'gup_flags' to make the use of FOLL_FORCE explicit in callers as use of this flag can result in surprising behaviour (and hence bugs) within the mm subsystem. 
Signed-off-by: Lorenzo Stoakes Acked-by: Christian König Acked-by: Jesper Nilsson Acked-by: Michal Hocko Reviewed-by: Jan Kara Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 91cc923ce985..30bb5d9631bb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1279,7 +1279,7 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, int write, int force, struct page **pages, struct vm_area_struct **vmas); long get_user_pages(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas); long get_user_pages_locked(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, int *locked); -- cgit v1.2.3 From 9beae1ea89305a9667ceaab6d0bf46a045ad71e7 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Oct 2016 01:20:17 +0100 Subject: mm: replace get_user_pages_remote() write/force parameters with gup_flags This removes the 'write' and 'force' from get_user_pages_remote() and replaces them with 'gup_flags' to make the use of FOLL_FORCE explicit in callers as use of this flag can result in surprising behaviour (and hence bugs) within the mm subsystem. 
Signed-off-by: Lorenzo Stoakes Acked-by: Michal Hocko Reviewed-by: Jan Kara Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 30bb5d9631bb..ecc4be7b67e0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1276,7 +1276,7 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, struct vm_area_struct **vmas, int *nonblocking); long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas); long get_user_pages(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, -- cgit v1.2.3 From 6347e8d5bcce33fc36e651901efefbe2c93a43ef Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Oct 2016 01:20:19 +0100 Subject: mm: replace access_remote_vm() write parameter with gup_flags This removes the 'write' argument from access_remote_vm() and replaces it with 'gup_flags' as use of this function previously silently implied FOLL_FORCE, whereas after this patch callers explicitly pass this flag. We make this explicit as use of FOLL_FORCE can result in surprising behaviour (and hence bugs) within the mm subsystem. 
Signed-off-by: Lorenzo Stoakes Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ecc4be7b67e0..f31bf9058587 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1268,7 +1268,7 @@ static inline int fixup_user_fault(struct task_struct *tsk, extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, - void *buf, int len, int write); + void *buf, int len, unsigned int gup_flags); long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, -- cgit v1.2.3 From f307ab6dcea03f9d8e4d70508fd7d1ca57cfa7f9 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Oct 2016 01:20:20 +0100 Subject: mm: replace access_process_vm() write parameter with gup_flags This removes the 'write' argument from access_process_vm() and replaces it with 'gup_flags' as use of this function previously silently implied FOLL_FORCE, whereas after this patch callers explicitly pass this flag. We make this explicit as use of FOLL_FORCE can result in surprising behaviour (and hence bugs) within the mm subsystem. 
Signed-off-by: Lorenzo Stoakes Acked-by: Jesper Nilsson Acked-by: Michal Hocko Acked-by: Michael Ellerman Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index f31bf9058587..ffbd72979ee7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1266,7 +1266,8 @@ static inline int fixup_user_fault(struct task_struct *tsk, } #endif -extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); +extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, + unsigned int gup_flags); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); -- cgit v1.2.3 From 8ef2074d28373014d05e92b5f13364ef51075b6e Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Wed, 19 Oct 2016 09:51:05 -0600 Subject: nvme: Add tertiary number to NVME_VS NVMe 1.2.1 specification adds a tertiary element to the version number. This updates the macro and its callers to include the final number and fixup a single place in nvmet where the version was generated manually. Signed-off-by: Gabriel Krisman Bertazi Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 7676557ce357..086d196e68f7 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -960,6 +960,7 @@ struct nvme_completion { __le16 status; /* did the command fail, and if so, why? 
*/ }; -#define NVME_VS(major, minor) (((major) << 16) | ((minor) << 8)) +#define NVME_VS(major, minor, tertiary) \ + (((major) << 16) | ((minor) << 8) | (tertiary)) #endif /* _LINUX_NVME_H */ -- cgit v1.2.3 From a446c0840e244f34c22cc13b3a62d50aa51fb4c6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Sep 2016 13:51:06 +0200 Subject: nvme.h: resync with nvme-cli Import a few updates to nvme.h from nvme-cli. This mostly includes a few new fields and error codes, but also a few renames that so far are only used in user space. Also one field is moved from an array of two le64 values to one of 16 u8 values so that we can more easily access it. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Jens Axboe --- include/linux/nvme.h | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 086d196e68f7..989699641e10 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -182,7 +182,7 @@ struct nvme_id_ctrl { char fr[8]; __u8 rab; __u8 ieee[3]; - __u8 mic; + __u8 cmic; __u8 mdts; __le16 cntlid; __le32 ver; @@ -202,7 +202,13 @@ struct nvme_id_ctrl { __u8 apsta; __le16 wctemp; __le16 cctemp; - __u8 rsvd270[50]; + __le16 mtfa; + __le32 hmpre; + __le32 hmmin; + __u8 tnvmcap[16]; + __u8 unvmcap[16]; + __le32 rpmbs; + __u8 rsvd316[4]; __le16 kas; __u8 rsvd322[190]; __u8 sqes; @@ -267,7 +273,7 @@ struct nvme_id_ns { __le16 nabo; __le16 nabspf; __u16 rsvd46; - __le64 nvmcap[2]; + __u8 nvmcap[16]; __u8 rsvd64[40]; __u8 nguid[16]; __u8 eui64[8]; @@ -556,8 +562,10 @@ enum nvme_admin_opcode { nvme_admin_set_features = 0x09, nvme_admin_get_features = 0x0a, nvme_admin_async_event = 0x0c, + nvme_admin_ns_mgmt = 0x0d, nvme_admin_activate_fw = 0x10, nvme_admin_download_fw = 0x11, + nvme_admin_ns_attach = 0x15, nvme_admin_keep_alive = 0x18, nvme_admin_format_nvm = 0x80, 
nvme_admin_security_send = 0x81, @@ -583,6 +591,7 @@ enum { NVME_FEAT_WRITE_ATOMIC = 0x0a, NVME_FEAT_ASYNC_EVENT = 0x0b, NVME_FEAT_AUTO_PST = 0x0c, + NVME_FEAT_HOST_MEM_BUF = 0x0d, NVME_FEAT_KATO = 0x0f, NVME_FEAT_SW_PROGRESS = 0x80, NVME_FEAT_HOST_ID = 0x81, @@ -745,7 +754,7 @@ struct nvmf_common_command { struct nvmf_disc_rsp_page_entry { __u8 trtype; __u8 adrfam; - __u8 nqntype; + __u8 subtype; __u8 treq; __le16 portid; __le16 cntlid; @@ -905,12 +914,23 @@ enum { NVME_SC_INVALID_VECTOR = 0x108, NVME_SC_INVALID_LOG_PAGE = 0x109, NVME_SC_INVALID_FORMAT = 0x10a, - NVME_SC_FIRMWARE_NEEDS_RESET = 0x10b, + NVME_SC_FW_NEEDS_CONV_RESET = 0x10b, NVME_SC_INVALID_QUEUE = 0x10c, NVME_SC_FEATURE_NOT_SAVEABLE = 0x10d, NVME_SC_FEATURE_NOT_CHANGEABLE = 0x10e, NVME_SC_FEATURE_NOT_PER_NS = 0x10f, - NVME_SC_FW_NEEDS_RESET_SUBSYS = 0x110, + NVME_SC_FW_NEEDS_SUBSYS_RESET = 0x110, + NVME_SC_FW_NEEDS_RESET = 0x111, + NVME_SC_FW_NEEDS_MAX_TIME = 0x112, + NVME_SC_FW_ACIVATE_PROHIBITED = 0x113, + NVME_SC_OVERLAPPING_RANGE = 0x114, + NVME_SC_NS_INSUFFICENT_CAP = 0x115, + NVME_SC_NS_ID_UNAVAILABLE = 0x116, + NVME_SC_NS_ALREADY_ATTACHED = 0x118, + NVME_SC_NS_IS_PRIVATE = 0x119, + NVME_SC_NS_NOT_ATTACHED = 0x11a, + NVME_SC_THIN_PROV_NOT_SUPP = 0x11b, + NVME_SC_CTRL_LIST_INVALID = 0x11c, /* * I/O Command Set Specific - NVM commands: @@ -941,6 +961,7 @@ enum { NVME_SC_REFTAG_CHECK = 0x284, NVME_SC_COMPARE_FAILED = 0x285, NVME_SC_ACCESS_DENIED = 0x286, + NVME_SC_UNWRITTEN_BLOCK = 0x287, NVME_SC_DNR = 0x4000, }; -- cgit v1.2.3 From 8d63687afda019a6e037bf9c4ceb3e514c26a35d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Sep 2016 13:51:07 +0200 Subject: nvme.h: don't use uuid_be This makes life easier for nvme-cli and we don't really need the uuid type anyway to start with. 
Signed-off-by: Christoph Hellwig Reviewed-by: Gabriel Krisman Bertazi Reviewed-by: Jay Freyensee Signed-off-by: Jens Axboe --- include/linux/nvme.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 989699641e10..d31ff2dd1d51 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -16,7 +16,6 @@ #define _LINUX_NVME_H #include <linux/types.h> -#include <linux/uuid.h> /* NQN names in commands fields specified one size */ #define NVMF_NQN_FIELD_LEN 256 @@ -803,7 +802,7 @@ struct nvmf_connect_command { }; struct nvmf_connect_data { - uuid_be hostid; + __u8 hostid[16]; __le16 cntlid; char resv4[238]; char subsysnqn[NVMF_NQN_FIELD_LEN]; -- cgit v1.2.3 From 329dd7681c5af84e8ea9f4494c1a304389cdfc6e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Sep 2016 13:51:08 +0200 Subject: nvme.h: add an enum for cns values Ported over from nvme-cli. Signed-off-by: Christoph Hellwig Reviewed-by: Gabriel Krisman Bertazi Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/nvme.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index d31ff2dd1d51..fc3c24206593 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -281,6 +281,16 @@ struct nvme_id_ns { __u8 vs[3712]; }; +enum { + NVME_ID_CNS_NS = 0x00, + NVME_ID_CNS_CTRL = 0x01, + NVME_ID_CNS_NS_ACTIVE_LIST = 0x02, + NVME_ID_CNS_NS_PRESENT_LIST = 0x10, + NVME_ID_CNS_NS_PRESENT = 0x11, + NVME_ID_CNS_CTRL_NS_LIST = 0x12, + NVME_ID_CNS_CTRL_LIST = 0x13, +}; + enum { NVME_NS_FEAT_THIN = 1 << 0, NVME_NS_FLBAS_LBA_MASK = 0xf, -- cgit v1.2.3 From d17af5056cf9e9fc05e68832f7c15687fcc12281 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 30 Sep 2016 10:58:58 -0700 Subject: mm: Change vm_is_stack_for_task() to vm_is_stack_for_current() Asking for a non-current task's stack can't be done without races unless the task is frozen in kernel mode.
As far as I know, vma_is_stack_for_task() never had a safe non-current use case. The __unused annotation is because some KSTK_ESP implementations ignore their parameter, which IMO is further justification for this patch. Signed-off-by: Andy Lutomirski Acked-by: Thomas Gleixner Cc: Al Viro Cc: Andrew Morton Cc: Borislav Petkov Cc: Brian Gerst Cc: Jann Horn Cc: Kees Cook Cc: Linus Torvalds Cc: Linux API Cc: Peter Zijlstra Cc: Tycho Andersen Link: http://lkml.kernel.org/r/4c3f68f426e6c061ca98b4fc7ef85ffbb0a25b0c.1475257877.git.luto@kernel.org Signed-off-by: Ingo Molnar --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index e9caec6a51e9..a658a5167bce 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1391,7 +1391,7 @@ static inline int stack_guard_page_end(struct vm_area_struct *vma, !vma_growsup(vma->vm_next, addr); } -int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t); +int vma_is_stack_for_current(struct vm_area_struct *vma); extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, -- cgit v1.2.3 From c8061485a0d7569a865a3cc3c63347b0f42b3765 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 19 Oct 2016 19:28:11 +0100 Subject: sched/core, x86: Make struct thread_info arch specific again The following commit: c65eacbe290b ("sched/core: Allow putting thread_info into task_struct") ... made 'struct thread_info' a generic struct with only a single ::flags member, if CONFIG_THREAD_INFO_IN_TASK=y is selected. This change however seems to be quite x86 centric, since at least the generic preemption code (asm-generic/preempt.h) assumes that struct thread_info also has a preempt_count member, which apparently was not true for x86.
We could add a few more #ifdefs to solve this problem too, but it seems to be much simpler to make struct thread_info arch specific again. This also makes the conversion to THREAD_INFO_IN_TASK a bit easier for architectures that have some arch specific stuff in their thread_info definition. The arch specific stuff _could_ be moved to thread_struct. However keeping it in thread_info makes it easier: accessing thread_info members is simple, since it is at the beginning of the task_struct, while the thread_struct is at the end. At least on s390 the offsets needed to access members of the thread_struct (with task_struct as base) are too large for various asm instructions. This is not a problem when keeping these members within thread_info. Signed-off-by: Heiko Carstens Signed-off-by: Mark Rutland Acked-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: keescook@chromium.org Cc: linux-arch@vger.kernel.org Link: http://lkml.kernel.org/r/1476901693-8492-2-git-send-email-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/thread_info.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 45f004e9cc59..2873baf5372a 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -13,17 +13,6 @@ struct timespec; struct compat_timespec; -#ifdef CONFIG_THREAD_INFO_IN_TASK -struct thread_info { - unsigned long flags; /* low level flags */ -}; - -#define INIT_THREAD_INFO(tsk) \ -{ \ - .flags = 0, \ -} -#endif - #ifdef CONFIG_THREAD_INFO_IN_TASK #define current_thread_info() ((struct thread_info *)current) #endif -- cgit v1.2.3 From c6fe46a79ecd79606bb96fada4515f6b23f87b62 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 18 Oct 2016 00:41:12 +0900 Subject: cpufreq: fix overflow in cpufreq_table_find_index_dl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit 'best' is always less than or equal to 'pos', so `best - pos' returns a negative value which is then cast to `unsigned int' and passed to __cpufreq_driver_target()->acpi_cpufreq_target() for policy->freq_table selection. This results in BUG: unable to handle kernel paging request at ffff881019b469f8 IP: [] acpi_cpufreq_target+0x4f/0x190 [acpi_cpufreq] PGD 267f067 PUD 0 Oops: 0000 [#1] PREEMPT SMP CPU: 6 PID: 70 Comm: kworker/6:1 Not tainted 4.9.0-rc1-next-20161017-dbg-dirty Workqueue: events dbs_work_handler task: ffff88041b808000 task.stack: ffff88041b810000 RIP: 0010:[] [] acpi_cpufreq_target+0x4f/0x190 [acpi_cpufreq] RSP: 0018:ffff88041b813c60 EFLAGS: 00010282 RAX: ffff880419b46a00 RBX: ffff88041b848400 RCX: ffff880419b20f80 RDX: 00000000001dff38 RSI: 00000000ffffffff RDI: ffff88041b848400 RBP: ffff88041b813cb0 R08: 0000000000000006 R09: 0000000000000040 R10: ffffffff8207f9e0 R11: ffffffff8173595b R12: 0000000000000000 R13: ffff88041f1dff38 R14: 0000000000262900 R15: 0000000bfffffff4 FS: 0000000000000000(0000) GS:ffff88041f000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff881019b469f8 CR3: 000000041a2d3000 CR4: 00000000001406e0 Stack: ffff88041b813cb0 ffffffff813347f9 ffff88041b813ca0 ffffffff81334663 ffff88041f1d4bc0 ffff88041b848400 0000000000000000 0000000000000000 0000000000262900 0000000000000000 ffff88041b813d00 ffffffff813355dc Call Trace: [] ? cpufreq_freq_transition_begin+0xf1/0xfc [] ? get_cpu_idle_time+0x97/0xa6 [] __cpufreq_driver_target+0x3b6/0x44e [] cs_dbs_timer+0x11a/0x135 [] dbs_work_handler+0x39/0x62 [] process_one_work+0x280/0x4a5 [] worker_thread+0x24f/0x397 [] ? rescuer_thread+0x30b/0x30b [] ? nl80211_get_key+0x29/0x36a [] kthread+0xfc/0x104 [] ? put_lock_stats.isra.9+0xe/0x20 [] ?
kthread_create_on_node+0x3f/0x3f [] ret_from_fork+0x22/0x30 Code: 56 4d 6b ff 0c 41 55 41 54 53 48 83 ec 28 48 8b 15 ad 1e 00 00 44 8b 41 08 48 8b 87 c8 00 00 00 49 89 d5 4e 03 2c c5 80 b2 78 81 <46> 8b 74 38 04 45 3b 75 00 75 11 31 c0 83 39 00 0f 84 1c 01 00 RIP [] acpi_cpufreq_target+0x4f/0x190 [acpi_cpufreq] RSP CR2: ffff881019b469f8 ---[ end trace 16d9fc7a17897d37 ]--- [ rjw: In some cases this bug may also cause incorrect frequencies to be selected by cpufreq governors. ] Fixes: 899bb6642f2a (cpufreq: skip invalid entries when searching the frequency) Link: http://marc.info/?l=linux-kernel&m=147672030714331&w=2 Reported-and-tested-by: Sedat Dilek Reported-and-tested-by: Jörg Otte Signed-off-by: Sergey Senozhatsky Acked-by: Viresh Kumar Cc: 4.8+ # 4.8+ Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 5fa55fc56e18..32dc0cbd51ca 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -677,10 +677,10 @@ static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy, if (best == table - 1) return pos - table; - return best - pos; + return best - table; } - return best - pos; + return best - table; } /* Works only on sorted freq-tables */ -- cgit v1.2.3 From 9995f4f184613fb02ee73092b03545520a72b104 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 13 Oct 2016 21:51:06 +0000 Subject: clocksource: Add J-Core timer/clocksource driver At the hardware level, the J-Core PIT is integrated with the interrupt controller, but it is represented as its own device and has an independent programming interface. It provides a 12-bit countdown timer, which is not presently used, and a periodic timer. The interval length for the latter is programmable via a 32-bit throttle register whose units are determined by a bus-period register. 
The periodic timer is used to implement both periodic and oneshot clock event modes; in oneshot mode the interrupt handler simply disables the timer as soon as it fires. Despite its device tree node representing an interrupt for the PIT, the actual irq generated is programmable, not hard-wired. The driver is responsible for programming the PIT to generate the hardware irq number that the DT assigns to it. On SMP configurations, J-Core provides cpu-local instances of the PIT; no broadcast timer is needed. This driver supports the creation of the necessary per-cpu clock_event_device instances. A nanosecond-resolution clocksource is provided using the J-Core "RTC" registers, which give a 64-bit seconds count and 32-bit nanoseconds that wrap every second. The driver converts these to a full-range 32-bit nanoseconds count. Signed-off-by: Rich Felker Cc: Mark Rutland Cc: devicetree@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: Daniel Lezcano Cc: Rob Herring Link: http://lkml.kernel.org/r/b591ff12cc5ebf63d1edc98da26046f95a233814.1476393790.git.dalias@libc.org Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 9b207a8c5af3..afe641c02dca 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -81,6 +81,7 @@ enum cpuhp_state { CPUHP_AP_ARM_ARCH_TIMER_STARTING, CPUHP_AP_ARM_GLOBAL_TIMER_STARTING, CPUHP_AP_DUMMY_TIMER_STARTING, + CPUHP_AP_JCORE_TIMER_STARTING, CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING, CPUHP_AP_ARM_TWD_STARTING, CPUHP_AP_METAG_TIMER_STARTING, -- cgit v1.2.3