From c8770bcf5cefa8cbfae21c07c4fe3428f5a9d42a Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Mon, 7 Mar 2016 10:38:18 +0800 Subject: ath9k: Allow platform override BTCoex pin Add new platform data to allow override BTCoex default pin. Signed-off-by: Miaoqing Pan Signed-off-by: Kalle Valo --- include/linux/ath9k_platform.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h index 33eb274cd0e6..e66153d60bd5 100644 --- a/include/linux/ath9k_platform.h +++ b/include/linux/ath9k_platform.h @@ -31,6 +31,10 @@ struct ath9k_platform_data { u32 gpio_mask; u32 gpio_val; + u32 bt_active_pin; + u32 bt_priority_pin; + u32 wlan_active_pin; + bool endian_check; bool is_clk_25mhz; bool tx_gain_buffalo; -- cgit v1.2.3 From 08a33805518e7845486f88287e8aace6f8439391 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 9 Mar 2016 11:30:12 -0800 Subject: iio: core: implement iio_device_{claim|release}_direct_mode() It is often the case that the driver wants to be sure a device stays in direct mode while it is executing a task or series of tasks. To accomplish this today, the driver performs this sequence: 1) take the device state lock, 2) verify it is not in a buffered mode, 3) execute some tasks, and 4) release that lock. This patch introduces a pair of helper functions that simplify these steps and make it more semantically expressive. iio_device_claim_direct_mode() If the device is not in any buffered mode it is guaranteed to stay that way until iio_release_direct_mode() is called. iio_device_release_direct_mode() Release the claim. Device is no longer guaranteed to stay in direct mode. Signed-off-by: Alison Schofield Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index b2b16772c651..0b2773ada0ba 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -527,6 +527,8 @@ void iio_device_unregister(struct iio_dev *indio_dev); int devm_iio_device_register(struct device *dev, struct iio_dev *indio_dev); void devm_iio_device_unregister(struct device *dev, struct iio_dev *indio_dev); int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp); +int iio_device_claim_direct_mode(struct iio_dev *indio_dev); +void iio_device_release_direct_mode(struct iio_dev *indio_dev); extern struct bus_type iio_bus_type; -- cgit v1.2.3 From d13d3a573be5535123beacd926be38e571097bc5 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Wed, 23 Mar 2016 11:24:47 +0000 Subject: regulator: add missing descriptions in regulator_desc Members csel_reg and csel_mask of the regulator_desc struct are missing descriptions for documentation. Adding them. Signed-off-by: Luis de Bethencourt Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index cd271e89a7e6..01d26244a610 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -255,6 +255,8 @@ enum regulator_type { * * @vsel_reg: Register for selector when using regulator_regmap_X_voltage_ * @vsel_mask: Mask for register bitfield used for selector + * @csel_reg: Register for TPS65218 LS3 current regulator + * @csel_mask: Mask for TPS65218 LS3 current regulator * @apply_reg: Register for initiate voltage change on the output when * using regulator_set_voltage_sel_regmap * @apply_bit: Register bitfield used for initiate voltage change on the -- cgit v1.2.3 From abf2f825d115397944cab91a20c937331d77e37c Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Wed, 23 Mar 2016 11:35:39 +0000 Subject: regulator: add missing description for set_over_current_protection Over current protection is missing descriptions for documentation. Signed-off-by: Luis de Bethencourt Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 3 +++ include/linux/regulator/machine.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 01d26244a610..1392022fe509 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -93,6 +93,9 @@ struct regulator_linear_range { * @get_current_limit: Get the configured limit for a current-limited regulator. * @set_input_current_limit: Configure an input limit. * + * @set_over_current_protection: Support capability of automatically shutting + * down when detecting an over current event. + * * @set_active_discharge: Set active discharge enable/disable of regulators. * * @set_mode: Set the configured operating mode for the regulator. diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 5d627c83a630..ad3e5158e586 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -97,6 +97,7 @@ struct regulator_state { * @ramp_disable: Disable ramp delay when initialising or when setting voltage. * @soft_start: Enable soft start so that voltage ramps slowly. * @pull_down: Enable pull down when regulator is disabled. + * @over_current_protection: Auto disable on over current event. * * @input_uV: Input voltage for regulator when supplied by another regulator. * -- cgit v1.2.3 From 81f4c50607b423a59f8a1b03e1e8fc409a1dcd22 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 14:22:01 -0400 Subject: constify security_path_truncate() Signed-off-by: Al Viro --- include/linux/lsm_hooks.h | 2 +- include/linux/security.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index cdee11cbcdf1..77c3bfdacf16 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1366,7 +1366,7 @@ union security_list_options { int (*path_rmdir)(struct path *dir, struct dentry *dentry); int (*path_mknod)(struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev); - int (*path_truncate)(struct path *path); + int (*path_truncate)(const struct path *path); int (*path_symlink)(struct path *dir, struct dentry *dentry, const char *old_name); int (*path_link)(struct dentry *old_dentry, struct path *new_dir, diff --git a/include/linux/security.h b/include/linux/security.h index 157f0cb1e4d2..be37ccab2286 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1447,7 +1447,7 @@ int security_path_mkdir(struct path *dir, struct dentry *dentry, umode_t mode); int security_path_rmdir(struct path *dir, struct dentry *dentry); int security_path_mknod(struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev); -int security_path_truncate(struct path *path); +int security_path_truncate(const struct path *path); int security_path_symlink(struct path *dir, struct dentry *dentry, const char *old_name); int security_path_link(struct dentry *old_dentry, struct path *new_dir, @@ -1481,7 +1481,7 @@ static inline int security_path_mknod(struct path *dir, struct dentry *dentry, return 0; } -static inline int security_path_truncate(struct path *path) +static inline int security_path_truncate(const struct path *path) { return 0; } -- cgit v1.2.3 From 7df818b2370a9aab5fc58a85b70b8af3d835affa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 14:24:09 -0400 Subject: constify vfs_truncate() Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 14a97194b34b..09a68517e952 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2253,7 +2253,7 @@ struct filename { const char iname[]; }; -extern long vfs_truncate(struct path *, loff_t); +extern long vfs_truncate(const struct path *, loff_t); extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); extern int vfs_fallocate(struct file *file, int mode, loff_t offset, -- cgit v1.2.3 From 7fd25dac9ad3970bede16f2834daf9f9d779d1b0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 14:44:41 -0400 Subject: constify chown_common/security_path_chown Signed-off-by: Al Viro --- include/linux/lsm_hooks.h | 2 +- include/linux/security.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 77c3bfdacf16..84f76cbc6d06 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1375,7 +1375,7 @@ union security_list_options { struct path *new_dir, struct dentry *new_dentry); int (*path_chmod)(struct path *path, umode_t mode); - int (*path_chown)(struct path *path, kuid_t uid, kgid_t gid); + int (*path_chown)(const struct path *path, kuid_t uid, kgid_t gid); int (*path_chroot)(struct path *path); #endif diff --git a/include/linux/security.h b/include/linux/security.h index be37ccab2286..f83ca920ed46 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1456,7 +1456,7 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry, unsigned int flags); int security_path_chmod(struct path *path, umode_t mode); -int security_path_chown(struct path *path, kuid_t uid, kgid_t gid); +int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid); int security_path_chroot(struct path *path); #else /* CONFIG_SECURITY_PATH */ static inline int security_path_unlink(struct path *dir, struct dentry *dentry) @@ -1513,7 +1513,7 @@ static inline int security_path_chmod(struct path *path, umode_t mode) return 0; } -static inline int security_path_chown(struct path *path, kuid_t uid, kgid_t gid) +static inline int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid) { return 0; } -- cgit v1.2.3 From 8a04c43b8741ebb40508d160cf87ca74b70941af Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 14:52:53 -0400 Subject: constify security_sb_mount() Signed-off-by: Al Viro --- include/linux/lsm_hooks.h | 2 +- include/linux/security.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 84f76cbc6d06..47117751f4eb 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1343,7 +1343,7 @@ union security_list_options { int (*sb_kern_mount)(struct super_block *sb, int flags, void *data); int (*sb_show_options)(struct seq_file *m, struct super_block *sb); int (*sb_statfs)(struct dentry *dentry); - int (*sb_mount)(const char *dev_name, struct path *path, + int (*sb_mount)(const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data); int (*sb_umount)(struct vfsmount *mnt, int flags); int (*sb_pivotroot)(struct path *old_path, struct path *new_path); diff --git a/include/linux/security.h b/include/linux/security.h index f83ca920ed46..415a357efe4c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -222,7 +222,7 @@ int security_sb_remount(struct super_block *sb, void *data); int security_sb_kern_mount(struct super_block *sb, int flags, void *data); int security_sb_show_options(struct seq_file *m, struct super_block *sb); int security_sb_statfs(struct dentry *dentry); -int security_sb_mount(const char *dev_name, struct path *path, +int security_sb_mount(const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data); int security_sb_umount(struct vfsmount *mnt, int flags); int security_sb_pivotroot(struct path *old_path, struct path *new_path); @@ -530,7 +530,7 @@ static inline int security_sb_statfs(struct dentry *dentry) return 0; } -static inline int security_sb_mount(const char *dev_name, struct path *path, +static inline int security_sb_mount(const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data) { -- cgit v1.2.3 From be01f9f28e66fa846f02196eb047c6bc445642db Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 14:56:23 -0400 Subject: constify chmod_common/security_path_chmod Signed-off-by: Al Viro --- include/linux/lsm_hooks.h | 2 +- include/linux/security.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 47117751f4eb..294fdfe902bf 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1374,7 +1374,7 @@ union security_list_options { int (*path_rename)(struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); - int (*path_chmod)(struct path *path, umode_t mode); + int (*path_chmod)(const struct path *path, umode_t mode); int (*path_chown)(const struct path *path, kuid_t uid, kgid_t gid); int (*path_chroot)(struct path *path); #endif diff --git a/include/linux/security.h b/include/linux/security.h index 415a357efe4c..d6593ee2d0a9 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1455,7 +1455,7 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir, int security_path_rename(struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry, unsigned int flags); -int security_path_chmod(struct path *path, umode_t mode); +int security_path_chmod(const struct path *path, umode_t mode); int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid); int security_path_chroot(struct path *path); #else /* CONFIG_SECURITY_PATH */ @@ -1508,7 +1508,7 @@ static inline int security_path_rename(struct path *old_dir, return 0; } -static inline int security_path_chmod(struct path *path, umode_t mode) +static inline int security_path_chmod(const struct path *path, umode_t mode) { return 0; } -- cgit v1.2.3 From 989f74e0500a1e136d369bb619adc22786ea5e68 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 15:13:39 -0400 Subject: constify security_path_{unlink,rmdir} Signed-off-by: Al Viro --- include/linux/lsm_hooks.h | 4 ++-- include/linux/security.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 294fdfe902bf..322912cc2da1 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1360,10 +1360,10 @@ union security_list_options { #ifdef CONFIG_SECURITY_PATH - int (*path_unlink)(struct path *dir, struct dentry *dentry); + int (*path_unlink)(const struct path *dir, struct dentry *dentry); int (*path_mkdir)(struct path *dir, struct dentry *dentry, umode_t mode); - int (*path_rmdir)(struct path *dir, struct dentry *dentry); + int (*path_rmdir)(const struct path *dir, struct dentry *dentry); int (*path_mknod)(struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev); int (*path_truncate)(const struct path *path); diff --git a/include/linux/security.h b/include/linux/security.h index d6593ee2d0a9..e292d8cb21d7 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1442,9 +1442,9 @@ static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi #endif /* CONFIG_SECURITY_NETWORK_XFRM */ #ifdef CONFIG_SECURITY_PATH -int security_path_unlink(struct path *dir, struct dentry *dentry); +int security_path_unlink(const struct path *dir, struct dentry *dentry); int security_path_mkdir(struct path *dir, struct dentry *dentry, umode_t mode); -int security_path_rmdir(struct path *dir, struct dentry *dentry); +int security_path_rmdir(const struct path *dir, struct dentry *dentry); int security_path_mknod(struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev); int security_path_truncate(const struct path *path); @@ -1459,7 +1459,7 @@ int security_path_chmod(const struct path *path, umode_t mode); int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid); int security_path_chroot(struct path *path); #else /* CONFIG_SECURITY_PATH */ -static inline int security_path_unlink(struct path *dir, struct dentry *dentry) +static inline int security_path_unlink(const struct path *dir, struct dentry *dentry) { return 0; } @@ -1470,7 +1470,7 @@ static inline int security_path_mkdir(struct path *dir, struct dentry *dentry, return 0; } -static inline int security_path_rmdir(struct path *dir, struct dentry *dentry) +static inline int security_path_rmdir(const struct path *dir, struct dentry *dentry) { return 0; } -- cgit v1.2.3 From d360775217070ff0f4291e47d3f568f0fe0b7374 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 15:21:09 -0400 Subject: constify security_path_{mkdir,mknod,symlink} ... as well as unix_mknod() and may_o_create() Signed-off-by: Al Viro --- include/linux/lsm_hooks.h | 6 +++--- include/linux/security.h | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 322912cc2da1..919fb4f98e4f 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1361,13 +1361,13 @@ union security_list_options { #ifdef CONFIG_SECURITY_PATH int (*path_unlink)(const struct path *dir, struct dentry *dentry); - int (*path_mkdir)(struct path *dir, struct dentry *dentry, + int (*path_mkdir)(const struct path *dir, struct dentry *dentry, umode_t mode); int (*path_rmdir)(const struct path *dir, struct dentry *dentry); - int (*path_mknod)(struct path *dir, struct dentry *dentry, + int (*path_mknod)(const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev); int (*path_truncate)(const struct path *path); - int (*path_symlink)(struct path *dir, struct dentry *dentry, + int (*path_symlink)(const struct path *dir, struct dentry *dentry, const char *old_name); int (*path_link)(struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); diff --git a/include/linux/security.h b/include/linux/security.h index e292d8cb21d7..ccb8c2a170e3 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1443,12 +1443,12 @@ static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi #ifdef CONFIG_SECURITY_PATH int security_path_unlink(const struct path *dir, struct dentry *dentry); -int security_path_mkdir(struct path *dir, struct dentry *dentry, umode_t mode); +int security_path_mkdir(const struct path *dir, struct dentry *dentry, umode_t mode); int security_path_rmdir(const struct path *dir, struct dentry *dentry); -int security_path_mknod(struct path *dir, struct dentry *dentry, umode_t mode, +int security_path_mknod(const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev); int security_path_truncate(const struct path *path); -int security_path_symlink(struct path *dir, struct dentry *dentry, +int security_path_symlink(const struct path *dir, struct dentry *dentry, const char *old_name); int security_path_link(struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); @@ -1464,7 +1464,7 @@ static inline int security_path_unlink(const struct path *dir, struct dentry *de return 0; } -static inline int security_path_mkdir(struct path *dir, struct dentry *dentry, +static inline int security_path_mkdir(const struct path *dir, struct dentry *dentry, umode_t mode) { return 0; @@ -1475,7 +1475,7 @@ static inline int security_path_rmdir(const struct path *dir, struct dentry *den return 0; } -static inline int security_path_mknod(struct path *dir, struct dentry *dentry, +static inline int security_path_mknod(const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev) { return 0; @@ -1486,7 +1486,7 @@ static inline int security_path_truncate(const struct path *path) return 0; } -static inline int security_path_symlink(struct path *dir, struct dentry *dentry, +static inline int security_path_symlink(const struct path *dir, struct dentry *dentry, const char *old_name) { return 0; -- cgit v1.2.3 From 3ccee46ab487d5b87d0621824efe2500b2857c58 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 15:27:45 -0400 Subject: constify security_path_{link,rename} Signed-off-by: Al Viro --- include/linux/lsm_hooks.h | 6 +++--- include/linux/security.h | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 919fb4f98e4f..52c2ac5f4855 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1369,10 +1369,10 @@ union security_list_options { int (*path_truncate)(const struct path *path); int (*path_symlink)(const struct path *dir, struct dentry *dentry, const char *old_name); - int (*path_link)(struct dentry *old_dentry, struct path *new_dir, + int (*path_link)(struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry); - int (*path_rename)(struct path *old_dir, struct dentry *old_dentry, - struct path *new_dir, + int (*path_rename)(const struct path *old_dir, struct dentry *old_dentry, + const struct path *new_dir, struct dentry *new_dentry); int (*path_chmod)(const struct path *path, umode_t mode); int (*path_chown)(const struct path *path, kuid_t uid, kgid_t gid); diff --git a/include/linux/security.h b/include/linux/security.h index ccb8c2a170e3..82854115e36b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1450,10 +1450,10 @@ int security_path_mknod(const struct path *dir, struct dentry *dentry, umode_t m int security_path_truncate(const struct path *path); int security_path_symlink(const struct path *dir, struct dentry *dentry, const char *old_name); -int security_path_link(struct dentry *old_dentry, struct path *new_dir, +int security_path_link(struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry); -int security_path_rename(struct path *old_dir, struct dentry *old_dentry, - struct path *new_dir, struct dentry *new_dentry, +int security_path_rename(const struct path *old_dir, struct dentry *old_dentry, + const struct path *new_dir, struct dentry *new_dentry, unsigned int flags); int security_path_chmod(const struct path *path, umode_t mode); int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid); @@ -1493,15 +1493,15 @@ static inline int security_path_symlink(const struct path *dir, struct dentry *d } static inline int security_path_link(struct dentry *old_dentry, - struct path *new_dir, + const struct path *new_dir, struct dentry *new_dentry) { return 0; } -static inline int security_path_rename(struct path *old_dir, +static inline int security_path_rename(const struct path *old_dir, struct dentry *old_dentry, - struct path *new_dir, + const struct path *new_dir, struct dentry *new_dentry, unsigned int flags) { -- cgit v1.2.3 From 77b286c0d26a5399912f5affd90ed73e2d8b42a5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 15:28:43 -0400 Subject: constify security_path_chroot() Signed-off-by: Al Viro --- include/linux/lsm_hooks.h | 2 +- include/linux/security.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 52c2ac5f4855..e2baca48e596 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1376,7 +1376,7 @@ union security_list_options { struct dentry *new_dentry); int (*path_chmod)(const struct path *path, umode_t mode); int (*path_chown)(const struct path *path, kuid_t uid, kgid_t gid); - int (*path_chroot)(struct path *path); + int (*path_chroot)(const struct path *path); #endif int (*inode_alloc_security)(struct inode *inode); diff --git a/include/linux/security.h b/include/linux/security.h index 82854115e36b..cb53cffbfae4 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1457,7 +1457,7 @@ int security_path_rename(const struct path *old_dir, struct dentry *old_dentry, unsigned int flags); int security_path_chmod(const struct path *path, umode_t mode); int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid); -int security_path_chroot(struct path *path); +int security_path_chroot(const struct path *path); #else /* CONFIG_SECURITY_PATH */ static inline int security_path_unlink(const struct path *dir, struct dentry *dentry) { @@ -1518,7 +1518,7 @@ static inline int security_path_chown(const struct path *path, kuid_t uid, kgid_ return 0; } -static inline int security_path_chroot(struct path *path) +static inline int security_path_chroot(const struct path *path) { return 0; } -- cgit v1.2.3 From 3b73b68c05db0b3c9b282c6e8e6eb71acc589a02 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Mar 2016 15:31:19 -0400 Subject: constify security_sb_pivotroot() Signed-off-by: Al Viro --- include/linux/lsm_hooks.h | 2 +- include/linux/security.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index e2baca48e596..41c0aa6d39ea 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1346,7 +1346,7 @@ union security_list_options { int (*sb_mount)(const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data); int (*sb_umount)(struct vfsmount *mnt, int flags); - int (*sb_pivotroot)(struct path *old_path, struct path *new_path); + int (*sb_pivotroot)(const struct path *old_path, const struct path *new_path); int (*sb_set_mnt_opts)(struct super_block *sb, struct security_mnt_opts *opts, unsigned long kern_flags, diff --git a/include/linux/security.h b/include/linux/security.h index cb53cffbfae4..fcfa211c694f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -225,7 +225,7 @@ int security_sb_statfs(struct dentry *dentry); int security_sb_mount(const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data); int security_sb_umount(struct vfsmount *mnt, int flags); -int security_sb_pivotroot(struct path *old_path, struct path *new_path); +int security_sb_pivotroot(const struct path *old_path, const struct path *new_path); int security_sb_set_mnt_opts(struct super_block *sb, struct security_mnt_opts *opts, unsigned long kern_flags, @@ -542,8 +542,8 @@ static inline int security_sb_umount(struct vfsmount *mnt, int flags) return 0; } -static inline int security_sb_pivotroot(struct path *old_path, - struct path *new_path) +static inline int security_sb_pivotroot(const struct path *old_path, + const struct path *new_path) { return 0; } -- cgit v1.2.3 From 3b672623079bb3e5685b8549e514f2dfaa564406 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 28 Mar 2016 13:09:56 +0900 Subject: regulator: s2mps11: Fix invalid selector mask and voltages for buck9 The buck9 regulator of S2MPS11 PMIC had incorrect vsel_mask (0xff instead of 0x1f) thus reading entire register as buck9's voltage. This effectively caused regulator core to interpret values as higher voltages than they were and then to set real voltage much lower than intended. The buck9 provides power to other regulators, including LDO13 and LDO19 which supply the MMC2 (SD card). On Odroid XU3/XU4 the lower voltage caused SD card detection errors on Odroid XU3/XU4: mmc1: card never left busy state mmc1: error -110 whilst initialising SD card During driver probe the regulator core was checking whether initial voltage matches the constraints. With incorrect vsel_mask of 0xff and default value of 0x50, the core interpreted this as 5 V which is outside of constraints (3-3.775 V). Then the regulator core was adjusting the voltage to match the constraints. With incorrect vsel_mask this new voltage mapped to a vere low voltage in the driver. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Javier Martinez Canillas Tested-by: Javier Martinez Canillas Signed-off-by: Mark Brown Cc: --- include/linux/mfd/samsung/s2mps11.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/samsung/s2mps11.h b/include/linux/mfd/samsung/s2mps11.h index b288965e8101..2c14eeca46f0 100644 --- a/include/linux/mfd/samsung/s2mps11.h +++ b/include/linux/mfd/samsung/s2mps11.h @@ -173,10 +173,12 @@ enum s2mps11_regulators { #define S2MPS11_LDO_VSEL_MASK 0x3F #define S2MPS11_BUCK_VSEL_MASK 0xFF +#define S2MPS11_BUCK9_VSEL_MASK 0x1F #define S2MPS11_ENABLE_MASK (0x03 << S2MPS11_ENABLE_SHIFT) #define S2MPS11_ENABLE_SHIFT 0x06 #define S2MPS11_LDO_N_VOLTAGES (S2MPS11_LDO_VSEL_MASK + 1) #define S2MPS11_BUCK_N_VOLTAGES (S2MPS11_BUCK_VSEL_MASK + 1) +#define S2MPS11_BUCK9_N_VOLTAGES (S2MPS11_BUCK9_VSEL_MASK + 1) #define S2MPS11_RAMP_DELAY 25000 /* uV/us */ #define S2MPS11_CTRL1_PWRHOLD_MASK BIT(4) -- cgit v1.2.3 From 2da62906b1e298695e1bb725927041cd59942c98 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 14 Mar 2015 21:13:46 -0400 Subject: [net] drop 'size' argument of sock_recvmsg() all callers have it equal to msg_data_left(msg). Signed-off-by: Al Viro --- include/linux/net.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 49175e4ced11..72c1e0622ce2 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -218,8 +218,7 @@ int sock_create_lite(int family, int type, int proto, struct socket **res); struct socket *sock_alloc(void); void sock_release(struct socket *sock); int sock_sendmsg(struct socket *sock, struct msghdr *msg); -int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, - int flags); +int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags); struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname); struct socket *sockfd_lookup(int fd, int *err); struct socket *sock_from_file(struct file *file, int *err); -- cgit v1.2.3 From 005e46857ed598bcf76035366cd2841e3b7f8c54 Mon Sep 17 00:00:00 2001 From: Maarten ter Huurne Date: Thu, 17 Mar 2016 15:05:07 +0100 Subject: regulator: act8865: Pass of_node via act8865_regulator_data This makes the code easier to read and it avoids a dynamic memory allocation. Signed-off-by: Maarten ter Huurne Signed-off-by: Mark Brown --- include/linux/regulator/act8865.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/act8865.h b/include/linux/regulator/act8865.h index 2eb386017fa5..113d861a1e4c 100644 --- a/include/linux/regulator/act8865.h +++ b/include/linux/regulator/act8865.h @@ -69,11 +69,13 @@ enum { * @id: regulator id * @name: regulator name * @init_data: regulator init data + * @of_node: device tree node (optional) */ struct act8865_regulator_data { int id; const char *name; struct regulator_init_data *init_data; + struct device_node *of_node; }; /** -- cgit v1.2.3 From 49db08c358873af11ba3c25401de88156fa5d365 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 19 Feb 2016 15:36:07 +0100 Subject: chrdev: emit a warning when we go below dynamic major range Currently a dynamically allocated character device major is taken from 254 and downward. This mechanism is used for RTC, IIO and a few other subsystems. The kernel currently has no check prevening these dynamic allocations from eating into the assigned numbers at 233 and downward. In a recent test it was reported that so many dynamic device majors were used on a test server, that the major number for infiniband (231) was stolen. This occurred when allocating a new major number for GPIO chips. The error messages from the kernel were not helpful. (See: https://lkml.org/lkml/2016/2/14/124) This patch adds a defined lower limit of the dynamic major allocation region will henceforth emit a warning if we start to eat into the assigned numbers. It does not do any semantic changes and will not change the kernels behaviour: numbers will still continue to be stolen, but we will know from dmesg what is going on. This also updates the Documentation/devices.txt to clearly reflect that we are using this range of major numbers for dynamic allocation. Reported-by: Ying Huang Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: Alan Cox Cc: Arnd Bergmann Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 14a97194b34b..60082be96de8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2385,6 +2385,8 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, /* fs/char_dev.c */ #define CHRDEV_MAJOR_HASH_SIZE 255 +/* Marks the bottom of the first segment of free char majors */ +#define CHRDEV_MAJOR_DYN_END 234 extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); extern int register_chrdev_region(dev_t, unsigned, const char *); extern int __register_chrdev(unsigned int major, unsigned int baseminor, -- cgit v1.2.3 From b3c1be1b789cca6d3e39c950dfed690f0511fe76 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Fri, 22 Jan 2016 11:28:07 -0500 Subject: base: isa: Remove X86_32 dependency Many motherboards utilize a LPC to ISA bridge in order to decode ISA-style port-mapped I/O addresses. This is particularly true for embedded motherboards supporting the PC/104 bus (a bus specification derived from ISA). These motherboards are now commonly running 64-bit x86 processors. The X86_32 dependency should be removed from the ISA bus configuration option in order to support these newer motherboards. A new config option, CONFIG_ISA_BUS, is introduced to allow for the compilation of the ISA bus driver independent of the CONFIG_ISA option. Devices which communicate via ISA-compatible buses can now be supported independent of the dependencies of the CONFIG_ISA option. Signed-off-by: William Breathitt Gray Reviewed-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/linux/isa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/isa.h b/include/linux/isa.h index b0270e3814c8..2a02862775eb 100644 --- a/include/linux/isa.h +++ b/include/linux/isa.h @@ -22,7 +22,7 @@ struct isa_driver { #define to_isa_driver(x) container_of((x), struct isa_driver, driver) -#ifdef CONFIG_ISA +#ifdef CONFIG_ISA_BUS int isa_register_driver(struct isa_driver *, unsigned int); void isa_unregister_driver(struct isa_driver *); #else -- cgit v1.2.3 From f235541699bcf14fb8be797c6bc1d7106df0eb64 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 22 Jan 2016 01:32:26 -0500 Subject: export.h: allow for per-symbol configurable EXPORT_SYMBOL() Similar to include/generated/autoconf.h, include/generated/autoksyms.h will contain a list of defines for each EXPORT_SYMBOL() that we want active. The format is: #define __KSYM_ 1 This list will be auto-generated with another patch. For now we only include the preprocessor magic to automatically create or omit the corresponding struct kernel_symbol declaration. Given the content of include/generated/autoksyms.h may not be known in advance, an empty file is created early on to let the build proceed. Signed-off-by: Nicolas Pitre Acked-by: Rusty Russell --- include/linux/export.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/export.h b/include/linux/export.h index 96e45ea463e7..77afdb2a2506 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -38,7 +38,7 @@ extern struct module __this_module; #ifdef CONFIG_MODULES -#ifndef __GENKSYMS__ +#if defined(__KERNEL__) && !defined(__GENKSYMS__) #ifdef CONFIG_MODVERSIONS /* Mark the CRC weak since genksyms apparently decides not to * generate a checksums for some symbols */ @@ -53,7 +53,7 @@ extern struct module __this_module; #endif /* For every exported symbol, place a struct in the __ksymtab section */ -#define __EXPORT_SYMBOL(sym, sec) \ +#define ___EXPORT_SYMBOL(sym, sec) \ extern typeof(sym) sym; \ __CRC_SYMBOL(sym, sec) \ static const char __kstrtab_##sym[] \ @@ -65,6 +65,24 @@ extern struct module __this_module; __attribute__((section("___ksymtab" sec "+" #sym), unused)) \ = { (unsigned long)&sym, __kstrtab_##sym } +#ifdef CONFIG_TRIM_UNUSED_KSYMS + +#include +#include + +#define __EXPORT_SYMBOL(sym, sec) \ + __cond_export_sym(sym, sec, config_enabled(__KSYM_##sym)) +#define __cond_export_sym(sym, sec, conf) \ + ___cond_export_sym(sym, sec, conf) +#define ___cond_export_sym(sym, sec, enabled) \ + __cond_export_sym_##enabled(sym, sec) +#define __cond_export_sym_1(sym, sec) ___EXPORT_SYMBOL(sym, sec) +#define __cond_export_sym_0(sym, sec) /* nothing */ + +#else +#define __EXPORT_SYMBOL ___EXPORT_SYMBOL +#endif + #define EXPORT_SYMBOL(sym) \ __EXPORT_SYMBOL(sym, "") -- cgit v1.2.3 From c1a95fda2a40ae8c7aad3fa44fa7718a3710eb2d Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 22 Jan 2016 13:41:57 -0500 Subject: kbuild: add fine grained build dependencies for exported symbols Like with kconfig options, we now have the ability to compile in and out individual EXPORT_SYMBOL() declarations based on the content of include/generated/autoksyms.h. However we don't want the entire world to be rebuilt whenever that file is touched. Let's apply the same build dependency trick used for CONFIG_* symbols where the time stamp of empty files whose paths matching those symbols is used to trigger fine grained rebuilds. In our case the key is the symbol name passed to EXPORT_SYMBOL(). However, unlike config options, we cannot just use fixdep to parse the source code for EXPORT_SYMBOL(ksym) because several variants exist and parsing them all in a separate tool, and keeping it in synch, is not trivially maintainable. Furthermore, there are variants such as EXPORT_SYMBOL_GPL(pci_user_read_config_##size); that are instanciated via a macro for which we can't easily determine the actual exported symbol name(s) short of actually running the preprocessor on them. Storing the symbol name string in a special ELF section doesn't work for targets that output assembly or preprocessed source. So the best way is really to leverage the preprocessor by having it output actual symbol names anchored by a special sequence that can be easily filtered out. Then the list of symbols is simply fed to fixdep to be merged with the other dependencies. That implies the preprocessor is executed twice for each source file. A previous attempt relied on a warning pragma for each EXPORT_SYMBOL() instance that was filtered apart from stderr by the build system with a sed script during the actual compilation pass. Unfortunately the preprocessor/compiler diagnostic output isn't stable between versions and this solution, although more efficient, was deemed too fragile. Because of the lowercasing performed by fixdep, there might be name collisions triggering spurious rebuilds for similar symbols. But this shouldn't be a big issue in practice. (This is the case for CONFIG_* symbols and I didn't want to be different here, whatever the original reason for doing so.) To avoid needless build overhead, the exported symbol name gathering is performed only when CONFIG_TRIM_UNUSED_KSYMS is selected. Signed-off-by: Nicolas Pitre Acked-by: Rusty Russell --- include/linux/export.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/export.h b/include/linux/export.h index 77afdb2a2506..2f9ccbe6a639 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -65,7 +65,18 @@ extern struct module __this_module; __attribute__((section("___ksymtab" sec "+" #sym), unused)) \ = { (unsigned long)&sym, __kstrtab_##sym } -#ifdef CONFIG_TRIM_UNUSED_KSYMS +#if defined(__KSYM_DEPS__) + +/* + * For fine grained build dependencies, we want to tell the build system + * about each possible exported symbol even if they're not actually exported. + * We use a string pattern that is unlikely to be valid code that the build + * system filters out from the preprocessor output (see ksym_dep_filter + * in scripts/Kbuild.include). + */ +#define __EXPORT_SYMBOL(sym, sec) === __KSYM_##sym === + +#elif defined(CONFIG_TRIM_UNUSED_KSYMS) #include #include -- cgit v1.2.3 From 6c96f05c8bb8bc4177613ef3c23a56b455e75887 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 23 Feb 2016 18:46:24 +0100 Subject: reset: Make [of_]reset_control_get[_foo] functions wrappers With both the regular, _by_index and _optional variants we already have quite a few variants of [of_]reset_control_get[_foo], the upcoming addition of shared reset lines support makes this worse. This commit changes all the variants into wrappers around common core functions. For completeness sake this commit also adds a new devm_get_reset_control_by_index wrapper. Signed-off-by: Hans de Goede Signed-off-by: Philipp Zabel --- include/linux/reset.h | 126 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 90 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/reset.h b/include/linux/reset.h index c4c097de0ba9..1bb69a29d6db 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -1,8 +1,8 @@ #ifndef _LINUX_RESET_H_ #define _LINUX_RESET_H_ -struct device; -struct device_node; +#include + struct reset_control; #ifdef CONFIG_RESET_CONTROLLER @@ -12,9 +12,11 @@ int reset_control_assert(struct reset_control *rstc); int reset_control_deassert(struct reset_control *rstc); int reset_control_status(struct reset_control *rstc); -struct reset_control *reset_control_get(struct device *dev, const char *id); +struct reset_control *__of_reset_control_get(struct device_node *node, + const char *id, int index); void reset_control_put(struct reset_control *rstc); -struct reset_control *devm_reset_control_get(struct device *dev, const char *id); +struct reset_control *__devm_reset_control_get(struct device *dev, + const char *id, int index); int __must_check device_reset(struct device *dev); @@ -23,24 +25,6 @@ static inline int device_reset_optional(struct device *dev) return device_reset(dev); } -static inline struct reset_control *reset_control_get_optional( - struct device *dev, const char *id) -{ - return reset_control_get(dev, id); -} - -static inline struct reset_control *devm_reset_control_get_optional( - struct device *dev, const char *id) -{ - return devm_reset_control_get(dev, id); -} - -struct reset_control *of_reset_control_get(struct device_node *node, - const char *id); - -struct reset_control *of_reset_control_get_by_index( - struct device_node *node, int index); - #else static inline int reset_control_reset(struct reset_control *rstc) @@ -77,44 +61,114 @@ static inline int device_reset_optional(struct device *dev) return -ENOTSUPP; } -static inline struct reset_control *__must_check reset_control_get( - struct device *dev, const char *id) +static inline struct reset_control *__of_reset_control_get( + struct device_node *node, + const char *id, int index) { - WARN_ON(1); return ERR_PTR(-EINVAL); } -static inline struct reset_control *__must_check devm_reset_control_get( - struct device *dev, const char *id) +static inline struct reset_control *__devm_reset_control_get( + struct device *dev, + const char *id, int index) { - WARN_ON(1); return ERR_PTR(-EINVAL); } -static inline struct reset_control *reset_control_get_optional( +#endif /* CONFIG_RESET_CONTROLLER */ + +/** + * reset_control_get - Lookup and obtain a reference to a reset controller. + * @dev: device to be reset by the controller + * @id: reset line name + * + * Returns a struct reset_control or IS_ERR() condition containing errno. + * + * Use of id names is optional. + */ +static inline struct reset_control *__must_check reset_control_get( struct device *dev, const char *id) { - return ERR_PTR(-ENOTSUPP); +#ifndef CONFIG_RESET_CONTROLLER + WARN_ON(1); +#endif + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0); } -static inline struct reset_control *devm_reset_control_get_optional( +static inline struct reset_control *reset_control_get_optional( struct device *dev, const char *id) { - return ERR_PTR(-ENOTSUPP); + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0); } +/** + * of_reset_control_get - Lookup and obtain a reference to a reset controller. + * @node: device to be reset by the controller + * @id: reset line name + * + * Returns a struct reset_control or IS_ERR() condition containing errno. + * + * Use of id names is optional. + */ static inline struct reset_control *of_reset_control_get( struct device_node *node, const char *id) { - return ERR_PTR(-ENOTSUPP); + return __of_reset_control_get(node, id, 0); } +/** + * of_reset_control_get_by_index - Lookup and obtain a reference to a reset + * controller by index. + * @node: device to be reset by the controller + * @index: index of the reset controller + * + * This is to be used to perform a list of resets for a device or power domain + * in whatever order. Returns a struct reset_control or IS_ERR() condition + * containing errno. + */ static inline struct reset_control *of_reset_control_get_by_index( - struct device_node *node, int index) + struct device_node *node, int index) { - return ERR_PTR(-ENOTSUPP); + return __of_reset_control_get(node, NULL, index); } -#endif /* CONFIG_RESET_CONTROLLER */ +/** + * devm_reset_control_get - resource managed reset_control_get() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Managed reset_control_get(). For reset controllers returned from this + * function, reset_control_put() is called automatically on driver detach. + * See reset_control_get() for more information. + */ +static inline struct reset_control *__must_check devm_reset_control_get( + struct device *dev, const char *id) +{ +#ifndef CONFIG_RESET_CONTROLLER + WARN_ON(1); +#endif + return __devm_reset_control_get(dev, id, 0); +} + +static inline struct reset_control *devm_reset_control_get_optional( + struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0); +} + +/** + * devm_reset_control_get_by_index - resource managed reset_control_get + * @dev: device to be reset by the controller + * @index: index of the reset controller + * + * Managed reset_control_get(). For reset controllers returned from this + * function, reset_control_put() is called automatically on driver detach. + * See reset_control_get() for more information. + */ +static inline struct reset_control *devm_reset_control_get_by_index( + struct device *dev, int index) +{ + return __devm_reset_control_get(dev, NULL, index); +} #endif -- cgit v1.2.3 From c15ddec2ca06076a11195313aa1fce47d2a28c5d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 23 Feb 2016 18:46:25 +0100 Subject: reset: Share struct reset_control between reset_control_get calls Now that struct reset_control no longer stores the device pointer for the device calling reset_control_get we can share a single struct reset_control when multiple calls to reset_control_get are made for the same reset line (same id / index). This is a preparation patch for adding support for shared reset lines. Signed-off-by: Hans de Goede Signed-off-by: Philipp Zabel --- include/linux/reset-controller.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h index a3a5bcdb1d02..b91ba932bbd4 100644 --- a/include/linux/reset-controller.h +++ b/include/linux/reset-controller.h @@ -31,6 +31,7 @@ struct of_phandle_args; * @ops: a pointer to device specific struct reset_control_ops * @owner: kernel module of the reset controller driver * @list: internal list of reset controller devices + * @reset_control_head: head of internal list of requested reset controls * @of_node: corresponding device tree node as phandle target * @of_reset_n_cells: number of cells in reset line specifiers * @of_xlate: translation function to translate from specifier as found in the @@ -41,6 +42,7 @@ struct reset_controller_dev { const struct reset_control_ops *ops; struct module *owner; struct list_head list; + struct list_head reset_control_head; struct device_node *of_node; int of_reset_n_cells; int (*of_xlate)(struct reset_controller_dev *rcdev, -- cgit v1.2.3 From 0b52297f2288ca239e598afe6c92db83d8d2bfcd Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 23 Feb 2016 18:46:26 +0100 Subject: reset: Add support for shared reset controls In some SoCs some hw-blocks share a reset control. Add support for this setup by adding new: reset_control_get_shared() devm_reset_control_get_shared() devm_reset_control_get_shared_by_index() methods to get a reset_control. Note that this patch omits adding of_ variants, if these are needed later they can be easily added. This patch also changes the behavior of the existing exclusive reset_control_get() variants, if these are now called more then once for the same reset_control they will return -EBUSY. To catch existing drivers triggering this error (there should not be any) a WARN_ON(1) is added in this path. When a reset_control is shared, the behavior of reset_control_assert / deassert is changed, for shared reset_controls these will work like the clock-enable/disable and regulator-on/off functions. They will keep a deassert_count, and only (re-)assert the reset after reset_control_assert has been called as many times as reset_control_deassert was called. Calling reset_control_assert without first calling reset_control_deassert is not allowed on a shared reset control. Calling reset_control_reset is also not allowed on a shared reset control. Signed-off-by: Hans de Goede Signed-off-by: Philipp Zabel --- include/linux/reset.h | 96 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 81 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/reset.h b/include/linux/reset.h index 1bb69a29d6db..a552134a209e 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -13,10 +13,10 @@ int reset_control_deassert(struct reset_control *rstc); int reset_control_status(struct reset_control *rstc); struct reset_control *__of_reset_control_get(struct device_node *node, - const char *id, int index); + const char *id, int index, int shared); void reset_control_put(struct reset_control *rstc); struct reset_control *__devm_reset_control_get(struct device *dev, - const char *id, int index); + const char *id, int index, int shared); int __must_check device_reset(struct device *dev); @@ -63,14 +63,14 @@ static inline int device_reset_optional(struct device *dev) static inline struct reset_control *__of_reset_control_get( struct device_node *node, - const char *id, int index) + const char *id, int index, int shared) { return ERR_PTR(-EINVAL); } static inline struct reset_control *__devm_reset_control_get( struct device *dev, - const char *id, int index) + const char *id, int index, int shared) { return ERR_PTR(-EINVAL); } @@ -78,11 +78,17 @@ static inline struct reset_control *__devm_reset_control_get( #endif /* CONFIG_RESET_CONTROLLER */ /** - * reset_control_get - Lookup and obtain a reference to a reset controller. + * reset_control_get - Lookup and obtain an exclusive reference to a + * reset controller. * @dev: device to be reset by the controller * @id: reset line name * * Returns a struct reset_control or IS_ERR() condition containing errno. + * If this function is called more then once for the same reset_control it will + * return -EBUSY. + * + * See reset_control_get_shared for details on shared references to + * reset-controls. * * Use of id names is optional. */ @@ -92,17 +98,46 @@ static inline struct reset_control *__must_check reset_control_get( #ifndef CONFIG_RESET_CONTROLLER WARN_ON(1); #endif - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0); + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0); } static inline struct reset_control *reset_control_get_optional( struct device *dev, const char *id) { - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0); + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0); } /** - * of_reset_control_get - Lookup and obtain a reference to a reset controller. + * reset_control_get_shared - Lookup and obtain a shared reference to a + * reset controller. + * @dev: device to be reset by the controller + * @id: reset line name + * + * Returns a struct reset_control or IS_ERR() condition containing errno. + * This function is intended for use with reset-controls which are shared + * between hardware-blocks. + * + * When a reset-control is shared, the behavior of reset_control_assert / + * deassert is changed, the reset-core will keep track of a deassert_count + * and only (re-)assert the reset after reset_control_assert has been called + * as many times as reset_control_deassert was called. Also see the remark + * about shared reset-controls in the reset_control_assert docs. + * + * Calling reset_control_assert without first calling reset_control_deassert + * is not allowed on a shared reset control. Calling reset_control_reset is + * also not allowed on a shared reset control. + * + * Use of id names is optional. + */ +static inline struct reset_control *reset_control_get_shared( + struct device *dev, const char *id) +{ + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 1); +} + +/** + * of_reset_control_get - Lookup and obtain an exclusive reference to a + * reset controller. * @node: device to be reset by the controller * @id: reset line name * @@ -113,12 +148,12 @@ static inline struct reset_control *reset_control_get_optional( static inline struct reset_control *of_reset_control_get( struct device_node *node, const char *id) { - return __of_reset_control_get(node, id, 0); + return __of_reset_control_get(node, id, 0, 0); } /** - * of_reset_control_get_by_index - Lookup and obtain a reference to a reset - * controller by index. + * of_reset_control_get_by_index - Lookup and obtain an exclusive reference to + * a reset controller by index. * @node: device to be reset by the controller * @index: index of the reset controller * @@ -129,7 +164,7 @@ static inline struct reset_control *of_reset_control_get( static inline struct reset_control *of_reset_control_get_by_index( struct device_node *node, int index) { - return __of_reset_control_get(node, NULL, index); + return __of_reset_control_get(node, NULL, index, 0); } /** @@ -147,13 +182,13 @@ static inline struct reset_control *__must_check devm_reset_control_get( #ifndef CONFIG_RESET_CONTROLLER WARN_ON(1); #endif - return __devm_reset_control_get(dev, id, 0); + return __devm_reset_control_get(dev, id, 0, 0); } static inline struct reset_control *devm_reset_control_get_optional( struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0); + return __devm_reset_control_get(dev, id, 0, 0); } /** @@ -168,7 +203,38 @@ static inline struct reset_control *devm_reset_control_get_optional( static inline struct reset_control *devm_reset_control_get_by_index( struct device *dev, int index) { - return __devm_reset_control_get(dev, NULL, index); + return __devm_reset_control_get(dev, NULL, index, 0); +} + +/** + * devm_reset_control_get_shared - resource managed reset_control_get_shared() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Managed reset_control_get_shared(). For reset controllers returned from + * this function, reset_control_put() is called automatically on driver detach. + * See reset_control_get_shared() for more information. + */ +static inline struct reset_control *devm_reset_control_get_shared( + struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0, 1); +} + +/** + * devm_reset_control_get_shared_by_index - resource managed + * reset_control_get_shared + * @dev: device to be reset by the controller + * @index: index of the reset controller + * + * Managed reset_control_get_shared(). For reset controllers returned from + * this function, reset_control_put() is called automatically on driver detach. + * See reset_control_get_shared() for more information. + */ +static inline struct reset_control *devm_reset_control_get_shared_by_index( + struct device *dev, int index) +{ + return __devm_reset_control_get(dev, NULL, index, 1); } #endif -- cgit v1.2.3 From 44debe7a123cc760fc90ccbe253210798c917fa7 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 30 Mar 2016 11:26:35 +0200 Subject: vgacon: dummy implementation for vgacon_text_force MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows us to ditch a ton of ugly #ifdefs from a bunch of drm modeset drivers. v2: Make the dummy function actually return a sane value, spotted by Ville. v3: Because the patch is still in limbo there's no more drivers to convert, noticed by Emil. v4: Rebase once more, because hooray. I'll just go ahead an apply this one later on to drm-misc. Cc: Emil Velikov Cc: Ville Syrjälä Cc: Andrew Morton Cc: Greg Kroah-Hartman Reviewed-by: Emil Velikov Reviewed-by: Alex Deucher Signed-off-by: Daniel Vetter --- include/linux/console.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index ea731af2451e..e49cc1ef19be 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -191,6 +191,8 @@ void vcs_remove_sysfs(int index); #ifdef CONFIG_VGA_CONSOLE extern bool vgacon_text_force(void); +#else +static inline bool vgacon_text_force(void) { return false; } #endif #endif /* _LINUX_CONSOLE_H */ -- cgit v1.2.3 From 6a0028b3dd67b86d7265ed873c8738743adec855 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 30 Mar 2016 12:04:30 -0700 Subject: regulator: Deprecate regulator_can_change_voltage() All current users of regulator_can_change_voltage() are abusing it, using it to wrap a call to regulator_set_voltage() on probe without any alternative handling for fixed voltages. Drivers should only be using regulator_set_voltage() if they need to vary voltages at runtime, fixed voltages should normally be set via machine constraints, and calling regulator_set_voltage() on a regulator which can't be varied will succeed if the current voltage is within the range requested so users shouldn't worry if they have permission to vary normally. Deprecate the API to try to stop any new users appearing while we fix the current callers. Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 48603506f8de..80dc4e51d14a 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -224,7 +224,7 @@ int regulator_bulk_force_disable(int num_consumers, void regulator_bulk_free(int num_consumers, struct regulator_bulk_data *consumers); -int regulator_can_change_voltage(struct regulator *regulator); +int __deprecated regulator_can_change_voltage(struct regulator *regulator); int regulator_count_voltages(struct regulator *regulator); int regulator_list_voltage(struct regulator *regulator, unsigned selector); int regulator_is_supported_voltage(struct regulator *regulator, @@ -436,7 +436,7 @@ static inline void regulator_bulk_free(int num_consumers, { } -static inline int regulator_can_change_voltage(struct regulator *regulator) +static inline int __deprecated regulator_can_change_voltage(struct regulator *regulator) { return 0; } -- cgit v1.2.3 From e8b123e6008480b2b8d80dae060315d84b79f4bb Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 24 Dec 2015 00:28:38 -0800 Subject: soc: qcom: smem_state: Add stubs for disabled smem_state Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross --- include/linux/soc/qcom/smem_state.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/smem_state.h b/include/linux/soc/qcom/smem_state.h index f35e1512fcaa..7b88697929e9 100644 --- a/include/linux/soc/qcom/smem_state.h +++ b/include/linux/soc/qcom/smem_state.h @@ -1,12 +1,17 @@ #ifndef __QCOM_SMEM_STATE__ #define __QCOM_SMEM_STATE__ +#include + +struct device_node; struct qcom_smem_state; struct qcom_smem_state_ops { int (*update_bits)(void *, u32, u32); }; +#ifdef CONFIG_QCOM_SMEM_STATE + struct qcom_smem_state *qcom_smem_state_get(struct device *dev, const char *con_id, unsigned *bit); void qcom_smem_state_put(struct qcom_smem_state *); @@ -15,4 +20,34 @@ int qcom_smem_state_update_bits(struct qcom_smem_state *state, u32 mask, u32 val struct qcom_smem_state *qcom_smem_state_register(struct device_node *of_node, const struct qcom_smem_state_ops *ops, void *data); void qcom_smem_state_unregister(struct qcom_smem_state *state); +#else + +static inline struct qcom_smem_state *qcom_smem_state_get(struct device *dev, + const char *con_id, unsigned *bit) +{ + return ERR_PTR(-EINVAL); +} + +static inline void qcom_smem_state_put(struct qcom_smem_state *state) +{ +} + +static inline int qcom_smem_state_update_bits(struct qcom_smem_state *state, + u32 mask, u32 value) +{ + return -EINVAL; +} + +static inline struct qcom_smem_state *qcom_smem_state_register(struct device_node *of_node, + const struct qcom_smem_state_ops *ops, void *data) +{ + return ERR_PTR(-EINVAL); +} + +static inline void qcom_smem_state_unregister(struct qcom_smem_state *state) +{ +} + +#endif + #endif -- cgit v1.2.3 From 39f0db298e7c02a29371fb39cabdd5d76e6b726c Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 17 Feb 2016 22:39:02 -0800 Subject: soc: qcom: smd: Introduce callback setter Introduce a setter for the callback function pointer to clarify the locking around the operation and to reduce some duplication. Signed-off-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross --- include/linux/soc/qcom/smd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/smd.h b/include/linux/soc/qcom/smd.h index d0cb6d189a0a..65a64fcdb1aa 100644 --- a/include/linux/soc/qcom/smd.h +++ b/include/linux/soc/qcom/smd.h @@ -26,6 +26,8 @@ struct qcom_smd_device { struct qcom_smd_channel *channel; }; +typedef int (*qcom_smd_cb_t)(struct qcom_smd_device *, const void *, size_t); + /** * struct qcom_smd_driver - smd driver struct * @driver: underlying device driver @@ -42,7 +44,7 @@ struct qcom_smd_driver { int (*probe)(struct qcom_smd_device *dev); void (*remove)(struct qcom_smd_device *dev); - int (*callback)(struct qcom_smd_device *, const void *, size_t); + qcom_smd_cb_t callback; }; int qcom_smd_driver_register(struct qcom_smd_driver *drv); -- cgit v1.2.3 From 028021d29ea069390e1f60c6aa5b3511d218454b Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 17 Feb 2016 22:39:06 -0800 Subject: soc: qcom: smd: Support opening additional channels With the qcom_smd_open_channel() API we allow SMD devices to open additional SMD channels, to allow implementation of multi-channel SMD devices - like Bluetooth. Channels are opened from the same edge as the calling SMD device is tied to. Signed-off-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross --- include/linux/soc/qcom/smd.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/smd.h b/include/linux/soc/qcom/smd.h index 65a64fcdb1aa..bd51c8a9d807 100644 --- a/include/linux/soc/qcom/smd.h +++ b/include/linux/soc/qcom/smd.h @@ -56,4 +56,8 @@ void qcom_smd_driver_unregister(struct qcom_smd_driver *drv); int qcom_smd_send(struct qcom_smd_channel *channel, const void *data, int len); +struct qcom_smd_channel *qcom_smd_open_channel(struct qcom_smd_device *sdev, + const char *name, + qcom_smd_cb_t cb); + #endif -- cgit v1.2.3 From b8a7a3a6674725d7ca0ff6e322f6c1cab6e6a11d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 24 Mar 2016 14:38:37 +0100 Subject: posix_acl: Inode acl caching fixes When get_acl() is called for an inode whose ACL is not cached yet, the get_acl inode operation is called to fetch the ACL from the filesystem. The inode operation is responsible for updating the cached acl with set_cached_acl(). This is done without locking at the VFS level, so another task can call set_cached_acl() or forget_cached_acl() before the get_acl inode operation gets to calling set_cached_acl(), and then get_acl's call to set_cached_acl() results in caching an outdate ACL. Prevent this from happening by setting the cached ACL pointer to a task-specific sentinel value before calling the get_acl inode operation. Move the responsibility for updating the cached ACL from the get_acl inode operations to get_acl(). There, only set the cached ACL if the sentinel value hasn't changed. The sentinel values are chosen to have odd values. Likewise, the value of ACL_NOT_CACHED is odd. In contrast, ACL object pointers always have an even value (ACLs are aligned in memory). This allows to distinguish uncached ACLs values from ACL objects. In addition, switch from guarding inode->i_acl and inode->i_default_acl upates by the inode->i_lock spinlock to using xchg() and cmpxchg(). Filesystems that do not want ACLs returned from their get_acl inode operations to be cached must call forget_cached_acl() to prevent the VFS from doing so. (Patch written by Al Viro and Andreas Gruenbacher.) Signed-off-by: Andreas Gruenbacher Signed-off-by: Al Viro --- include/linux/fs.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 14a97194b34b..329ed372d708 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -577,6 +577,18 @@ static inline void mapping_allow_writable(struct address_space *mapping) struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) +static inline struct posix_acl * +uncached_acl_sentinel(struct task_struct *task) +{ + return (void *)task + 1; +} + +static inline bool +is_uncached_acl(struct posix_acl *acl) +{ + return (long)acl & 1; +} + #define IOP_FASTPERM 0x0001 #define IOP_LOOKUP 0x0002 #define IOP_NOFOLLOW 0x0004 -- cgit v1.2.3 From 04c57f4501909b60353031cfe5b991751d745658 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 24 Mar 2016 14:38:38 +0100 Subject: posix_acl: Unexport acl_by_type and make it static acl_by_type(inode, type) returns a pointer to either inode->i_acl or inode->i_default_acl depending on type. This is useful in fs/posix_acl.c, but should never have been visible outside that file. Signed-off-by: Andreas Gruenbacher Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/posix_acl.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 3e96a6a76103..5b5a80cc5926 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -99,7 +99,6 @@ extern int posix_acl_create(struct inode *, umode_t *, struct posix_acl **, extern int simple_set_acl(struct inode *, struct posix_acl *, int); extern int simple_acl_create(struct inode *, struct inode *); -struct posix_acl **acl_by_type(struct inode *inode, int type); struct posix_acl *get_cached_acl(struct inode *inode, int type); struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type); void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl); -- cgit v1.2.3 From 1879445dfa7bbd6fe21b09c5cc72f4934798afed Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 28 Mar 2016 06:41:30 +0000 Subject: perf/core: Set event's default ::overflow_handler() Set a default event->overflow_handler in perf_event_alloc() so don't need to check event->overflow_handler in __perf_event_overflow(). Following commits can give a different default overflow_handler. Initial idea comes from Peter: http://lkml.kernel.org/r/20130708121557.GA17211@twins.programming.kicks-ass.net Since the default value of event->overflow_handler is not NULL, existing 'if (!overflow_handler)' checks need to be changed. is_default_overflow_handler() is introduced for this. No extra performance overhead is introduced into the hot path because in the original code we still need to read this handler from memory. A conditional branch is avoided so actually we remove some instructions. Signed-off-by: Wang Nan Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Arnaldo Carvalho de Melo Cc: Brendan Gregg Cc: He Kuang Cc: Jiri Olsa Cc: Jiri Olsa Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Zefan Li Link: http://lkml.kernel.org/r/1459147292-239310-3-git-send-email-wangnan0@huawei.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 15588d4c581d..4065ca2d7149 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -838,6 +838,12 @@ extern void perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs); +static inline bool +is_default_overflow_handler(struct perf_event *event) +{ + return (event->overflow_handler == perf_event_output); +} + extern void perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, -- cgit v1.2.3 From 274529ba9bda86c91c2c06da3a641aaf617dd30f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 21 Mar 2016 19:46:04 -0700 Subject: rcu: Consolidate dumping of ftrace buffer This commit consolidates a couple definitions and several calls for single-shot ftrace-buffer dumping. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 2657aff2725b..45de591657a6 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -1144,4 +1144,17 @@ static inline void rcu_sysidle_force_exit(void) #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ +/* + * Dump the ftrace buffer, but only one time per callsite per boot. + */ +#define rcu_ftrace_dump(oops_dump_mode) \ +do { \ + static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \ + \ + if (!atomic_read(&___rfd_beenhere) && \ + !atomic_xchg(&___rfd_beenhere, 1)) \ + ftrace_dump(oops_dump_mode); \ +} while (0) + + #endif /* __LINUX_RCUPDATE_H */ -- cgit v1.2.3 From 293e2421fe25839500207eda123cc4475f8d17b8 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Wed, 23 Mar 2016 23:11:48 +0800 Subject: rcu: Remove superfluous versions of rcu_read_lock_sched_held() Currently, we have four versions of rcu_read_lock_sched_held(), depending on the combined choices on PREEMPT_COUNT and DEBUG_LOCK_ALLOC. However, there is an existing function preemptible() that already distinguishes between the PREEMPT_COUNT=y and PREEMPT_COUNT=n cases, and allows these four implementations to be consolidated down to two. This commit therefore uses preemptible() to achieve this consolidation. Note that there could be a small performance regression in the case of CONFIG_DEBUG_LOCK_ALLOC=y && PREEMPT_COUNT=n. However, given the overhead associated with CONFIG_DEBUG_LOCK_ALLOC=y, this should be down in the noise. Signed-off-by: Boqun Feng Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 45de591657a6..5f1533e3d032 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -508,14 +508,7 @@ int rcu_read_lock_bh_held(void); * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side * critical section unless it can prove otherwise. */ -#ifdef CONFIG_PREEMPT_COUNT int rcu_read_lock_sched_held(void); -#else /* #ifdef CONFIG_PREEMPT_COUNT */ -static inline int rcu_read_lock_sched_held(void) -{ - return 1; -} -#endif /* #else #ifdef CONFIG_PREEMPT_COUNT */ #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ @@ -532,18 +525,10 @@ static inline int rcu_read_lock_bh_held(void) return 1; } -#ifdef CONFIG_PREEMPT_COUNT static inline int rcu_read_lock_sched_held(void) { - return preempt_count() != 0 || irqs_disabled(); + return !preemptible(); } -#else /* #ifdef CONFIG_PREEMPT_COUNT */ -static inline int rcu_read_lock_sched_held(void) -{ - return 1; -} -#endif /* #else #ifdef CONFIG_PREEMPT_COUNT */ - #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ #ifdef CONFIG_PROVE_RCU -- cgit v1.2.3 From 291783b8ad77a83a6fdf91d55eee7f1ad72ed4d1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 12 Jan 2016 13:43:30 -0800 Subject: rcutorture: Expedited-GP batch progress access to torturing This commit provides rcu_exp_batches_completed() and rcu_exp_batches_completed_sched() functions to allow torture-test modules to check how many expedited grace period batches have completed. These are analogous to the existing rcu_batches_completed(), rcu_batches_completed_bh(), and rcu_batches_completed_sched() functions. Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 16 ++++++++++++++++ include/linux/rcutree.h | 2 ++ 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 64809aea661c..93aea75029fb 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -149,6 +149,22 @@ static inline unsigned long rcu_batches_completed_sched(void) return 0; } +/* + * Return the number of expedited grace periods completed. + */ +static inline unsigned long rcu_exp_batches_completed(void) +{ + return 0; +} + +/* + * Return the number of expedited sched grace periods completed. + */ +static inline unsigned long rcu_exp_batches_completed_sched(void) +{ + return 0; +} + static inline void rcu_force_quiescent_state(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index ad1eda9fa4da..5043cb823fb2 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -87,6 +87,8 @@ unsigned long rcu_batches_started_sched(void); unsigned long rcu_batches_completed(void); unsigned long rcu_batches_completed_bh(void); unsigned long rcu_batches_completed_sched(void); +unsigned long rcu_exp_batches_completed(void); +unsigned long rcu_exp_batches_completed_sched(void); void show_rcu_gp_kthreads(void); void rcu_force_quiescent_state(void); -- cgit v1.2.3 From d18d12d0ff07c47fb913f297c174f30a3f96042d Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Thu, 31 Mar 2016 15:51:32 +0200 Subject: lib/proportions: Remove unused code By accident I stumbled across code that is no longer used. According to git grep, the global functions in lib/proportions.c are not used anywhere. This patch removes the old, unused code. Peter Zijlstra further commented: "Ah indeed, that got replaced with the flex proportion code a while back." Signed-off-by: Richard Cochran Acked-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/4265b49bed713fbe3faaf8c05da0e1792f09c0b3.1459432020.git.rcochran@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/proportions.h | 137 -------------------------------------------- include/linux/sched.h | 1 - 2 files changed, 138 deletions(-) delete mode 100644 include/linux/proportions.h (limited to 'include/linux') diff --git a/include/linux/proportions.h b/include/linux/proportions.h deleted file mode 100644 index 21221338ad18..000000000000 --- a/include/linux/proportions.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * FLoating proportions - * - * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra - * - * This file contains the public data structure and API definitions. - */ - -#ifndef _LINUX_PROPORTIONS_H -#define _LINUX_PROPORTIONS_H - -#include -#include -#include -#include - -struct prop_global { - /* - * The period over which we differentiate - * - * period = 2^shift - */ - int shift; - /* - * The total event counter aka 'time'. - * - * Treated as an unsigned long; the lower 'shift - 1' bits are the - * counter bits, the remaining upper bits the period counter. - */ - struct percpu_counter events; -}; - -/* - * global proportion descriptor - * - * this is needed to consistently flip prop_global structures. - */ -struct prop_descriptor { - int index; - struct prop_global pg[2]; - struct mutex mutex; /* serialize the prop_global switch */ -}; - -int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp); -void prop_change_shift(struct prop_descriptor *pd, int new_shift); - -/* - * ----- PERCPU ------ - */ - -struct prop_local_percpu { - /* - * the local events counter - */ - struct percpu_counter events; - - /* - * snapshot of the last seen global state - */ - int shift; - unsigned long period; - raw_spinlock_t lock; /* protect the snapshot state */ -}; - -int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp); -void prop_local_destroy_percpu(struct prop_local_percpu *pl); -void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl); -void prop_fraction_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl, - long *numerator, long *denominator); - -static inline -void prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl) -{ - unsigned long flags; - - local_irq_save(flags); - __prop_inc_percpu(pd, pl); - local_irq_restore(flags); -} - -/* - * Limit the time part in order to ensure there are some bits left for the - * cycle counter and fraction multiply. - */ -#if BITS_PER_LONG == 32 -#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4) -#else -#define PROP_MAX_SHIFT (BITS_PER_LONG/2) -#endif - -#define PROP_FRAC_SHIFT (BITS_PER_LONG - PROP_MAX_SHIFT - 1) -#define PROP_FRAC_BASE (1UL << PROP_FRAC_SHIFT) - -void __prop_inc_percpu_max(struct prop_descriptor *pd, - struct prop_local_percpu *pl, long frac); - - -/* - * ----- SINGLE ------ - */ - -struct prop_local_single { - /* - * the local events counter - */ - unsigned long events; - - /* - * snapshot of the last seen global state - * and a lock protecting this state - */ - unsigned long period; - int shift; - raw_spinlock_t lock; /* protect the snapshot state */ -}; - -#define INIT_PROP_LOCAL_SINGLE(name) \ -{ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ -} - -int prop_local_init_single(struct prop_local_single *pl); -void prop_local_destroy_single(struct prop_local_single *pl); -void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl); -void prop_fraction_single(struct prop_descriptor *pd, struct prop_local_single *pl, - long *numerator, long *denominator); - -static inline -void prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl) -{ - unsigned long flags; - - local_irq_save(flags); - __prop_inc_single(pd, pl); - local_irq_restore(flags); -} - -#endif /* _LINUX_PROPORTIONS_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 60bba7e032dc..6dd25d1869b5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -40,7 +40,6 @@ struct sched_param { #include #include #include -#include #include #include #include -- cgit v1.2.3 From 1ce15ef4f60529cf1313f80f4338c88bd65cc572 Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Tue, 22 Mar 2016 20:03:16 -0400 Subject: module: preserve Elf information for livepatch modules For livepatch modules, copy Elf section, symbol, and string information from the load_info struct in the module loader. Persist copies of the original symbol table and string table. Livepatch manages its own relocation sections in order to reuse module loader code to write relocations. Livepatch modules must preserve Elf information such as section indices in order to apply livepatch relocation sections using the module loader's apply_relocate_add() function. In order to apply livepatch relocation sections, livepatch modules must keep a complete copy of their original symbol table in memory. Normally, a stripped down copy of a module's symbol table (containing only "core" symbols) is made available through module->core_symtab. But for livepatch modules, the symbol table copied into memory on module load must be exactly the same as the symbol table produced when the patch module was compiled. This is because the relocations in each livepatch relocation section refer to their respective symbols with their symbol indices, and the original symbol indices (and thus the symtab ordering) must be preserved in order for apply_relocate_add() to find the right symbol. Signed-off-by: Jessica Yu Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Acked-by: Rusty Russell Reviewed-by: Rusty Russell Signed-off-by: Jiri Kosina --- include/linux/module.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 2bb0c3085706..3daf2b3a09d2 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -330,6 +330,15 @@ struct mod_kallsyms { char *strtab; }; +#ifdef CONFIG_LIVEPATCH +struct klp_modinfo { + Elf_Ehdr hdr; + Elf_Shdr *sechdrs; + char *secstrings; + unsigned int symndx; +}; +#endif + struct module { enum module_state state; @@ -456,7 +465,11 @@ struct module { #endif #ifdef CONFIG_LIVEPATCH + bool klp; /* Is this a livepatch module? */ bool klp_alive; + + /* Elf information */ + struct klp_modinfo *klp_info; #endif #ifdef CONFIG_MODULE_UNLOAD @@ -630,6 +643,18 @@ static inline bool module_requested_async_probing(struct module *module) return module && module->async_probe_requested; } +#ifdef CONFIG_LIVEPATCH +static inline bool is_livepatch_module(struct module *mod) +{ + return mod->klp; +} +#else /* !CONFIG_LIVEPATCH */ +static inline bool is_livepatch_module(struct module *mod) +{ + return false; +} +#endif /* CONFIG_LIVEPATCH */ + #else /* !CONFIG_MODULES... */ /* Given an address, look for it in the exception tables. */ -- cgit v1.2.3 From 425595a7fc2096ab46c741b5ed5372c5ab5bbeac Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Tue, 22 Mar 2016 20:03:18 -0400 Subject: livepatch: reuse module loader code to write relocations Reuse module loader code to write relocations, thereby eliminating the need for architecture specific relocation code in livepatch. Specifically, reuse the apply_relocate_add() function in the module loader to write relocations instead of duplicating functionality in livepatch's arch-dependent klp_write_module_reloc() function. In order to accomplish this, livepatch modules manage their own relocation sections (marked with the SHF_RELA_LIVEPATCH section flag) and livepatch-specific symbols (marked with SHN_LIVEPATCH symbol section index). To apply livepatch relocation sections, livepatch symbols referenced by relocs are resolved and then apply_relocate_add() is called to apply those relocations. In addition, remove x86 livepatch relocation code and the s390 klp_write_module_reloc() function stub. They are no longer needed since relocation work has been offloaded to module loader. Lastly, mark the module as a livepatch module so that the module loader canappropriately identify and initialize it. Signed-off-by: Jessica Yu Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf Acked-by: Heiko Carstens # for s390 changes Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index bd830d590465..0933ca47791c 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -64,28 +64,9 @@ struct klp_func { struct list_head stack_node; }; -/** - * struct klp_reloc - relocation structure for live patching - * @loc: address where the relocation will be written - * @sympos: position in kallsyms to disambiguate symbols (optional) - * @type: ELF relocation type - * @name: name of the referenced symbol (for lookup/verification) - * @addend: offset from the referenced symbol - * @external: symbol is either exported or within the live patch module itself - */ -struct klp_reloc { - unsigned long loc; - unsigned long sympos; - unsigned long type; - const char *name; - int addend; - int external; -}; - /** * struct klp_object - kernel object structure for live patching * @name: module name (or NULL for vmlinux) - * @relocs: relocation entries to be applied at load time * @funcs: function entries for functions to be patched in the object * @kobj: kobject for sysfs resources * @mod: kernel module associated with the patched object @@ -95,7 +76,6 @@ struct klp_reloc { struct klp_object { /* external */ const char *name; - struct klp_reloc *relocs; struct klp_func *funcs; /* internal */ -- cgit v1.2.3 From 0bed612be638e41456cd8cb270a2b411a5b43d63 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 2 Apr 2016 01:08:43 +0200 Subject: cpufreq: sched: Helpers to add and remove update_util hooks Replace the single helper for adding and removing cpufreq utilization update hooks, cpufreq_set_update_util_data(), with a pair of helpers, cpufreq_add_update_util_hook() and cpufreq_remove_update_util_hook(), and modify the users of cpufreq_set_update_util_data() accordingly. With the new helpers, the code using them doesn't need to worry about the internals of struct update_util_data and in particular it doesn't need to worry about populating the func field in it properly upfront. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Acked-by: Peter Zijlstra (Intel) --- include/linux/sched.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 60bba7e032dc..1b8825922597 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3240,7 +3240,10 @@ struct update_util_data { u64 time, unsigned long util, unsigned long max); }; -void cpufreq_set_update_util_data(int cpu, struct update_util_data *data); +void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, + void (*func)(struct update_util_data *data, u64 time, + unsigned long util, unsigned long max)); +void cpufreq_remove_update_util_hook(int cpu); #endif /* CONFIG_CPU_FREQ */ #endif -- cgit v1.2.3 From 66893b6ac9952ec9d9409e9d85eaacf37bba8d15 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 22 Mar 2016 02:50:45 +0100 Subject: cpufreq: Move governor attribute set headers to cpufreq.h Move definitions and function headers related to struct gov_attr_set to include/linux/cpufreq.h so they can be used by (future) goverernors located outside of drivers/cpufreq/. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 718e8725de8a..74f5cfff2b52 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -462,6 +462,29 @@ void cpufreq_unregister_governor(struct cpufreq_governor *governor); struct cpufreq_governor *cpufreq_default_governor(void); struct cpufreq_governor *cpufreq_fallback_governor(void); +/* Governor attribute set */ +struct gov_attr_set { + struct kobject kobj; + struct list_head policy_list; + struct mutex update_lock; + int usage_count; +}; + +/* sysfs ops for cpufreq governors */ +extern const struct sysfs_ops governor_sysfs_ops; + +void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node); +void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node); +unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node); + +/* Governor sysfs attribute */ +struct governor_attr { + struct attribute attr; + ssize_t (*show)(struct gov_attr_set *attr_set, char *buf); + ssize_t (*store)(struct gov_attr_set *attr_set, const char *buf, + size_t count); +}; + /********************************************************************* * FREQUENCY TABLE HELPERS * *********************************************************************/ -- cgit v1.2.3 From 379480d8258056bfdbaa65e4d3f024bb5b34b52b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 22 Mar 2016 02:51:56 +0100 Subject: cpufreq: Move governor symbols to cpufreq.h Move definitions of symbols related to transition latency and sampling rate to include/linux/cpufreq.h so they can be used by (future) goverernors located outside of drivers/cpufreq/. No functional changes. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 74f5cfff2b52..2b4f248b8ef1 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -426,6 +426,20 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, #define CPUFREQ_POLICY_POWERSAVE (1) #define CPUFREQ_POLICY_PERFORMANCE (2) +/* + * The polling frequency depends on the capability of the processor. Default + * polling frequency is 1000 times the transition latency of the processor. The + * ondemand governor will work on any processor with transition latency <= 10ms, + * using appropriate sampling rate. + * + * For CPUs with transition latency > 10ms (mostly drivers with CPUFREQ_ETERNAL) + * the ondemand governor will not work. All times here are in us (microseconds). + */ +#define MIN_SAMPLING_RATE_RATIO (2) +#define LATENCY_MULTIPLIER (1000) +#define MIN_LATENCY_MULTIPLIER (20) +#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) + /* Governor Events */ #define CPUFREQ_GOV_START 1 #define CPUFREQ_GOV_STOP 2 -- cgit v1.2.3 From b7898fda5bc7e786e76ce24fbd2ec993b08ec518 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 30 Mar 2016 03:47:49 +0200 Subject: cpufreq: Support for fast frequency switching Modify the ACPI cpufreq driver to provide a method for switching CPU frequencies from interrupt context and update the cpufreq core to support that method if available. Introduce a new cpufreq driver callback, ->fast_switch, to be invoked for frequency switching from interrupt context by (future) governors supporting that feature via (new) helper function cpufreq_driver_fast_switch(). Add two new policy flags, fast_switch_possible, to be set by the cpufreq driver if fast frequency switching can be used for the given policy and fast_switch_enabled, to be set by the governor if it is going to use fast frequency switching for the given policy. Also add a helper for setting the latter. Since fast frequency switching is inherently incompatible with cpufreq transition notifiers, make it possible to set the fast_switch_enabled only if there are no transition notifiers already registered and make the registration of new transition notifiers fail if fast_switch_enabled is set for at least one policy. Implement the ->fast_switch callback in the ACPI cpufreq driver and make it set fast_switch_possible during policy initialization as appropriate. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 2b4f248b8ef1..55e69ebb035c 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -102,6 +102,17 @@ struct cpufreq_policy { */ struct rw_semaphore rwsem; + /* + * Fast switch flags: + * - fast_switch_possible should be set by the driver if it can + * guarantee that frequency can be changed on any CPU sharing the + * policy and that the change will affect all of the policy CPUs then. + * - fast_switch_enabled is to be set by governors that support fast + * freqnency switching with the help of cpufreq_enable_fast_switch(). + */ + bool fast_switch_possible; + bool fast_switch_enabled; + /* Synchronization for frequency transitions */ bool transition_ongoing; /* Tracks transition status */ spinlock_t transition_lock; @@ -156,6 +167,7 @@ int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); int cpufreq_update_policy(unsigned int cpu); bool have_governor_per_policy(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); +void cpufreq_enable_fast_switch(struct cpufreq_policy *policy); #else static inline unsigned int cpufreq_get(unsigned int cpu) { @@ -236,6 +248,8 @@ struct cpufreq_driver { unsigned int relation); /* Deprecated */ int (*target_index)(struct cpufreq_policy *policy, unsigned int index); + unsigned int (*fast_switch)(struct cpufreq_policy *policy, + unsigned int target_freq); /* * Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION * unset. @@ -464,6 +478,8 @@ struct cpufreq_governor { }; /* Pass a target to the cpufreq driver */ +unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, + unsigned int target_freq); int cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation); -- cgit v1.2.3 From ee2ae1ed46251dcbdcc2c59b5e30f664ddfbacb1 Mon Sep 17 00:00:00 2001 From: Alexandre TORGUE Date: Fri, 1 Apr 2016 11:37:33 +0200 Subject: stmmac: add new DT platform entries for GMAC4 This is to support the snps,dwmac-4.00 and snps,dwmac-4.10a and related features on the platform driver. See binding doc for further details. Signed-off-by: Giuseppe Cavallaro Signed-off-by: Alexandre TORGUE Signed-off-by: David S. Miller --- include/linux/stmmac.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index e6bc30a42a74..ffdaca9c01af 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -137,5 +137,7 @@ struct plat_stmmacenet_data { void (*exit)(struct platform_device *pdev, void *priv); void *bsp_priv; struct stmmac_axi *axi; + int has_gmac4; + bool tso_en; }; #endif -- cgit v1.2.3 From 8cb359e3a1f6318f971bec281623613f48b711be Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Wed, 23 Mar 2016 12:34:41 +0000 Subject: iio: buffer: add missing descriptions in iio_buffer_access_funcs The members buffer_group and attrs of iio_buffer_access_funcs have no descriptions for the documentation. Adding them. Fixes: 08e7e0adaa17 ("iio: buffer: Allocate standard attributes in the core") Signed-off-by: Luis de Bethencourt Signed-off-by: Jonathan Cameron --- include/linux/iio/buffer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h index 2ec3ad58e8a0..70a5164f4728 100644 --- a/include/linux/iio/buffer.h +++ b/include/linux/iio/buffer.h @@ -83,10 +83,12 @@ struct iio_buffer_access_funcs { * @access: [DRIVER] buffer access functions associated with the * implementation. * @scan_el_dev_attr_list:[INTERN] list of scan element related attributes. + * @buffer_group: [INTERN] attributes of the buffer group * @scan_el_group: [DRIVER] attribute group for those attributes not * created from the iio_chan_info array. * @pollq: [INTERN] wait queue to allow for polling on the buffer. * @stufftoread: [INTERN] flag to indicate new data. + * @attrs: [INTERN] standard attributes of the buffer * @demux_list: [INTERN] list of operations required to demux the scan. * @demux_bounce: [INTERN] buffer for doing gather from incoming scan. * @buffer_list: [INTERN] entry in the devices list of current buffers. -- cgit v1.2.3 From fddcca5107051adf9e4481d2a79ae0616577fd2c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 29 Feb 2016 13:20:28 +0100 Subject: mtd: avoid stack overflow in MTD CFI code When map_word gets too large, we use a lot of kernel stack, and for MTD_MAP_BANK_WIDTH_32, this means we use more than the recommended 1024 bytes in a number of functions: drivers/mtd/chips/cfi_cmdset_0020.c: In function 'cfi_staa_write_buffers': drivers/mtd/chips/cfi_cmdset_0020.c:651:1: warning: the frame size of 1336 bytes is larger than 1024 bytes [-Wframe-larger-than=] drivers/mtd/chips/cfi_cmdset_0020.c: In function 'cfi_staa_erase_varsize': drivers/mtd/chips/cfi_cmdset_0020.c:972:1: warning: the frame size of 1208 bytes is larger than 1024 bytes [-Wframe-larger-than=] drivers/mtd/chips/cfi_cmdset_0001.c: In function 'do_write_buffer': drivers/mtd/chips/cfi_cmdset_0001.c:1835:1: warning: the frame size of 1240 bytes is larger than 1024 bytes [-Wframe-larger-than=] This can be avoided if all operations on the map word are done indirectly and the stack gets reused between the calls. We can mostly achieve this by selecting MTD_COMPLEX_MAPPINGS whenever MTD_MAP_BANK_WIDTH_32 is set, but for the case that no other bank width is enabled, we also need to use a non-constant map_bankwidth() to convince the compiler to use less stack. Signed-off-by: Arnd Bergmann [Brian: this patch mostly achieves its goal by forcing MTD_COMPLEX_MAPPINGS (and the accompanying indirection) for 256-bit mappings; the rest of the change is mostly a wash, though it helps reduce stack size slightly. If we really care about supporting 256-bit mappings though, we should consider rewriting some of this code to avoid keeping and assigning so many 256-bit objects on the stack.] Signed-off-by: Brian Norris --- include/linux/mtd/map.h | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 5e0eb7ccabd4..3aa56e3104bb 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -122,18 +122,13 @@ #endif #ifdef CONFIG_MTD_MAP_BANK_WIDTH_32 -# ifdef map_bankwidth -# undef map_bankwidth -# define map_bankwidth(map) ((map)->bankwidth) -# undef map_bankwidth_is_large -# define map_bankwidth_is_large(map) (map_bankwidth(map) > BITS_PER_LONG/8) -# undef map_words -# define map_words(map) map_calc_words(map) -# else -# define map_bankwidth(map) 32 -# define map_bankwidth_is_large(map) (1) -# define map_words(map) map_calc_words(map) -# endif +/* always use indirect access for 256-bit to preserve kernel stack */ +# undef map_bankwidth +# define map_bankwidth(map) ((map)->bankwidth) +# undef map_bankwidth_is_large +# define map_bankwidth_is_large(map) (map_bankwidth(map) > BITS_PER_LONG/8) +# undef map_words +# define map_words(map) map_calc_words(map) #define map_bankwidth_is_32(map) (map_bankwidth(map) == 32) #undef MAX_MAP_BANKWIDTH #define MAX_MAP_BANKWIDTH 32 -- cgit v1.2.3 From 5651d6aaf489d1db48c253cf884b40214e91c2c5 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 26 Feb 2016 11:50:28 +0100 Subject: mtd: bcm47xxsflash: use ioremap_cache() instead of KSEG0ADDR() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using KSEG0ADDR makes code highly MIPS dependent and not portable. Thanks to the fix a68f376 ("MIPS: io.h: Define `ioremap_cache'") we can use ioremap_cache which is generic and supported on MIPS as well now. KSEG0ADDR was translating 0x1c000000 into 0x9c000000. With ioremap_cache we use MIPS's __ioremap (and then remap_area_pages). This results in different address (e.g. 0xc0080000) but it still should be cached as expected and it was successfully tested with BCM47186B0. Other than that drivers/bcma/driver_chipcommon_sflash.c nicely setups a struct resource for access window, but we wren't using it. Use it now and drop duplicated info. Signed-off-by: Brian Norris Signed-off-by: Rafał Miłecki --- include/linux/bcma/bcma_driver_chipcommon.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 846513c73606..a5ac2cad5cb7 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -587,7 +587,6 @@ struct mtd_info; struct bcma_sflash { bool present; - u32 window; u32 blocksize; u16 numblocks; u32 size; -- cgit v1.2.3 From 5b01e4b9efa0b78672cbbea830c9fbcc7f239e29 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 4 Apr 2016 11:43:54 +0200 Subject: libata: Implement NCQ autosense Some newer devices support NCQ autosense (cf ACS-4), so we should be using it to retrieve the sense code and speed up recovery. Signed-off-by: Hannes Reinecke Signed-off-by: Tejun Heo --- include/linux/ata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index c1a2f345cbe6..e797e1b53006 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -528,6 +528,8 @@ struct ata_bmdma_prd { #define ata_id_cdb_intr(id) (((id)[ATA_ID_CONFIG] & 0x60) == 0x20) #define ata_id_has_da(id) ((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4)) #define ata_id_has_devslp(id) ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8)) +#define ata_id_has_ncq_autosense(id) \ + ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7)) static inline bool ata_id_has_hipm(const u16 *id) { -- cgit v1.2.3 From e87fd28cf9a2d9018ac4b6dd92f0b417714bc18d Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 4 Apr 2016 11:43:55 +0200 Subject: libata: Implement support for sense data reporting ACS-4 defines a sense data reporting feature set. This patch implements support for it. tj: Cosmetic formatting updates. Signed-off-by: Hannes Reinecke Signed-off-by: Tejun Heo --- include/linux/ata.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index e797e1b53006..00aebc4c83ad 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -385,6 +385,8 @@ enum { SATA_SSP = 0x06, /* Software Settings Preservation */ SATA_DEVSLP = 0x09, /* Device Sleep */ + SETFEATURE_SENSE_DATA = 0xC3, /* Sense Data Reporting feature */ + /* feature values for SET_MAX */ ATA_SET_MAX_ADDR = 0x00, ATA_SET_MAX_PASSWD = 0x01, @@ -718,6 +720,20 @@ static inline bool ata_id_has_read_log_dma_ext(const u16 *id) return false; } +static inline bool ata_id_has_sense_reporting(const u16 *id) +{ + if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) + return false; + return id[ATA_ID_COMMAND_SET_3] & (1 << 6); +} + +static inline bool ata_id_sense_reporting_enabled(const u16 *id) +{ + if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) + return false; + return id[ATA_ID_COMMAND_SET_4] & (1 << 6); +} + /** * ata_id_major_version - get ATA level of drive * @id: Identify data -- cgit v1.2.3 From 06dbde5f3a44248fc02e24d662ac4849202abb48 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 4 Apr 2016 11:44:03 +0200 Subject: libata: Implement control mode page to select sense format Implement MODE SELECT for the control mode page to allow the OS to switch to descriptor sense. tj: Dropped s/sb/cmd->sense_buffer/ in ata_gen_ata_sense(). Added @dev description to ata_msense_ctl_mode(). Signed-off-by: Hannes Reinecke Signed-off-by: Tejun Heo --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 2c4ebef79d0c..a418bca0df0d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -180,6 +180,7 @@ enum { ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */ ATA_DFLAG_DEVSLP = (1 << 27), /* device supports Device Sleep */ ATA_DFLAG_ACPI_DISABLED = (1 << 28), /* ACPI for the device is disabled */ + ATA_DFLAG_D_SENSE = (1 << 29), /* Descriptor sense requested */ ATA_DEV_UNKNOWN = 0, /* unknown device */ ATA_DEV_ATA = 1, /* ATA device */ -- cgit v1.2.3 From 4113652252fad972e0c191b1e536dc74a6faebdc Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 1 Apr 2016 21:38:16 +0200 Subject: reset: Add missing function stub for device_reset The Mediatek's thermal driver fails to compile when the RESET_CONTROLLER option is not set. Logically, as the driver depends on this option to compile, the Kconfig should select it but actually that is not correct because the Kconfig provides also the COMPILE_TEST to increase the compile test coverage. By providing the missing 'device_reset' stub for the driver in reset.h, that let the kernel to compile on different platforms with the Mediatek thermal driver enabled with the COMPILE_TEST option. Signed-off-by: Daniel Lezcano Signed-off-by: Philipp Zabel --- include/linux/reset.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reset.h b/include/linux/reset.h index a552134a209e..ec0306ce7b92 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -56,6 +56,12 @@ static inline void reset_control_put(struct reset_control *rstc) WARN_ON(1); } +static inline int __must_check device_reset(struct device *dev) +{ + WARN_ON(1); + return -ENOTSUPP; +} + static inline int device_reset_optional(struct device *dev) { return -ENOTSUPP; -- cgit v1.2.3 From 974e0a4537f556867483f493c7f67ccdcb7fc504 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 4 Apr 2016 12:17:09 -0400 Subject: libata-core: Allow longer timeout for drive spinup from PUIS When spinning up a drive from powered on standby mode (PUIS), SETFEATURES_SPINUP is executed with the default timeout used for any SETFEATURES subcommand, that is 5+10 seconds. The total 15s is too short for some drives to complete spinup (e.g. drives with a large indirection table stored on media), resulting in ata_dev_read_id to fail twice on the execution of SETFEATURES_SPINUP. For this feature, allow a larger default timeout of 30 seconds. However, in the same spirit as with the timeout of other feature subcommands, do not ignore ata_probe_timeout if it is set). Signed-off-by: Damien Le Moal Signed-off-by: Tejun Heo --- include/linux/ata.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index c1a2f345cbe6..f310ec0f072e 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -371,7 +371,8 @@ enum { SETFEATURES_AAM_ON = 0x42, SETFEATURES_AAM_OFF = 0xC2, - SETFEATURES_SPINUP = 0x07, /* Spin-up drive */ + SETFEATURES_SPINUP = 0x07, /* Spin-up drive */ + SETFEATURES_SPINUP_TIMEOUT = 30000, /* 30s timeout for drive spin-up from PUIS */ SETFEATURES_SATA_ENABLE = 0x10, /* Enable use of SATA feature */ SETFEATURES_SATA_DISABLE = 0x90, /* Disable use of SATA feature */ -- cgit v1.2.3 From 77ed2c5745d93416992857d124f35834b62b3e70 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 8 Mar 2016 20:01:32 +0900 Subject: android,lowmemorykiller: Don't abuse TIF_MEMDIE. Currently, lowmemorykiller (LMK) is using TIF_MEMDIE for two purposes. One is to remember processes killed by LMK, and the other is to accelerate termination of processes killed by LMK. But since LMK is invoked as a memory shrinker function, there still should be some memory available. It is very likely that memory allocations by processes killed by LMK will succeed without using ALLOC_NO_WATERMARKS via TIF_MEMDIE. Even if their allocations cannot escape from memory allocation loop unless they use ALLOC_NO_WATERMARKS, lowmem_deathpending_timeout can guarantee forward progress by choosing next victim process. On the other hand, mark_oom_victim() assumes that it must be called with oom_lock held and it must not be called after oom_killer_disable() was called. But LMK is calling it without holding oom_lock and checking oom_killer_disabled. It is possible that LMK calls mark_oom_victim() due to allocation requests by kernel threads after current thread returned from oom_killer_disabled(). This will break synchronization for PM/suspend. This patch introduces per a task_struct flag for remembering processes killed by LMK, and replaces TIF_MEMDIE with that flag. By applying this patch, assumption by mark_oom_victim() becomes true. Signed-off-by: Tetsuo Handa Acked-by: Michal Hocko Cc: Arve Hjonnevag Cc: Riley Andrews Signed-off-by: Greg Kroah-Hartman --- include/linux/sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 60bba7e032dc..9dff190e6a0a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2184,6 +2184,7 @@ static inline void memalloc_noio_restore(unsigned int flags) #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ +#define PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */ #define TASK_PFA_TEST(name, func) \ @@ -2207,6 +2208,9 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab) TASK_PFA_SET(SPREAD_SLAB, spread_slab) TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) +TASK_PFA_TEST(LMK_WAITING, lmk_waiting) +TASK_PFA_SET(LMK_WAITING, lmk_waiting) + /* * task->jobctl flags */ -- cgit v1.2.3 From ca065d0cf80fa547724440a8bf37f1e674d917c0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 Apr 2016 08:52:13 -0700 Subject: udp: no longer use SLAB_DESTROY_BY_RCU Tom Herbert would like not touching UDP socket refcnt for encapsulated traffic. For this to happen, we need to use normal RCU rules, with a grace period before freeing a socket. UDP sockets are not short lived in the high usage case, so the added cost of call_rcu() should not be a concern. This actually removes a lot of complexity in UDP stack. Multicast receives no longer need to hold a bucket spinlock. Note that ip early demux still needs to take a reference on the socket. Same remark for functions used by xt_socket and xt_PROXY netfilter modules, but this might be changed later. Performance for a single UDP socket receiving flood traffic from many RX queues/cpus. Simple udp_rx using simple recvfrom() loop : 438 kpps instead of 374 kpps : 17 % increase of the peak rate. v2: Addressed Willem de Bruijn feedback in multicast handling - keep early demux break in __udp4_lib_demux_lookup() Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Willem de Bruijn Tested-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/udp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 87c094961bd5..32342754643a 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -98,11 +98,11 @@ static inline bool udp_get_no_check6_rx(struct sock *sk) return udp_sk(sk)->no_check6_rx; } -#define udp_portaddr_for_each_entry(__sk, node, list) \ - hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node) +#define udp_portaddr_for_each_entry(__sk, list) \ + hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node) -#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \ - hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node) +#define udp_portaddr_for_each_entry_rcu(__sk, list) \ + hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node) #define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag) -- cgit v1.2.3 From 8f6fd83c6c5ec66a4a70c728535ddcdfef4f3697 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Wed, 2 Mar 2016 10:09:19 -0500 Subject: rhashtable: accept GFP flags in rhashtable_walk_init In certain cases, the 802.11 mesh pathtable code wants to iterate over all of the entries in the forwarding table from the receive path, which is inside an RCU read-side critical section. Enable walks inside atomic sections by allowing GFP_ATOMIC allocations for the walker state. Change all existing callsites to pass in GFP_KERNEL. Acked-by: Thomas Graf Signed-off-by: Bob Copeland [also adjust gfs2/glock.c and rhashtable tests] Signed-off-by: Johannes Berg --- include/linux/rhashtable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 63bd7601b6de..3eef0802a0cd 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -346,7 +346,8 @@ struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, struct bucket_table *old_tbl); int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl); -int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter); +int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter, + gfp_t gfp); void rhashtable_walk_exit(struct rhashtable_iter *iter); int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); void *rhashtable_walk_next(struct rhashtable_iter *iter); -- cgit v1.2.3 From 4da56b99d99e5a7df2b7f11e87bfea935f909732 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 4 Apr 2016 14:46:42 +0100 Subject: mm/vmap: Add a notifier for when we run out of vmap address space vmaps are temporary kernel mappings that may be of long duration. Reusing a vmap on an object is preferrable for a driver as the cost of setting up the vmap can otherwise dominate the operation on the object. However, the vmap address space is rather limited on 32bit systems and so we add a notification for vmap pressure in order for the driver to release any cached vmappings. The interface is styled after the oom-notifier where the callees are passed a pointer to an unsigned long counter for them to indicate if they have freed any space. v2: Guard the blocking notifier call with gfpflags_allow_blocking() v3: Correct typo in forward declaration and move to head of file Signed-off-by: Chris Wilson Cc: Andrew Morton Cc: David Rientjes Cc: Roman Peniaev Cc: Mel Gorman Cc: linux-mm@kvack.org Cc: linux-kernel@vger.kernel.org Acked-by: Andrew Morton # for inclusion via DRM Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1459777603-23618-3-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- include/linux/vmalloc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index d1f1d338af20..8b51df3ab334 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -8,6 +8,7 @@ #include struct vm_area_struct; /* vma defining user mapping in mm_types.h */ +struct notifier_block; /* in notifier.h */ /* bits in flags of vmalloc's vm_struct below */ #define VM_IOREMAP 0x00000001 /* ioremap() and friends */ @@ -187,4 +188,7 @@ pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) #define VMALLOC_TOTAL 0UL #endif +int register_vmap_purge_notifier(struct notifier_block *nb); +int unregister_vmap_purge_notifier(struct notifier_block *nb); + #endif /* _LINUX_VMALLOC_H */ -- cgit v1.2.3 From 3c5bcb2e1930bdbccd14a49660895f014349b51d Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Thu, 17 Mar 2016 15:02:53 +0200 Subject: ieee80211: support parsing Fine Timing Measurement action frame Add definition for Fine Timing Measurement (FTM) frame format as defined in IEEE802.11-REVmcD5.0 section 9.6.8.33 Signed-off-by: Avraham Stern Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 3b1f6cef9513..bf9706c5b0bd 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -7,6 +7,7 @@ * Copyright (c) 2005, Devicescape Software, Inc. * Copyright (c) 2006, Michael Wu * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH + * Copyright (c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1011,6 +1012,16 @@ struct ieee80211_mgmt { u8 tpc_elem_length; struct ieee80211_tpc_report_ie tpc; } __packed tpc_report; + struct { + u8 action_code; + u8 dialog_token; + u8 follow_up; + u8 tod[6]; + u8 toa[6]; + __le16 tod_error; + __le16 toa_error; + u8 variable[0]; + } __packed ftm; } u; } __packed action; } u; -- cgit v1.2.3 From c663e5f56737757db4d0b317c510ab505f93cecb Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 22 Mar 2016 10:51:16 +0100 Subject: gpio: support native single-ended hardware drivers Some GPIO controllers has a special hardware bit we can flip to support open drain / source. This means that on these hardwares we do not need to emulate OD/OS by setting the line to input instead of actively driving it high/low. Add an optional vtable callback to the driver set_single_ended() so that driver can implement this in hardware if they have it. We may need a pinctrl_gpio_set_config() call at some point to propagate this down to a backing pin control device on systems with split GPIO/pin control. Reported-by: Michael Hennerich Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index bee976f82788..50882e09289b 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -19,6 +19,18 @@ struct gpio_device; #ifdef CONFIG_GPIOLIB +/** + * enum single_ended_mode - mode for single ended operation + * @LINE_MODE_PUSH_PULL: normal mode for a GPIO line, drive actively high/low + * @LINE_MODE_OPEN_DRAIN: set line to be open drain + * @LINE_MODE_OPEN_SOURCE: set line to be open source + */ +enum single_ended_mode { + LINE_MODE_PUSH_PULL, + LINE_MODE_OPEN_DRAIN, + LINE_MODE_OPEN_SOURCE, +}; + /** * struct gpio_chip - abstract a GPIO controller * @label: a functional name for the GPIO device, such as a part @@ -38,7 +50,15 @@ struct gpio_device; * @set: assigns output value for signal "offset" * @set_multiple: assigns output values for multiple signals defined by "mask" * @set_debounce: optional hook for setting debounce time for specified gpio in - * interrupt triggered gpio chips + * interrupt triggered gpio chips + * @set_single_ended: optional hook for setting a line as open drain, open + * source, or non-single ended (restore from open drain/source to normal + * push-pull mode) this should be implemented if the hardware supports + * open drain or open source settings. The GPIOlib will otherwise try + * to emulate open drain/source by not actively driving lines high/low + * if a consumer request this. The driver may return -ENOTSUPP if e.g. + * it supports just open drain but not open source and is called + * with LINE_MODE_OPEN_SOURCE as mode argument. * @to_irq: optional hook supporting non-static gpio_to_irq() mappings; * implementation may not sleep * @dbg_show: optional routine to show contents in debugfs; default code @@ -130,6 +150,9 @@ struct gpio_chip { int (*set_debounce)(struct gpio_chip *chip, unsigned offset, unsigned debounce); + int (*set_single_ended)(struct gpio_chip *chip, + unsigned offset, + enum single_ended_mode mode); int (*to_irq)(struct gpio_chip *chip, unsigned offset); -- cgit v1.2.3 From 627d2d6b550094d88f9e518e15967e7bf906ebbf Mon Sep 17 00:00:00 2001 From: samanthakumar Date: Tue, 5 Apr 2016 12:41:16 -0400 Subject: udp: enable MSG_PEEK at non-zero offset Enable peeking at UDP datagrams at the offset specified with socket option SOL_SOCKET/SO_PEEK_OFF. Peek at any datagram in the queue, up to the end of the given datagram. Implement the SO_PEEK_OFF semantics introduced in commit ef64a54f6e55 ("sock: Introduce the SO_PEEK_OFF sock option"). Increase the offset on peek, decrease it on regular reads. When peeking, always checksum the packet immediately, to avoid recomputation on subsequent peeks and final read. The socket lock is not held for the duration of udp_recvmsg, so peek and read operations can run concurrently. Only the last store to sk_peek_off is preserved. Signed-off-by: Sam Kumar Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 15d0df943466..007381270ff8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2949,7 +2949,12 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); void skb_free_datagram(struct sock *sk, struct sk_buff *skb); -void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb); +void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len); +static inline void skb_free_datagram_locked(struct sock *sk, + struct sk_buff *skb) +{ + __skb_free_datagram_locked(sk, skb, 0); +} int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); -- cgit v1.2.3 From f9cd476123ced488e628339becedb2cf3243a58a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 4 Apr 2016 22:44:59 +0200 Subject: dmaengine: pl08x: allocate OF slave channel data at probe time The current OF translation of channels can never work with any DMA client using the DMA channels directly: the only way to get the channels initialized properly is in the dma_async_device_register() call, where chan->dev etc is allocated and initialized. Allocate and initialize all possible DMA channels and only augment a target channel with the periph_buses at of_xlate(). Remove some const settings to make things work. Cc: Maxime Ripard Tested-by: Joachim Eastwood Tested-by: Johannes Stezenbach Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul --- include/linux/amba/pl08x.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h index 10fe2a211c2e..27e9ec8778eb 100644 --- a/include/linux/amba/pl08x.h +++ b/include/linux/amba/pl08x.h @@ -86,7 +86,7 @@ struct pl08x_channel_data { * @mem_buses: buses which memory can be accessed from: PL08X_AHB1 | PL08X_AHB2 */ struct pl08x_platform_data { - const struct pl08x_channel_data *slave_channels; + struct pl08x_channel_data *slave_channels; unsigned int num_slave_channels; struct pl08x_channel_data memcpy_channel; int (*get_xfer_signal)(const struct pl08x_channel_data *); -- cgit v1.2.3 From fa59507f720077a856c9952a31cfd45cd97ef6f9 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 30 Mar 2016 12:56:28 +0300 Subject: usb: otg-fsm: Add documentation for struct otg_fsm struct otg_fsm is the interface to the OTG state machine. Document the input, output and internal state variables. Definations are taken from Table 7-2 and Table 7-4 of the USB OTG & EH Specification Rev.2.0 Re-arrange some of the members as per use case for more clarity. Signed-off-by: Roger Quadros Acked-by: Peter Chen Signed-off-by: Peter Chen --- include/linux/usb/otg-fsm.h | 90 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 83 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/otg-fsm.h b/include/linux/usb/otg-fsm.h index 24198e16f849..8eec0c261be5 100644 --- a/include/linux/usb/otg-fsm.h +++ b/include/linux/usb/otg-fsm.h @@ -72,37 +72,113 @@ enum otg_fsm_timer { NUM_OTG_FSM_TIMERS, }; -/* OTG state machine according to the OTG spec */ +/** + * struct otg_fsm - OTG state machine according to the OTG spec + * + * OTG hardware Inputs + * + * Common inputs for A and B device + * @id: TRUE for B-device, FALSE for A-device. + * @adp_change: TRUE when current ADP measurement (n) value, compared to the + * ADP measurement taken at n-2, differs by more than CADP_THR + * @power_up: TRUE when the OTG device first powers up its USB system and + * ADP measurement taken if ADP capable + * + * A-Device state inputs + * @a_srp_det: TRUE if the A-device detects SRP + * @a_vbus_vld: TRUE when VBUS voltage is in regulation + * @b_conn: TRUE if the A-device detects connection from the B-device + * @a_bus_resume: TRUE when the B-device detects that the A-device is signaling + * a resume (K state) + * B-Device state inputs + * @a_bus_suspend: TRUE when the B-device detects that the A-device has put the + * bus into suspend + * @a_conn: TRUE if the B-device detects a connection from the A-device + * @b_se0_srp: TRUE when the line has been at SE0 for more than the minimum + * time before generating SRP + * @b_ssend_srp: TRUE when the VBUS has been below VOTG_SESS_VLD for more than + * the minimum time before generating SRP + * @b_sess_vld: TRUE when the B-device detects that the voltage on VBUS is + * above VOTG_SESS_VLD + * @test_device: TRUE when the B-device switches to B-Host and detects an OTG + * test device. This must be set by host/hub driver + * + * Application inputs (A-Device) + * @a_bus_drop: TRUE when A-device application needs to power down the bus + * @a_bus_req: TRUE when A-device application wants to use the bus. + * FALSE to suspend the bus + * + * Application inputs (B-Device) + * @b_bus_req: TRUE during the time that the Application running on the + * B-device wants to use the bus + * + * Auxilary inputs (OTG v1.3 only. Obsolete now.) + * @a_sess_vld: TRUE if the A-device detects that VBUS is above VA_SESS_VLD + * @b_bus_suspend: TRUE when the A-device detects that the B-device has put + * the bus into suspend + * @b_bus_resume: TRUE when the A-device detects that the B-device is signaling + * resume on the bus + * + * OTG Output status. Read only for users. Updated by OTG FSM helpers defined + * in this file + * + * Outputs for Both A and B device + * @drv_vbus: TRUE when A-device is driving VBUS + * @loc_conn: TRUE when the local device has signaled that it is connected + * to the bus + * @loc_sof: TRUE when the local device is generating activity on the bus + * @adp_prb: TRUE when the local device is in the process of doing + * ADP probing + * + * Outputs for B-device state + * @adp_sns: TRUE when the B-device is in the process of carrying out + * ADP sensing + * @data_pulse: TRUE when the B-device is performing data line pulsing + * + * Internal Variables + * + * a_set_b_hnp_en: TRUE when the A-device has successfully set the + * b_hnp_enable bit in the B-device. + * Unused as OTG fsm uses otg->host->b_hnp_enable instead + * b_srp_done: TRUE when the B-device has completed initiating SRP + * b_hnp_enable: TRUE when the B-device has accepted the + * SetFeature(b_hnp_enable) B-device. + * Unused as OTG fsm uses otg->gadget->b_hnp_enable instead + * a_clr_err: Asserted (by application ?) to clear a_vbus_err due to an + * overcurrent condition and causes the A-device to transition + * to a_wait_vfall + */ struct otg_fsm { /* Input */ int id; int adp_change; int power_up; - int test_device; - int a_bus_drop; - int a_bus_req; int a_srp_det; int a_vbus_vld; int b_conn; int a_bus_resume; int a_bus_suspend; int a_conn; - int b_bus_req; int b_se0_srp; int b_ssend_srp; int b_sess_vld; + int test_device; + int a_bus_drop; + int a_bus_req; + int b_bus_req; + /* Auxilary inputs */ int a_sess_vld; int b_bus_resume; int b_bus_suspend; /* Output */ - int data_pulse; int drv_vbus; int loc_conn; int loc_sof; int adp_prb; int adp_sns; + int data_pulse; /* Internal variables */ int a_set_b_hnp_en; @@ -110,7 +186,7 @@ struct otg_fsm { int b_hnp_enable; int a_clr_err; - /* Informative variables */ + /* Informative variables. All unused as of now */ int a_bus_drop_inf; int a_bus_req_inf; int a_clr_err_inf; -- cgit v1.2.3 From 4e332df63487418ec512c3c376c07df9ab3ae035 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 30 Mar 2016 12:56:29 +0300 Subject: usb: otg-fsm: support multiple instances Move the state_changed variable into struct otg_fsm so that we can support multiple instances. Signed-off-by: Roger Quadros Acked-by: Peter Chen Signed-off-by: Peter Chen --- include/linux/usb/otg-fsm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/otg-fsm.h b/include/linux/usb/otg-fsm.h index 8eec0c261be5..7a0350535cb1 100644 --- a/include/linux/usb/otg-fsm.h +++ b/include/linux/usb/otg-fsm.h @@ -210,6 +210,7 @@ struct otg_fsm { struct mutex lock; u8 *host_req_flag; struct delayed_work hnp_polling_work; + bool state_changed; }; struct otg_fsm_ops { -- cgit v1.2.3 From 49ddf8e6e2347cffdcf83d1ca2d04ff929820178 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 31 Mar 2016 20:02:10 +0300 Subject: mac80211: add fast-rx path The regular RX path has a lot of code, but with a few assumptions on the hardware it's possible to reduce the amount of code significantly. Currently the assumptions on the driver are the following: * hardware/driver reordering buffer (if supporting aggregation) * hardware/driver decryption & PN checking (if using encryption) * hardware/driver did de-duplication * hardware/driver did A-MSDU deaggregation * AP_LINK_PS is used (in AP mode) * no client powersave handling in mac80211 (in client mode) of which some are actually checked per packet: * de-duplication * PN checking * decryption and additionally packets must * not be A-MSDU (have been deaggregated by driver/device) * be data packets * not be fragmented * be unicast * have RFC 1042 header Additionally dynamically we assume: * no encryption or CCMP/GCMP, TKIP/WEP/other not allowed * station must be authorized * 4-addr format not enabled Some data needed for the RX path is cached in a new per-station "fast_rx" structure, so that we only need to look at this and the packet, no other memory when processing packets on the fast RX path. After doing the above per-packet checks, the data path collapses down to a pretty simple conversion function taking advantage of the data cached in the small fast_rx struct. This should speed up the RX processing, and will make it easier to reason about parallelizing RX (for which statistics will need to be per-CPU still.) Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index bf9706c5b0bd..113bfc468a4d 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -638,6 +638,16 @@ static inline bool ieee80211_is_first_frag(__le16 seq_ctrl) return (seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG)) == 0; } +/** + * ieee80211_is_frag - check if a frame is a fragment + * @hdr: 802.11 header of the frame + */ +static inline bool ieee80211_is_frag(struct ieee80211_hdr *hdr) +{ + return ieee80211_has_morefrags(hdr->frame_control) || + hdr->seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG); +} + struct ieee80211s_hdr { u8 flags; u8 ttl; -- cgit v1.2.3 From 6e0456b5454561c4e9fa9e8a4acea405e6d56c80 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 3 Mar 2016 22:59:00 +0100 Subject: mac80211: add A-MSDU tx support Requires software tx queueing and fast-xmit support. For good performance, drivers need frag_list support as well. This avoids the need for copying data of aggregated frames. Running without it is only supported for debugging purposes. To avoid performance and packet size issues, the rate control module or driver needs to limit the maximum A-MSDU size by setting max_rc_amsdu_len in struct ieee80211_sta. Signed-off-by: Felix Fietkau [fix locking issue] Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 113bfc468a4d..b118744d3382 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -164,6 +164,9 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) /* 30 byte 4 addr hdr, 2 byte QoS, 2304 byte MSDU, 12 byte crypt, 4 byte FCS */ #define IEEE80211_MAX_FRAME_LEN 2352 +/* Maximal size of an A-MSDU that can be transported in a HT BA session */ +#define IEEE80211_MAX_MPDU_LEN_HT_BA 4095 + /* Maximal size of an A-MSDU */ #define IEEE80211_MAX_MPDU_LEN_HT_3839 3839 #define IEEE80211_MAX_MPDU_LEN_HT_7935 7935 -- cgit v1.2.3 From e68503bd6836ba765dc8e0ee77ea675fedc07e41 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 6 Apr 2016 16:14:24 +0100 Subject: KEYS: Generalise system_verify_data() to provide access to internal content Generalise system_verify_data() to provide access to internal content through a callback. This allows all the PKCS#7 stuff to be hidden inside this function and removed from the PE file parser and the PKCS#7 test key. If external content is not required, NULL should be passed as data to the function. If the callback is not required, that can be set to NULL. The function is now called verify_pkcs7_signature() to contrast with verify_pefile_signature() and the definitions of both have been moved into linux/verification.h along with the key_being_used_for enum. Signed-off-by: David Howells --- include/linux/verification.h | 50 +++++++++++++++++++++++++++++++++++++++++++ include/linux/verify_pefile.h | 22 ------------------- 2 files changed, 50 insertions(+), 22 deletions(-) create mode 100644 include/linux/verification.h delete mode 100644 include/linux/verify_pefile.h (limited to 'include/linux') diff --git a/include/linux/verification.h b/include/linux/verification.h new file mode 100644 index 000000000000..bb0fcf941cb7 --- /dev/null +++ b/include/linux/verification.h @@ -0,0 +1,50 @@ +/* Signature verification + * + * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _LINUX_VERIFICATION_H +#define _LINUX_VERIFICATION_H + +/* + * The use to which an asymmetric key is being put. + */ +enum key_being_used_for { + VERIFYING_MODULE_SIGNATURE, + VERIFYING_FIRMWARE_SIGNATURE, + VERIFYING_KEXEC_PE_SIGNATURE, + VERIFYING_KEY_SIGNATURE, + VERIFYING_KEY_SELF_SIGNATURE, + VERIFYING_UNSPECIFIED_SIGNATURE, + NR__KEY_BEING_USED_FOR +}; +extern const char *const key_being_used_for[NR__KEY_BEING_USED_FOR]; + +#ifdef CONFIG_SYSTEM_DATA_VERIFICATION + +struct key; + +extern int verify_pkcs7_signature(const void *data, size_t len, + const void *raw_pkcs7, size_t pkcs7_len, + struct key *trusted_keys, + int untrusted_error, + enum key_being_used_for usage, + int (*view_content)(void *ctx, + const void *data, size_t len, + size_t asn1hdrlen), + void *ctx); + +#ifdef CONFIG_SIGNED_PE_FILE_VERIFICATION +extern int verify_pefile_signature(const void *pebuf, unsigned pelen, + struct key *trusted_keys, + enum key_being_used_for usage); +#endif + +#endif /* CONFIG_SYSTEM_DATA_VERIFICATION */ +#endif /* _LINUX_VERIFY_PEFILE_H */ diff --git a/include/linux/verify_pefile.h b/include/linux/verify_pefile.h deleted file mode 100644 index da2049b5161c..000000000000 --- a/include/linux/verify_pefile.h +++ /dev/null @@ -1,22 +0,0 @@ -/* Signed PE file verification - * - * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ - -#ifndef _LINUX_VERIFY_PEFILE_H -#define _LINUX_VERIFY_PEFILE_H - -#include - -extern int verify_pefile_signature(const void *pebuf, unsigned pelen, - struct key *trusted_keyring, - enum key_being_used_for usage, - bool *_trusted); - -#endif /* _LINUX_VERIFY_PEFILE_H */ -- cgit v1.2.3 From bda850cd214e90b1be0cc25bc48c4f6ac53eb543 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 6 Apr 2016 16:14:24 +0100 Subject: PKCS#7: Make trust determination dependent on contents of trust keyring Make the determination of the trustworthiness of a key dependent on whether a key that can verify it is present in the supplied ring of trusted keys rather than whether or not the verifying key has KEY_FLAG_TRUSTED set. verify_pkcs7_signature() will return -ENOKEY if the PKCS#7 message trust chain cannot be verified. Signed-off-by: David Howells --- include/linux/verification.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/verification.h b/include/linux/verification.h index bb0fcf941cb7..a10549a6c7cd 100644 --- a/include/linux/verification.h +++ b/include/linux/verification.h @@ -33,7 +33,6 @@ struct key; extern int verify_pkcs7_signature(const void *data, size_t len, const void *raw_pkcs7, size_t pkcs7_len, struct key *trusted_keys, - int untrusted_error, enum key_being_used_for usage, int (*view_content)(void *ctx, const void *data, size_t len, -- cgit v1.2.3 From 31e6850e0fdb3a586363cc4d2f9801cdf9374310 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 5 Apr 2016 12:39:30 +0100 Subject: iommu: Add MMIO mapping type On some platforms, MMIO regions might need slightly different treatment compared to mapping regular memory; add the notion of MMIO mappings to the IOMMU API's memory type flags, so that callers can let the IOMMU drivers know to do the right thing. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a5c539fa5d2b..34b643227df1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -30,6 +30,7 @@ #define IOMMU_WRITE (1 << 1) #define IOMMU_CACHE (1 << 2) /* DMA cache coherency */ #define IOMMU_NOEXEC (1 << 3) +#define IOMMU_MMIO (1 << 4) /* e.g. things like MSI doorbells */ struct iommu_ops; struct iommu_group; -- cgit v1.2.3 From 848fc67da5fb43ef7d92d20891eef79f5de45816 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 4 Mar 2016 15:32:40 +0100 Subject: clk: renesas: mstp: Drop check for CONFIG_PM_GENERIC_DOMAINS_OF As of commit 71d076ceb245f0d9 ("ARM: shmobile: Enable PM and PM_GENERIC_DOMAINS for SoCs with PM Domains"), CONFIG_PM_GENERIC_DOMAINS_OF is always enabled for SoCs with MSTP clocks. Signed-off-by: Geert Uytterhoeven Acked-by: Laurent Pinchart --- include/linux/clk/renesas.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk/renesas.h b/include/linux/clk/renesas.h index 7adfd80fbf55..2a3379cf1330 100644 --- a/include/linux/clk/renesas.h +++ b/include/linux/clk/renesas.h @@ -24,12 +24,8 @@ void r8a7778_clocks_init(u32 mode); void r8a7779_clocks_init(u32 mode); void rcar_gen2_clocks_init(u32 mode); -#ifdef CONFIG_PM_GENERIC_DOMAINS_OF void cpg_mstp_add_clk_domain(struct device_node *np); int cpg_mstp_attach_dev(struct generic_pm_domain *domain, struct device *dev); void cpg_mstp_detach_dev(struct generic_pm_domain *domain, struct device *dev); -#else -static inline void cpg_mstp_add_clk_domain(struct device_node *np) {} -#endif #endif -- cgit v1.2.3 From 12a56817b329d8a73ab53bad09aa976aeea46db9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 4 Mar 2016 16:59:26 +0100 Subject: clk: renesas: mstp: Clarify cpg_mstp_{at,de}tach_dev() domain parameter Make it clear that the "domain" parameter of the cpg_mstp_attach_dev() and cpg_mstp_detach_dev() functions is not used. The cpg_mstp_attach_dev() and cpg_mstp_detach_dev() callbacks are not only used by the CPG/MSTP Clock Domain driver, but also by the R-Mobile SYSC PM Domain driver. Signed-off-by: Geert Uytterhoeven --- include/linux/clk/renesas.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk/renesas.h b/include/linux/clk/renesas.h index 2a3379cf1330..095b1681daf4 100644 --- a/include/linux/clk/renesas.h +++ b/include/linux/clk/renesas.h @@ -25,7 +25,7 @@ void r8a7779_clocks_init(u32 mode); void rcar_gen2_clocks_init(u32 mode); void cpg_mstp_add_clk_domain(struct device_node *np); -int cpg_mstp_attach_dev(struct generic_pm_domain *domain, struct device *dev); -void cpg_mstp_detach_dev(struct generic_pm_domain *domain, struct device *dev); +int cpg_mstp_attach_dev(struct generic_pm_domain *unused, struct device *dev); +void cpg_mstp_detach_dev(struct generic_pm_domain *unused, struct device *dev); #endif -- cgit v1.2.3 From 8ced425ee630c03beea06c1dfa35190bf8395d07 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 5 Apr 2016 17:10:16 +0200 Subject: tun: use socket locks for sk_{attach,detatch}_filter This reverts commit 5a5abb1fa3b05dd ("tun, bpf: fix suspicious RCU usage in tun_{attach, detach}_filter") and replaces it to use lock_sock around sk_{attach,detach}_filter. The checks inside filter.c are updated with lockdep_sock_is_held to check for proper socket locks. It keeps the code cleaner by ensuring that only one lock governs the socket filter instead of two independent locks. Cc: Daniel Borkmann Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/filter.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index a51a5361695f..43aa1f8855c7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -465,14 +465,10 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); -int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, - bool locked); int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); -int __sk_detach_filter(struct sock *sk, bool locked); - int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); -- cgit v1.2.3 From a6024562ffd7e0f31bc6671817840ad1e91de7b4 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 5 Apr 2016 08:22:51 -0700 Subject: udp: Add GRO functions to UDP socket This patch adds GRO functions (gro_receive and gro_complete) to UDP sockets. udp_gro_receive is changed to perform socket lookup on a packet. If a socket is found the related GRO functions are called. This features obsoletes using UDP offload infrastructure for GRO (udp_offload). This has the advantage of not being limited to provide offload on a per port basis, GRO is now applied to whatever individual UDP sockets are bound to. This also allows the possbility of "application defined GRO"-- that is we can attach something like a BPF program to a UDP socket to perfrom GRO on an application layer protocol. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/udp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 32342754643a..d1fd8cd39478 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -71,6 +71,14 @@ struct udp_sock { */ int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); void (*encap_destroy)(struct sock *sk); + + /* GRO functions for UDP socket */ + struct sk_buff ** (*gro_receive)(struct sock *sk, + struct sk_buff **head, + struct sk_buff *skb); + int (*gro_complete)(struct sock *sk, + struct sk_buff *skb, + int nhoff); }; static inline struct udp_sock *udp_sk(const struct sock *sk) -- cgit v1.2.3 From 46aa2f30aa7fe03a4dcd732b009284c02ff4f093 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 5 Apr 2016 08:22:56 -0700 Subject: udp: Remove udp_offloads Now that the UDP encapsulation GRO functions have been moved to the UDP socket we not longer need the udp_offload insfrastructure so removing it. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cb0d5d09c2e4..cb4e508b3f38 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2159,23 +2159,6 @@ struct packet_offload { struct list_head list; }; -struct udp_offload; - -struct udp_offload_callbacks { - struct sk_buff **(*gro_receive)(struct sk_buff **head, - struct sk_buff *skb, - struct udp_offload *uoff); - int (*gro_complete)(struct sk_buff *skb, - int nhoff, - struct udp_offload *uoff); -}; - -struct udp_offload { - __be16 port; - u8 ipproto; - struct udp_offload_callbacks callbacks; -}; - /* often modified stats are per-CPU, other are shared (netdev->stats) */ struct pcpu_sw_netstats { u64 rx_packets; -- cgit v1.2.3 From ec5e099d6e941668d121ea9ca7057f4fa00830b0 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 6 Apr 2016 18:43:22 -0700 Subject: perf: optimize perf_fetch_caller_regs avoid memset in perf_fetch_caller_regs, since it's the critical path of all tracepoints. It's called from perf_sw_event_sched, perf_event_task_sched_in and all of perf_trace_##call with this_cpu_ptr(&__perf_regs[..]) which are zero initialized by perpcu init logic and subsequent call to perf_arch_fetch_caller_regs initializes the same fields on all archs, so we can safely drop memset from all of the above cases and move it into perf_ftrace_function_call that calls it with stack allocated pt_regs. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/perf_event.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f291275ffd71..e89f7199c223 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -882,8 +882,6 @@ static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned lo */ static inline void perf_fetch_caller_regs(struct pt_regs *regs) { - memset(regs, 0, sizeof(*regs)); - perf_arch_fetch_caller_regs(regs, CALLER_ADDR0); } -- cgit v1.2.3 From 1e1dcd93b468901e114f279c94a0b356adc5e7cd Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 6 Apr 2016 18:43:24 -0700 Subject: perf: split perf_trace_buf_prepare into alloc and update parts split allows to move expensive update of 'struct trace_entry' to later phase. Repurpose unused 1st argument of perf_tp_event() to indicate event type. While splitting use temp variable 'rctx' instead of '*rctx' to avoid unnecessary loads done by the compiler due to -fno-strict-aliasing Signed-off-by: Alexei Starovoitov Acked-by: Peter Zijlstra (Intel) Signed-off-by: David S. Miller --- include/linux/perf_event.h | 2 +- include/linux/trace_events.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e89f7199c223..eb41b535ef38 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1016,7 +1016,7 @@ static inline bool perf_paranoid_kernel(void) } extern void perf_event_init(void); -extern void perf_tp_event(u64 addr, u64 count, void *record, +extern void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, struct hlist_head *head, int rctx, struct task_struct *task); diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 0810f81b6db2..56f795e6a093 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -605,15 +605,15 @@ extern void perf_trace_del(struct perf_event *event, int flags); extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); -extern void *perf_trace_buf_prepare(int size, unsigned short type, - struct pt_regs **regs, int *rctxp); +void perf_trace_buf_update(void *record, u16 type); +void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp); static inline void -perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, +perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, u64 count, struct pt_regs *regs, void *head, struct task_struct *task) { - perf_tp_event(addr, count, raw_data, size, regs, head, rctx, task); + perf_tp_event(type, count, raw_data, size, regs, head, rctx, task); } #endif -- cgit v1.2.3 From 9940d67c93b5bb7ddcf862b41b1847cb728186c4 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 6 Apr 2016 18:43:27 -0700 Subject: bpf: support bpf_get_stackid() and bpf_perf_event_output() in tracepoint programs needs two wrapper functions to fetch 'struct pt_regs *' to convert tracepoint bpf context into kprobe bpf context to reuse existing helper functions Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 21ee41b92e8a..198f6ace70ec 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -160,6 +160,7 @@ struct bpf_array { #define MAX_TAIL_CALL_CNT 32 u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); +u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); void bpf_fd_array_map_clear(struct bpf_map *map); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); -- cgit v1.2.3 From 32bbe0078afe86a8bf4c67c6b3477781b15e94dc Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 6 Apr 2016 18:43:28 -0700 Subject: bpf: sanitize bpf tracepoint access during bpf program loading remember the last byte of ctx access and at the time of attaching the program to tracepoint check that the program doesn't access bytes beyond defined in tracepoint fields This also disallows access to __dynamic_array fields, but can be relaxed in the future. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + include/linux/trace_events.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 198f6ace70ec..b2365a6eba3d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -131,6 +131,7 @@ struct bpf_prog_type_list { struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; + u32 max_ctx_offset; const struct bpf_verifier_ops *ops; struct bpf_map **used_maps; struct bpf_prog *prog; diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 56f795e6a093..fe6441203b59 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -569,6 +569,7 @@ extern int trace_define_field(struct trace_event_call *call, const char *type, int is_signed, int filter_type); extern int trace_add_event_call(struct trace_event_call *call); extern int trace_remove_event_call(struct trace_event_call *call); +extern int trace_event_get_offsets(struct trace_event_call *call); #define is_signed_type(type) (((type)(-1)) < (type)1) -- cgit v1.2.3 From 1d111406c6d91f4d7f6cc69a43e59546e8010aae Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 20 Mar 2016 13:57:20 +0100 Subject: PCI: Add Intel Thunderbolt device IDs Intel Gen 1 and 2 chips use the same ID for NHI, bridges and switch. Gen 3 chips and onward use a distinct ID for the NHI. No functional change intended. Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Acked-by: Andreas Noever --- include/linux/pci_ids.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 247da8c95860..c58752fe16c4 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2604,6 +2604,24 @@ #define PCI_DEVICE_ID_INTEL_82441 0x1237 #define PCI_DEVICE_ID_INTEL_82380FB 0x124b #define PCI_DEVICE_ID_INTEL_82439 0x1250 +#define PCI_DEVICE_ID_INTEL_LIGHT_RIDGE 0x1513 /* Tbt 1 Gen 1 */ +#define PCI_DEVICE_ID_INTEL_EAGLE_RIDGE 0x151a +#define PCI_DEVICE_ID_INTEL_LIGHT_PEAK 0x151b +#define PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C 0x1547 /* Tbt 1 Gen 2 */ +#define PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_2C 0x1548 +#define PCI_DEVICE_ID_INTEL_PORT_RIDGE 0x1549 +#define PCI_DEVICE_ID_INTEL_REDWOOD_RIDGE_2C_NHI 0x1566 /* Tbt 1 Gen 3 */ +#define PCI_DEVICE_ID_INTEL_REDWOOD_RIDGE_2C_BRIDGE 0x1567 +#define PCI_DEVICE_ID_INTEL_REDWOOD_RIDGE_4C_NHI 0x1568 +#define PCI_DEVICE_ID_INTEL_REDWOOD_RIDGE_4C_BRIDGE 0x1569 +#define PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_NHI 0x156a /* Thunderbolt 2 */ +#define PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_BRIDGE 0x156b +#define PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_NHI 0x156c +#define PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_BRIDGE 0x156d +#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_2C_NHI 0x1575 /* Thunderbolt 3 */ +#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_2C_BRIDGE 0x1576 +#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_NHI 0x1577 +#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_BRIDGE 0x1578 #define PCI_DEVICE_ID_INTEL_80960_RP 0x1960 #define PCI_DEVICE_ID_INTEL_82840_HB 0x1a21 #define PCI_DEVICE_ID_INTEL_82845_HB 0x1a30 -- cgit v1.2.3 From 6c9d9c81924b4b63c7a487e90fddb3b2d0f7d458 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 7 Apr 2016 23:38:46 +0200 Subject: cpufreq: Call cpufreq_disable_fast_switch() in sugov_exit() Due to differences in the cpufreq core's handling of runtime CPU offline and nonboot CPUs disabling during system suspend-to-RAM, fast frequency switching gets disabled after a suspend-to-RAM and resume cycle on all of the nonboot CPUs. To prevent that from happening, move the invocation of cpufreq_disable_fast_switch() from cpufreq_exit_governor() to sugov_exit(), as the schedutil governor is the only user of fast frequency switching today anyway. That simply prevents cpufreq_disable_fast_switch() from being called without invoking the ->governor callback for the CPUFREQ_GOV_POLICY_EXIT event (which happens during system suspend now). Fixes: b7898fda5bc7 (cpufreq: Support for fast frequency switching) Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 55e69ebb035c..4e81e08db752 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -168,6 +168,7 @@ int cpufreq_update_policy(unsigned int cpu); bool have_governor_per_policy(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); void cpufreq_enable_fast_switch(struct cpufreq_policy *policy); +void cpufreq_disable_fast_switch(struct cpufreq_policy *policy); #else static inline unsigned int cpufreq_get(unsigned int cpu) { -- cgit v1.2.3 From 7e67e239a4f32fa3e2d51dd9a7930018651635b6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 30 Mar 2016 13:45:25 +0530 Subject: cpufreq: dt: Include types.h from cpufreq-dt.h cpufreq-dt.h uses 'bool' data type but doesn't include types.h. It works fine for now as the files that include cpufreq-dt.h, also include types.h directly or indirectly. But, when a file includes cpufreq-dt.h without including types.h, we get a build error. Avoid such errors by including types.h in cpufreq-dt itself. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq-dt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq-dt.h b/include/linux/cpufreq-dt.h index 0414009e2c30..a87335a1660c 100644 --- a/include/linux/cpufreq-dt.h +++ b/include/linux/cpufreq-dt.h @@ -10,6 +10,8 @@ #ifndef __CPUFREQ_DT_H__ #define __CPUFREQ_DT_H__ +#include + struct cpufreq_dt_platform_data { /* * True when each CPU has its own clock to control its -- cgit v1.2.3 From 357f435d8a0d32068c75f3c7176434d992b3adb7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 Apr 2016 19:05:19 -0400 Subject: fix the copy vs. map logics in blk_rq_map_user_iov() Signed-off-by: Al Viro --- include/linux/uio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index fd9bcfedad42..1b5d1cd796e2 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -87,6 +87,7 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i); size_t iov_iter_zero(size_t bytes, struct iov_iter *); unsigned long iov_iter_alignment(const struct iov_iter *i); +unsigned long iov_iter_gap_alignment(const struct iov_iter *i); void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, unsigned long nr_segs, size_t count); void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec, -- cgit v1.2.3 From 1373718194ebebc43c00d8f117e03885749495b0 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Tue, 22 Mar 2016 08:51:10 +0800 Subject: ACPI / PM: Introduce efi poweroff for HW-full platforms without _S5 The problem is Linux registers pm_power_off = efi_power_off only if we are in hardware reduced mode. Actually, what we also want is to do this when ACPI S5 is simply not supported on non-legacy platforms. Since some future Intel platforms are HW-full mode where the DSDT fails to supply an _S5 object(without SLP_TYP), we should let such kind of platform to leverage efi runtime service to poweroff. This patch uses efi power off as first choice when S5 is unavailable, even if there is a customized poweroff(driver provided, eg). Meanwhile, the legacy platforms will not be affected because there is no path for them to overwrite the pm_power_off to efi power off. Suggested-by: Len Brown Reviewed-by: Matt Fleming Signed-off-by: Chen Yu Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 06ed7e54033e..4d2e67f633b6 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -278,6 +278,7 @@ void acpi_irq_stats_init(void); extern u32 acpi_irq_handled; extern u32 acpi_irq_not_handled; extern unsigned int acpi_sci_irq; +extern bool acpi_no_s5; #define INVALID_ACPI_IRQ ((unsigned)-1) static inline bool acpi_sci_irq_valid(void) { -- cgit v1.2.3 From f4d05266032346531b9f889e26aa31a0cf2a9822 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 29 Mar 2016 14:52:23 +0300 Subject: device property: don't bother the drivers with struct property_set Since device_add_property_set() now always takes a copy of the property_set, and also since the fwnode type is always hard coded to be FWNODE_PDATA, there is no need for the drivers to deliver the entire struct property_set. The function can just create the instance of it on its own and bind the properties from the drivers to it on the spot. This renames device_add_property_set() to device_add_properties(). The function now takes struct property_entry as its parameter instead of struct property_set. Reviewed-by: Andy Shevchenko Reviewed-by: Mika Westerberg Acked-by: Thierry Reding Acked-by: Lee Jones Signed-off-by: Heikki Krogerus Signed-off-by: Rafael J. Wysocki --- include/linux/mfd/core.h | 4 ++-- include/linux/platform_device.h | 6 +++--- include/linux/property.h | 15 +++------------ 3 files changed, 8 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index bc6f7e00fb3d..9837f1e8c94c 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -17,7 +17,7 @@ #include struct irq_domain; -struct property_set; +struct property_entry; /* Matches ACPI PNP id, either _HID or _CID, or ACPI _ADR */ struct mfd_cell_acpi_match { @@ -47,7 +47,7 @@ struct mfd_cell { size_t pdata_size; /* device properties passed to the sub devices drivers */ - const struct property_set *pset; + struct property_entry *properties; /* * Device Tree compatible string diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 03b755521fd9..98c2a7c7108e 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -18,7 +18,7 @@ #define PLATFORM_DEVID_AUTO (-2) struct mfd_cell; -struct property_set; +struct property_entry; struct platform_device { const char *name; @@ -73,7 +73,7 @@ struct platform_device_info { size_t size_data; u64 dma_mask; - const struct property_set *pset; + struct property_entry *properties; }; extern struct platform_device *platform_device_register_full( const struct platform_device_info *pdevinfo); @@ -172,7 +172,7 @@ extern int platform_device_add_resources(struct platform_device *pdev, extern int platform_device_add_data(struct platform_device *pdev, const void *data, size_t size); extern int platform_device_add_properties(struct platform_device *pdev, - const struct property_set *pset); + struct property_entry *properties); extern int platform_device_add(struct platform_device *pdev); extern void platform_device_del(struct platform_device *pdev); extern void platform_device_put(struct platform_device *pdev); diff --git a/include/linux/property.h b/include/linux/property.h index b51fcd36d892..ecab11e40794 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -238,18 +238,9 @@ struct property_entry { .name = _name_, \ } -/** - * struct property_set - Collection of "built-in" device properties. - * @fwnode: Handle to be pointed to by the fwnode field of struct device. - * @properties: Array of properties terminated with a null entry. - */ -struct property_set { - struct fwnode_handle fwnode; - struct property_entry *properties; -}; - -int device_add_property_set(struct device *dev, const struct property_set *pset); -void device_remove_property_set(struct device *dev); +int device_add_properties(struct device *dev, + struct property_entry *properties); +void device_remove_properties(struct device *dev); bool device_dma_supported(struct device *dev); -- cgit v1.2.3 From c68ae33e7fb4a010f9a48af3e4b87089dca96551 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 24 Mar 2016 13:15:20 +0100 Subject: ACPI / utils: Rename acpi_dev_present() acpi_dev_present() was originally named after pci_dev_present() to signify the similarity of the two functions. However Rafael J. Wysocki pointed out that the exported function acpi_dev_present() is easily confused with the non-exported acpi_device_is_present(). Additionally in ACPI parlance the term "present" usually refers to the "device is present" bit returned by the _STA control method, yet acpi_dev_present() merely checks presence in the namespace. It does not invoke _STA at all, let alone check the "device is present" bit. As suggested by Rafael, rename the function to acpi_dev_found() and adjust all existing call sites. Signed-off-by: Lukas Wunner Signed-off-by: Rafael J. Wysocki --- include/linux/apple-gmux.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/apple-gmux.h b/include/linux/apple-gmux.h index b2d32e01dfe4..714186de8c36 100644 --- a/include/linux/apple-gmux.h +++ b/include/linux/apple-gmux.h @@ -35,7 +35,7 @@ */ static inline bool apple_gmux_present(void) { - return acpi_dev_present(GMUX_ACPI_HID); + return acpi_dev_found(GMUX_ACPI_HID); } #else /* !CONFIG_APPLE_GMUX */ -- cgit v1.2.3 From 32b9b10961860860268961d9aad0c56a73018c37 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 11 Feb 2016 13:19:09 -0800 Subject: clk: Allow clocks to be marked as CRITICAL Critical clocks are those which must not be gated, else undefined or catastrophic failure would occur. Here we have chosen to ensure the prepare/enable counts are correctly incremented, so as not to confuse users with enabled clocks with no visible users. Signed-off-by: Lee Jones Signed-off-by: Michael Turquette Link: lkml.kernel.org/r/1455225554-13267-2-git-send-email-mturquette@baylibre.com --- include/linux/clk-provider.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index da95258127aa..0638b4154502 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -32,6 +32,7 @@ #define CLK_GET_ACCURACY_NOCACHE BIT(8) /* do not use the cached clk accuracy */ #define CLK_RECALC_NEW_RATES BIT(9) /* recalc rates after notifications */ #define CLK_SET_RATE_UNGATE BIT(10) /* clock needs to run to set rate */ +#define CLK_IS_CRITICAL BIT(11) /* do not gate, ever */ struct clk; struct clk_hw; -- cgit v1.2.3 From d56f8994b6fb928f59481fabc25bcd1c2f9bd06d Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 11 Feb 2016 13:19:11 -0800 Subject: clk: Provide OF helper to mark clocks as CRITICAL This call matches clocks which have been marked as critical in DT and sets the appropriate flag. These flags can then be used to mark the clock core flags appropriately prior to registration. Legacy bindings requiring this feature must add the clock-critical property to their binding descriptions, as it is not a part of common-clock binding. Cc: devicetree@vger.kernel.org Signed-off-by: Lee Jones Reviewed-by: Stephen Boyd Signed-off-by: Michael Turquette Link: lkml.kernel.org/r/1455225554-13267-4-git-send-email-mturquette@baylibre.com --- include/linux/clk-provider.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 0638b4154502..156286445a25 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -721,7 +721,8 @@ unsigned int of_clk_get_parent_count(struct device_node *np); int of_clk_parent_fill(struct device_node *np, const char **parents, unsigned int size); const char *of_clk_get_parent_name(struct device_node *np, int index); - +int of_clk_detect_critical(struct device_node *np, int index, + unsigned long *flags); void of_clk_init(const struct of_device_id *matches); #else /* !CONFIG_OF */ @@ -758,6 +759,11 @@ static inline const char *of_clk_get_parent_name(struct device_node *np, { return NULL; } +static inline int of_clk_detect_critical(struct device_node *np, int index, + unsigned long *flags) +{ + return 0; +} static inline void of_clk_init(const struct of_device_id *matches) {} #endif /* CONFIG_OF */ -- cgit v1.2.3 From b296821a7c42fa58baa17513b2b7b30ae66f3336 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Apr 2016 20:48:24 -0400 Subject: xattr_handler: pass dentry and inode as separate arguments of ->get() ... and do not assume they are already attached to each other Signed-off-by: Al Viro --- include/linux/xattr.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 4457541de3c9..c11c022298b9 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -30,7 +30,8 @@ struct xattr_handler { int flags; /* fs private flags */ bool (*list)(struct dentry *dentry); int (*get)(const struct xattr_handler *, struct dentry *dentry, - const char *name, void *buffer, size_t size); + struct inode *inode, const char *name, void *buffer, + size_t size); int (*set)(const struct xattr_handler *, struct dentry *dentry, const char *name, const void *buffer, size_t size, int flags); -- cgit v1.2.3 From 3c9d6296b7aee536a96ea2b53a15d23511738c1c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 8 Apr 2016 12:20:30 +0200 Subject: security: drop the unused hook skb_owned_by The skb_owned_by hook was added with the commit ca10b9e9a8ca ("selinux: add a skb_owned_by() hook") and later removed when said commit was reverted. Later on, when switching to list of hooks, a field named 'skb_owned_by' was included into the security_hook_head struct, but without any users nor caller. This commit removes the said left-over field. Fixes: b1d9e6b0646d ("LSM: Switch to lists of hooks") Signed-off-by: Paolo Abeni Acked-by: Casey Schaufler Acked-by: Paul Moore Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index cdee11cbcdf1..ae2537886177 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1804,7 +1804,6 @@ struct security_hook_heads { struct list_head tun_dev_attach_queue; struct list_head tun_dev_attach; struct list_head tun_dev_open; - struct list_head skb_owned_by; #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM struct list_head xfrm_policy_alloc_security; -- cgit v1.2.3 From ce23e640133484eebc20ca7b7668388213e11327 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 11 Apr 2016 00:48:00 -0400 Subject: ->getxattr(): pass dentry and inode as separate arguments Signed-off-by: Al Viro --- include/linux/fs.h | 3 ++- include/linux/xattr.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 329ed372d708..1b5fcaeea827 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1702,7 +1702,8 @@ struct inode_operations { int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); + ssize_t (*getxattr) (struct dentry *, struct inode *, + const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, diff --git a/include/linux/xattr.h b/include/linux/xattr.h index c11c022298b9..1cc4c578deb9 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -52,7 +52,7 @@ int __vfs_setxattr_noperm(struct dentry *, const char *, const void *, size_t, i int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int); int vfs_removexattr(struct dentry *, const char *); -ssize_t generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size); +ssize_t generic_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size); ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); int generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int generic_removexattr(struct dentry *dentry, const char *name); -- cgit v1.2.3 From adae28c59a6a71a971ffed713550911546df0e20 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 23 Mar 2016 11:36:53 +0100 Subject: mfd: syscon: Include errno.h from header The syscon header uses the ENOTSUPP error constant, but doesn't include the header that defines it. This causes a build error after the imx pinctrl driver started using syscon: include/linux/mfd/syscon.h: In function 'syscon_node_to_regmap': include/linux/mfd/syscon.h:32:18: error: 'ENOTSUPP' undeclared (first use in this function) return ERR_PTR(-ENOTSUPP); ^~~~~~~~ This adds the missing #include. Signed-off-by: Arnd Bergmann Fixes: 8626ada871f1 ("pinctrl: imx: attach iomuxc device to gpr syscon") Signed-off-by: Lee Jones --- include/linux/mfd/syscon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon.h b/include/linux/mfd/syscon.h index 1088149be0c9..40a76b97b7ab 100644 --- a/include/linux/mfd/syscon.h +++ b/include/linux/mfd/syscon.h @@ -16,6 +16,7 @@ #define __LINUX_MFD_SYSCON_H__ #include +#include struct device_node; -- cgit v1.2.3 From 40a865500c4d408b552cf5899bf091ac72e32bf8 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Thu, 7 Apr 2016 16:22:38 +0200 Subject: regulator: as3722: Add bypass support for LDO6 LD06 on the AS3722 power management IC supports a bypass mode. Bypass is enabled for the LDO by writing the value 0x3F to the voltage select field in the control register for the LDO. Note that this is the same register and field that is used to select the voltage as well for the LDO. Add support for bypass on LDO6 by specifying the various bypass parameters for regulator and adding new function pointer tables for the LDO. Note that the bypass OFF value is the same as the ON value simply because there is no actual OFF value and bypass will be disabled when a new voltage is written to the VSEL field. Signed-off-by: Jon Hunter Signed-off-by: Thierry Reding Signed-off-by: Mark Brown --- include/linux/mfd/as3722.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/as3722.h b/include/linux/mfd/as3722.h index 8d43e9f2a842..51e6f9414575 100644 --- a/include/linux/mfd/as3722.h +++ b/include/linux/mfd/as3722.h @@ -196,6 +196,7 @@ #define AS3722_LDO3_VSEL_MIN 0x01 #define AS3722_LDO3_VSEL_MAX 0x2D #define AS3722_LDO3_NUM_VOLT 0x2D +#define AS3722_LDO6_VSEL_BYPASS 0x3F #define AS3722_LDO_VSEL_MASK 0x7F #define AS3722_LDO_VSEL_MIN 0x01 #define AS3722_LDO_VSEL_MAX 0x7F -- cgit v1.2.3 From a16d6ebca6efb73f6402f36e5aebf84f61721856 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 3 Apr 2016 20:44:45 +0200 Subject: i2c: introduce helper function to get 8 bit address from a message Drivers do this in various ways, let's use one standard way of doing it. Note: I2C_M_RD is bit 0, so the code could be simplified. To be extremly robust and to advertise good coding practices, I still use the ternary operator and let the compilers do the optimizing job. Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 200cf13b00f6..c30833b7b073 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -654,6 +654,11 @@ static inline int i2c_adapter_id(struct i2c_adapter *adap) return adap->nr; } +static inline u8 i2c_8bit_addr_from_msg(const struct i2c_msg *msg) +{ + return (msg->addr << 1) | (msg->flags & I2C_M_RD ? 1 : 0); +} + /** * module_i2c_driver() - Helper macro for registering a modular I2C driver * @__i2c_driver: i2c_driver struct -- cgit v1.2.3 From f0af9593372abfde34460aa1250e670cc535a7d8 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 24 Feb 2016 13:43:45 -0600 Subject: PCI: Add pci_add_dma_alias() to abstract implementation Add a pci_add_dma_alias() interface to encapsulate the details of adding an alias. No functional change intended. Signed-off-by: Bjorn Helgaas Reviewed-by: Alex Williamson --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 004b8133417d..7e7019064437 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1988,6 +1988,7 @@ static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev) } #endif +void pci_add_dma_alias(struct pci_dev *dev, u8 devfn); int pci_for_each_dma_alias(struct pci_dev *pdev, int (*fn)(struct pci_dev *pdev, u16 alias, void *data), void *data); -- cgit v1.2.3 From 338c3149a221527e202ee26b1e35f76c965bb6c0 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Thu, 3 Mar 2016 15:38:02 +0100 Subject: PCI: Add support for multiple DMA aliases Solve IOMMU support issues with PCIe non-transparent bridges that use Requester ID look-up tables (RID-LUT), e.g., the PEX8733. The NTB connects devices in two independent PCI domains. Devices separated by the NTB are not able to discover each other. A PCI packet being forwared from one domain to another has to have its RID modified so it appears on correct bus and completions are forwarded back to the original domain through the NTB. The RID is translated using a preprogrammed table (LUT) and the PCI packet propagates upstream away from the NTB. If the destination system has IOMMU enabled, the packet will be discarded because the new RID is unknown to the IOMMU. Adding a DMA alias for the new RID allows IOMMU to properly recognize the packet. Each device behind the NTB has a unique RID assigned in the RID-LUT. The current DMA alias implementation supports only a single alias, so it's not possible to support mutiple devices behind the NTB when IOMMU is enabled. Enable all possible aliases on a given bus (256) that are stored in a bitset. Alias devfn is directly translated to a bit number. The bitset is not allocated for devices that have no need for DMA aliases. More details can be found in the following article: http://www.plxtech.com/files/pdf/technical/expresslane/RTC_Enabling%20MulitHostSystemDesigns.pdf Signed-off-by: Jacek Lawrynowicz Signed-off-by: Bjorn Helgaas Reviewed-by: Alex Williamson Acked-by: David Woodhouse Acked-by: Joerg Roedel --- include/linux/pci.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 7e7019064437..5581d05f7833 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -166,8 +166,6 @@ enum pci_dev_flags { PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) (1 << 2), /* Flag for quirk use to store if quirk-specific ACS is enabled */ PCI_DEV_FLAGS_ACS_ENABLED_QUIRK = (__force pci_dev_flags_t) (1 << 3), - /* Flag to indicate the device uses dma_alias_devfn */ - PCI_DEV_FLAGS_DMA_ALIAS_DEVFN = (__force pci_dev_flags_t) (1 << 4), /* Use a PCIe-to-PCI bridge alias even if !pci_is_pcie */ PCI_DEV_FLAG_PCIE_BRIDGE_ALIAS = (__force pci_dev_flags_t) (1 << 5), /* Do not use bus resets for device */ @@ -273,7 +271,7 @@ struct pci_dev { u8 rom_base_reg; /* which config register controls the ROM */ u8 pin; /* which interrupt pin this device uses */ u16 pcie_flags_reg; /* cached PCIe Capabilities Register */ - u8 dma_alias_devfn;/* devfn of DMA alias, if any */ + unsigned long *dma_alias_mask;/* mask of enabled devfn aliases */ struct pci_driver *driver; /* which driver has allocated this device */ u64 dma_mask; /* Mask of the bits of bus address this @@ -1989,6 +1987,7 @@ static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev) #endif void pci_add_dma_alias(struct pci_dev *dev, u8 devfn); +bool pci_devs_are_dma_aliases(struct pci_dev *dev1, struct pci_dev *dev2); int pci_for_each_dma_alias(struct pci_dev *pdev, int (*fn)(struct pci_dev *pdev, u16 alias, void *data), void *data); -- cgit v1.2.3 From 5ac7eace2d00eab5ae0e9fdee63e38aee6001f7c Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 6 Apr 2016 16:14:24 +0100 Subject: KEYS: Add a facility to restrict new links into a keyring Add a facility whereby proposed new links to be added to a keyring can be vetted, permitting them to be rejected if necessary. This can be used to block public keys from which the signature cannot be verified or for which the signature verification fails. It could also be used to provide blacklisting. This affects operations like add_key(), KEYCTL_LINK and KEYCTL_INSTANTIATE. To this end: (1) A function pointer is added to the key struct that, if set, points to the vetting function. This is called as: int (*restrict_link)(struct key *keyring, const struct key_type *key_type, unsigned long key_flags, const union key_payload *key_payload), where 'keyring' will be the keyring being added to, key_type and key_payload will describe the key being added and key_flags[*] can be AND'ed with KEY_FLAG_TRUSTED. [*] This parameter will be removed in a later patch when KEY_FLAG_TRUSTED is removed. The function should return 0 to allow the link to take place or an error (typically -ENOKEY, -ENOPKG or -EKEYREJECTED) to reject the link. The pointer should not be set directly, but rather should be set through keyring_alloc(). Note that if called during add_key(), preparse is called before this method, but a key isn't actually allocated until after this function is called. (2) KEY_ALLOC_BYPASS_RESTRICTION is added. This can be passed to key_create_or_update() or key_instantiate_and_link() to bypass the restriction check. (3) KEY_FLAG_TRUSTED_ONLY is removed. The entire contents of a keyring with this restriction emplaced can be considered 'trustworthy' by virtue of being in the keyring when that keyring is consulted. (4) key_alloc() and keyring_alloc() take an extra argument that will be used to set restrict_link in the new key. This ensures that the pointer is set before the key is published, thus preventing a window of unrestrictedness. Normally this argument will be NULL. (5) As a temporary affair, keyring_restrict_trusted_only() is added. It should be passed to keyring_alloc() as the extra argument instead of setting KEY_FLAG_TRUSTED_ONLY on a keyring. This will be replaced in a later patch with functions that look in the appropriate places for authoritative keys. Signed-off-by: David Howells Reviewed-by: Mimi Zohar --- include/linux/key.h | 53 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index 5f5b1129dc92..83b603639d2e 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -174,10 +174,9 @@ struct key { #define KEY_FLAG_ROOT_CAN_CLEAR 6 /* set if key can be cleared by root without permission */ #define KEY_FLAG_INVALIDATED 7 /* set if key has been invalidated */ #define KEY_FLAG_TRUSTED 8 /* set if key is trusted */ -#define KEY_FLAG_TRUSTED_ONLY 9 /* set if keyring only accepts links to trusted keys */ -#define KEY_FLAG_BUILTIN 10 /* set if key is builtin */ -#define KEY_FLAG_ROOT_CAN_INVAL 11 /* set if key can be invalidated by root without permission */ -#define KEY_FLAG_KEEP 12 /* set if key should not be removed */ +#define KEY_FLAG_BUILTIN 9 /* set if key is built in to the kernel */ +#define KEY_FLAG_ROOT_CAN_INVAL 10 /* set if key can be invalidated by root without permission */ +#define KEY_FLAG_KEEP 11 /* set if key should not be removed */ /* the key type and key description string * - the desc is used to match a key against search criteria @@ -205,6 +204,21 @@ struct key { }; int reject_error; }; + + /* This is set on a keyring to restrict the addition of a link to a key + * to it. If this method isn't provided then it is assumed that the + * keyring is open to any addition. It is ignored for non-keyring + * keys. + * + * This is intended for use with rings of trusted keys whereby addition + * to the keyring needs to be controlled. KEY_ALLOC_BYPASS_RESTRICTION + * overrides this, allowing the kernel to add extra keys without + * restriction. + */ + int (*restrict_link)(struct key *keyring, + const struct key_type *type, + unsigned long flags, + const union key_payload *payload); }; extern struct key *key_alloc(struct key_type *type, @@ -212,14 +226,19 @@ extern struct key *key_alloc(struct key_type *type, kuid_t uid, kgid_t gid, const struct cred *cred, key_perm_t perm, - unsigned long flags); + unsigned long flags, + int (*restrict_link)(struct key *, + const struct key_type *, + unsigned long, + const union key_payload *)); -#define KEY_ALLOC_IN_QUOTA 0x0000 /* add to quota, reject if would overrun */ -#define KEY_ALLOC_QUOTA_OVERRUN 0x0001 /* add to quota, permit even if overrun */ -#define KEY_ALLOC_NOT_IN_QUOTA 0x0002 /* not in quota */ -#define KEY_ALLOC_TRUSTED 0x0004 /* Key should be flagged as trusted */ -#define KEY_ALLOC_BUILT_IN 0x0008 /* Key is built into kernel */ +#define KEY_ALLOC_IN_QUOTA 0x0000 /* add to quota, reject if would overrun */ +#define KEY_ALLOC_QUOTA_OVERRUN 0x0001 /* add to quota, permit even if overrun */ +#define KEY_ALLOC_NOT_IN_QUOTA 0x0002 /* not in quota */ +#define KEY_ALLOC_TRUSTED 0x0004 /* Key should be flagged as trusted */ +#define KEY_ALLOC_BUILT_IN 0x0008 /* Key is built into kernel */ +#define KEY_ALLOC_BYPASS_RESTRICTION 0x0010 /* Override the check on restricted keyrings */ extern void key_revoke(struct key *key); extern void key_invalidate(struct key *key); @@ -288,8 +307,22 @@ extern struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid const struct cred *cred, key_perm_t perm, unsigned long flags, + int (*restrict_link)(struct key *, + const struct key_type *, + unsigned long, + const union key_payload *), struct key *dest); +extern int keyring_restrict_trusted_only(struct key *keyring, + const struct key_type *type, + unsigned long, + const union key_payload *payload); + +extern int restrict_link_reject(struct key *keyring, + const struct key_type *type, + unsigned long flags, + const union key_payload *payload); + extern int keyring_clear(struct key *keyring); extern key_ref_t keyring_search(key_ref_t keyring, -- cgit v1.2.3 From 77f68bac9481ad440f4f34dda3d28c2dce6eb87b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 6 Apr 2016 16:14:26 +0100 Subject: KEYS: Remove KEY_FLAG_TRUSTED and KEY_ALLOC_TRUSTED Remove KEY_FLAG_TRUSTED and KEY_ALLOC_TRUSTED as they're no longer meaningful. Also we can drop the trusted flag from the preparse structure. Given this, we no longer need to pass the key flags through to restrict_link(). Further, we can now get rid of keyring_restrict_trusted_only() also. Signed-off-by: David Howells --- include/linux/key-type.h | 1 - include/linux/key.h | 21 +++++---------------- 2 files changed, 5 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key-type.h b/include/linux/key-type.h index 7463355a198b..eaee981c5558 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -45,7 +45,6 @@ struct key_preparsed_payload { size_t datalen; /* Raw datalen */ size_t quotalen; /* Quota length for proposed payload */ time_t expiry; /* Expiry time of key */ - bool trusted; /* True if key is trusted */ }; typedef int (*request_key_actor_t)(struct key_construction *key, diff --git a/include/linux/key.h b/include/linux/key.h index 83b603639d2e..722914798f37 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -173,10 +173,9 @@ struct key { #define KEY_FLAG_NEGATIVE 5 /* set if key is negative */ #define KEY_FLAG_ROOT_CAN_CLEAR 6 /* set if key can be cleared by root without permission */ #define KEY_FLAG_INVALIDATED 7 /* set if key has been invalidated */ -#define KEY_FLAG_TRUSTED 8 /* set if key is trusted */ -#define KEY_FLAG_BUILTIN 9 /* set if key is built in to the kernel */ -#define KEY_FLAG_ROOT_CAN_INVAL 10 /* set if key can be invalidated by root without permission */ -#define KEY_FLAG_KEEP 11 /* set if key should not be removed */ +#define KEY_FLAG_BUILTIN 8 /* set if key is built in to the kernel */ +#define KEY_FLAG_ROOT_CAN_INVAL 9 /* set if key can be invalidated by root without permission */ +#define KEY_FLAG_KEEP 10 /* set if key should not be removed */ /* the key type and key description string * - the desc is used to match a key against search criteria @@ -217,7 +216,6 @@ struct key { */ int (*restrict_link)(struct key *keyring, const struct key_type *type, - unsigned long flags, const union key_payload *payload); }; @@ -229,16 +227,14 @@ extern struct key *key_alloc(struct key_type *type, unsigned long flags, int (*restrict_link)(struct key *, const struct key_type *, - unsigned long, const union key_payload *)); #define KEY_ALLOC_IN_QUOTA 0x0000 /* add to quota, reject if would overrun */ #define KEY_ALLOC_QUOTA_OVERRUN 0x0001 /* add to quota, permit even if overrun */ #define KEY_ALLOC_NOT_IN_QUOTA 0x0002 /* not in quota */ -#define KEY_ALLOC_TRUSTED 0x0004 /* Key should be flagged as trusted */ -#define KEY_ALLOC_BUILT_IN 0x0008 /* Key is built into kernel */ -#define KEY_ALLOC_BYPASS_RESTRICTION 0x0010 /* Override the check on restricted keyrings */ +#define KEY_ALLOC_BUILT_IN 0x0004 /* Key is built into kernel */ +#define KEY_ALLOC_BYPASS_RESTRICTION 0x0008 /* Override the check on restricted keyrings */ extern void key_revoke(struct key *key); extern void key_invalidate(struct key *key); @@ -309,18 +305,11 @@ extern struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid unsigned long flags, int (*restrict_link)(struct key *, const struct key_type *, - unsigned long, const union key_payload *), struct key *dest); -extern int keyring_restrict_trusted_only(struct key *keyring, - const struct key_type *type, - unsigned long, - const union key_payload *payload); - extern int restrict_link_reject(struct key *keyring, const struct key_type *type, - unsigned long flags, const union key_payload *payload); extern int keyring_clear(struct key *keyring); -- cgit v1.2.3 From 9a6f2b0113c8fce815db7c9d23754bdea4b428a0 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 11 Apr 2016 21:40:05 +0200 Subject: net: mdio: Fix lockdep falls positive splat MDIO devices can be stacked upon each other. The current code supports two levels, which until recently has been enough for a DSA mdio bus on top of another bus. Now we have hardware which has an MDIO mux in the middle. Define an MDIO MUTEX class with three levels. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/mdio.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 5bfd99d1a40a..bf9d1d750693 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -13,6 +13,17 @@ struct mii_bus; +/* Multiple levels of nesting are possible. However typically this is + * limited to nested DSA like layer, a MUX layer, and the normal + * user. Instead of trying to handle the general case, just define + * these cases. + */ +enum mdio_mutex_lock_class { + MDIO_MUTEX_NORMAL, + MDIO_MUTEX_MUX, + MDIO_MUTEX_NESTED, +}; + struct mdio_device { struct device dev; -- cgit v1.2.3 From 757d12e5849be549076901b0d33c60d5f360269c Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 12 Apr 2016 21:07:06 +0530 Subject: dmaengine: ensure dmaengine helpers check valid callback dmaengine has various device callbacks and exposes helper functions to invoke these. These helpers should check if channel, device and callback is valid or not before invoking them. Reported-by: Jon Hunter Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 017433712833..30de0197263a 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -804,6 +804,9 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single( sg_dma_address(&sg) = buf; sg_dma_len(&sg) = len; + if (!chan || !chan->device || !chan->device->device_prep_slave_sg) + return NULL; + return chan->device->device_prep_slave_sg(chan, &sg, 1, dir, flags, NULL); } @@ -812,6 +815,9 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_sg( struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, enum dma_transfer_direction dir, unsigned long flags) { + if (!chan || !chan->device || !chan->device->device_prep_slave_sg) + return NULL; + return chan->device->device_prep_slave_sg(chan, sgl, sg_len, dir, flags, NULL); } @@ -823,6 +829,9 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_rio_sg( enum dma_transfer_direction dir, unsigned long flags, struct rio_dma_ext *rio_ext) { + if (!chan || !chan->device || !chan->device->device_prep_slave_sg) + return NULL; + return chan->device->device_prep_slave_sg(chan, sgl, sg_len, dir, flags, rio_ext); } @@ -833,6 +842,9 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_cyclic( size_t period_len, enum dma_transfer_direction dir, unsigned long flags) { + if (!chan || !chan->device || !chan->device->device_prep_dma_cyclic) + return NULL; + return chan->device->device_prep_dma_cyclic(chan, buf_addr, buf_len, period_len, dir, flags); } @@ -841,6 +853,9 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma( struct dma_chan *chan, struct dma_interleaved_template *xt, unsigned long flags) { + if (!chan || !chan->device || !chan->device->device_prep_interleaved_dma) + return NULL; + return chan->device->device_prep_interleaved_dma(chan, xt, flags); } @@ -848,7 +863,7 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memset( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags) { - if (!chan || !chan->device) + if (!chan || !chan->device || !chan->device->device_prep_dma_memset) return NULL; return chan->device->device_prep_dma_memset(chan, dest, value, @@ -861,6 +876,9 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_sg( struct scatterlist *src_sg, unsigned int src_nents, unsigned long flags) { + if (!chan || !chan->device || !chan->device->device_prep_dma_sg) + return NULL; + return chan->device->device_prep_dma_sg(chan, dst_sg, dst_nents, src_sg, src_nents, flags); } -- cgit v1.2.3 From 37e58237a16b94fcd2c2d1b7e9c6e1ca661c231b Mon Sep 17 00:00:00 2001 From: Ming Lin Date: Tue, 22 Mar 2016 00:24:44 -0700 Subject: block: add offset in blk_add_request_payload() We could kmalloc() the payload, so need the offset in page. Signed-off-by: Ming Lin Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 669e419d6234..bbaa76757018 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -779,7 +779,7 @@ extern struct request *blk_make_request(struct request_queue *, struct bio *, extern void blk_rq_set_block_pc(struct request *); extern void blk_requeue_request(struct request_queue *, struct request *); extern void blk_add_request_payload(struct request *rq, struct page *page, - unsigned int len); + int offset, unsigned int len); extern int blk_lld_busy(struct request_queue *q); extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, struct bio_set *bs, gfp_t gfp_mask, -- cgit v1.2.3 From e0489487ec9cd79ee1fa0dc5d3789c08b0e51a2c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 10 Mar 2016 13:58:46 +0200 Subject: blk-mq: Export tagset iter function Its useful to iterate on all the active tags in cases where we will need to fail all the queues IO. Signed-off-by: Sagi Grimberg [hch: carefully check for valid tagsets] Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 9ac9799b702b..c808fec1ce44 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -240,6 +240,8 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, void *priv); +void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, + busy_tag_iter_fn *fn, void *priv); void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_unfreeze_queue(struct request_queue *q); void blk_mq_freeze_queue_start(struct request_queue *q); -- cgit v1.2.3 From e8f1e1630b0a98685d1a3521e8aba0dc7e68082c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 10 Mar 2016 13:58:49 +0200 Subject: blk-mq: Make blk_mq_all_tag_busy_iter static No caller outside the blk-mq code so we can settle with it static. Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c808fec1ce44..2498fdf3a503 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -238,8 +238,6 @@ void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); -void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, - void *priv); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv); void blk_mq_freeze_queue(struct request_queue *q); -- cgit v1.2.3 From 9fd4dcece43a53e5a9e65a973df5693702ee6401 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 22 Mar 2016 14:11:13 +0100 Subject: debugfs: prevent access to possibly dead file_operations at file open Nothing prevents a dentry found by path lookup before a return of __debugfs_remove() to actually get opened after that return. Now, after the return of __debugfs_remove(), there are no guarantees whatsoever regarding the memory the corresponding inode's file_operations object had been kept in. Since __debugfs_remove() is seldomly invoked, usually from module exit handlers only, the race is hard to trigger and the impact is very low. A discussion of the problem outlined above as well as a suggested solution can be found in the (sub-)thread rooted at http://lkml.kernel.org/g/20130401203445.GA20862@ZenIV.linux.org.uk ("Yet another pipe related oops.") Basically, Greg KH suggests to introduce an intermediate fops and Al Viro points out that a pointer to the original ones may be stored in ->d_fsdata. Follow this line of reasoning: - Add SRCU as a reverse dependency of DEBUG_FS. - Introduce a srcu_struct object for the debugfs subsystem. - In debugfs_create_file(), store a pointer to the original file_operations object in ->d_fsdata. - Make debugfs_remove() and debugfs_remove_recursive() wait for a SRCU grace period after the dentry has been delete()'d and before they return to their callers. - Introduce an intermediate file_operations object named "debugfs_open_proxy_file_operations". It's ->open() functions checks, under the protection of a SRCU read lock, whether the dentry is still alive, i.e. has not been d_delete()'d and if so, tries to acquire a reference on the owning module. On success, it sets the file object's ->f_op to the original file_operations and forwards the ongoing open() call to the original ->open(). - For clarity, rename the former debugfs_file_operations to debugfs_noop_file_operations -- they are in no way canonical. The choice of SRCU over "normal" RCU is justified by the fact, that the former may also be used to protect ->i_private data from going away during the execution of a file's readers and writers which may (and do) sleep. Finally, introduce the fs/debugfs/internal.h header containing some declarations internal to the debugfs implementation. Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 981e53ab84e8..fcafe2d389f9 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -43,9 +43,6 @@ extern struct dentry *arch_debugfs_dir; #if defined(CONFIG_DEBUG_FS) -/* declared over in file.c */ -extern const struct file_operations debugfs_file_operations; - struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); -- cgit v1.2.3 From 49d200deaa680501f19a247b1fffb29301e51d2b Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 22 Mar 2016 14:11:14 +0100 Subject: debugfs: prevent access to removed files' private data Upon return of debugfs_remove()/debugfs_remove_recursive(), it might still be attempted to access associated private file data through previously opened struct file objects. If that data has been freed by the caller of debugfs_remove*() in the meanwhile, the reading/writing process would either encounter a fault or, if the memory address in question has been reassigned again, unrelated data structures could get overwritten. However, since debugfs files are seldomly removed, usually from module exit handlers only, the impact is very low. Currently, there are ~1000 call sites of debugfs_create_file() spread throughout the whole tree and touching all of those struct file_operations in order to make them file removal aware by means of checking the result of debugfs_use_file_start() from within their methods is unfeasible. Instead, wrap the struct file_operations by a lifetime managing proxy at file open: - In debugfs_create_file(), the original fops handed in has got stashed away in ->d_fsdata already. - In debugfs_create_file(), install a proxy file_operations factory, debugfs_full_proxy_file_operations, at ->i_fop. This proxy factory has got an ->open() method only. It carries out some lifetime checks and if successful, dynamically allocates and sets up a new struct file_operations proxy at ->f_op. Afterwards, it forwards to the ->open() of the original struct file_operations in ->d_fsdata, if any. The dynamically set up proxy at ->f_op has got a lifetime managing wrapper set for each of the methods defined in the original struct file_operations in ->d_fsdata. Its ->release()er frees the proxy again and forwards to the original ->release(), if any. In order not to mislead the VFS layer, it is strictly necessary to leave those fields blank in the proxy that have been NULL in the original struct file_operations also, i.e. aren't supported. This is why there is a need for dynamically allocated proxies. The choice made not to allocate a proxy instance for every dentry at file creation, but for every struct file object instantiated thereof is justified by the expected usage pattern of debugfs, namely that in general very few files get opened more than once at a time. The wrapper methods set in the struct file_operations implement lifetime managing by means of the SRCU protection facilities already in place for debugfs: They set up a SRCU read side critical section and check whether the dentry is still alive by means of debugfs_use_file_start(). If so, they forward the call to the original struct file_operation stored in ->d_fsdata, still under the protection of the SRCU read side critical section. This SRCU read side critical section prevents any pending debugfs_remove() and friends to return to their callers. Since a file's private data must only be freed after the return of debugfs_remove(), the ongoing proxied call is guarded against any file removal race. If, on the other hand, the initial call to debugfs_use_file_start() detects that the dentry is dead, the wrapper simply returns -EIO and does not forward the call. Note that the ->poll() wrapper is special in that its signature does not allow for the return of arbitrary -EXXX values and thus, POLLHUP is returned here. In order not to pollute debugfs with wrapper definitions that aren't ever needed, I chose not to define a wrapper for every struct file_operations method possible. Instead, a wrapper is defined only for the subset of methods which are actually set by any debugfs users. Currently, these are: ->llseek() ->read() ->write() ->unlocked_ioctl() ->poll() The ->release() wrapper is special in that it does not protect the original ->release() in any way from dead files in order not to leak resources. Thus, any ->release() handed to debugfs must implement file lifetime management manually, if needed. For only 33 out of a total of 434 releasers handed in to debugfs, it could not be verified immediately whether they access data structures that might have been freed upon a debugfs_remove() return in the meanwhile. Export debugfs_use_file_start() and debugfs_use_file_finish() in order to allow any ->release() to manually implement file lifetime management. For a set of common cases of struct file_operations implemented by the debugfs_core itself, future patches will incorporate file lifetime management directly within those in order to allow for their unproxied operation. Rename the original, non-proxying "debugfs_create_file()" to "debugfs_create_file_unsafe()" and keep it for future internal use by debugfs itself. Factor out code common to both into the new __debugfs_create_file(). Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index fcafe2d389f9..a63e6ea3321c 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -19,9 +19,11 @@ #include #include +#include struct device; struct file_operations; +struct srcu_struct; struct debugfs_blob_wrapper { void *data; @@ -41,6 +43,8 @@ struct debugfs_regset32 { extern struct dentry *arch_debugfs_dir; +extern struct srcu_struct debugfs_srcu; + #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_create_file(const char *name, umode_t mode, @@ -65,6 +69,11 @@ struct dentry *debugfs_create_automount(const char *name, void debugfs_remove(struct dentry *dentry); void debugfs_remove_recursive(struct dentry *dentry); +int debugfs_use_file_start(const struct dentry *dentry, int *srcu_idx) + __acquires(&debugfs_srcu); + +void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu); + struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, const char *new_name); @@ -173,6 +182,17 @@ static inline void debugfs_remove(struct dentry *dentry) static inline void debugfs_remove_recursive(struct dentry *dentry) { } +static inline int debugfs_use_file_start(const struct dentry *dentry, + int *srcu_idx) + __acquires(&debugfs_srcu) +{ + return 0; +} + +static inline void debugfs_use_file_finish(int srcu_idx) + __releases(&debugfs_srcu) +{ } + static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, char *new_name) { -- cgit v1.2.3 From c64688081490321f2d23a292ef24e60bb321f3f1 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 22 Mar 2016 14:11:15 +0100 Subject: debugfs: add support for self-protecting attribute file fops In order to protect them against file removal issues, debugfs_create_file() creates a lifetime managing proxy around each struct file_operations handed in. In cases where this struct file_operations is able to manage file lifetime by itself already, the proxy created by debugfs is a waste of resources. The most common class of struct file_operations given to debugfs are those defined by means of the DEFINE_SIMPLE_ATTRIBUTE() macro. Introduce a DEFINE_DEBUGFS_ATTRIBUTE() macro to allow any struct file_operations of this class to be easily made file lifetime aware and thus, to be operated unproxied. Specifically, introduce debugfs_attr_read() and debugfs_attr_write() which wrap simple_attr_read() and simple_attr_write() under the protection of a debugfs_use_file_start()/debugfs_use_file_finish() pair. Make DEFINE_DEBUGFS_ATTRIBUTE() set the defined struct file_operations' ->read() and ->write() members to these wrappers. Export debugfs_create_file_unsafe() in order to allow debugfs users to create their files in non-proxying operation mode. Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index a63e6ea3321c..1438e2322d5c 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -50,6 +50,9 @@ extern struct srcu_struct debugfs_srcu; struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); +struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops); struct dentry *debugfs_create_file_size(const char *name, umode_t mode, struct dentry *parent, void *data, @@ -74,6 +77,26 @@ int debugfs_use_file_start(const struct dentry *dentry, int *srcu_idx) void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu); +ssize_t debugfs_attr_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos); +ssize_t debugfs_attr_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos); + +#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ +static int __fops ## _open(struct inode *inode, struct file *file) \ +{ \ + __simple_attr_check_format(__fmt, 0ull); \ + return simple_attr_open(inode, file, __get, __set, __fmt); \ +} \ +static const struct file_operations __fops = { \ + .owner = THIS_MODULE, \ + .open = __fops ## _open, \ + .release = simple_attr_release, \ + .read = debugfs_attr_read, \ + .write = debugfs_attr_write, \ + .llseek = generic_file_llseek, \ +} + struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, const char *new_name); @@ -193,6 +216,9 @@ static inline void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu) { } +#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ + static const struct file_operations __fops = { 0 } + static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, char *new_name) { -- cgit v1.2.3 From 93e9d8e836cb1a9a58b33eb6643bf061c6119ef2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Apr 2016 12:32:46 -0600 Subject: block: add ability to flag write back caching on a device Add an internal helper and flag for setting whether a queue has write back caching, or write through (or none). Add a sysfs file to show this as well, and make it changeable from user space. This will replace the (awkward) blk_queue_flush() interface that drivers currently use to inform the block layer of write cache state and capabilities. Signed-off-by: Jens Axboe Reviewed-by: Christoph Hellwig --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bbaa76757018..ba72687c5654 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -491,6 +491,8 @@ struct request_queue { #define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */ #define QUEUE_FLAG_NO_SG_MERGE 21 /* don't attempt to merge SG segments*/ #define QUEUE_FLAG_POLL 22 /* IO polling enabled if set */ +#define QUEUE_FLAG_WC 23 /* Write back caching */ +#define QUEUE_FLAG_FUA 24 /* device supports FUA writes */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -1009,6 +1011,7 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); +extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); -- cgit v1.2.3 From 2245f6de6c68b225986229a2de78c240536f7f38 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 30 Mar 2016 10:19:30 -0600 Subject: block: kill blk_queue_flush() We don't have any drivers left using it, so kill it off. Update documentation to use the newer blk_queue_write_cache(). Signed-off-by: Jens Axboe Reviewed-by: Christoph Hellwig --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ba72687c5654..f3f232fa505d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1009,7 +1009,6 @@ extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); -extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -- cgit v1.2.3 From 0c034fe37718990e0ffdd9622bd6cc5b4f93111f Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Tue, 12 Apr 2016 17:46:39 -0300 Subject: mtd: Uninline mtd_write_oob and move it to mtdcore.c There's no reason for having mtd_write_oob inlined in mtd.h header. Move it to mtdcore.c where it belongs. Signed-off-by: Ezequiel Garcia Acked-by: Boris Brezillon Signed-off-by: Jacek Anaszewski --- include/linux/mtd/mtd.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 771272187316..ef9fea4fc400 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -283,17 +283,7 @@ int mtd_panic_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf); int mtd_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops); - -static inline int mtd_write_oob(struct mtd_info *mtd, loff_t to, - struct mtd_oob_ops *ops) -{ - ops->retlen = ops->oobretlen = 0; - if (!mtd->_write_oob) - return -EOPNOTSUPP; - if (!(mtd->flags & MTD_WRITEABLE)) - return -EROFS; - return mtd->_write_oob(mtd, to, ops); -} +int mtd_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops); int mtd_get_fact_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen, struct otp_info *buf); -- cgit v1.2.3 From 4b721174c7976f26300e39aa42714fb5a54bebb5 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Tue, 12 Apr 2016 17:46:40 -0300 Subject: leds: trigger: Introduce a MTD (NAND/NOR) trigger This commit introduces a MTD trigger for flash (NAND/NOR) device activity. The implementation is copied from IDE disk. This trigger deprecates the "nand-disk" LED trigger, but for backwards compatibility, we still keep the "nand-disk" trigger around. The motivation for deprecating the "nand-disk" LED trigger is that it only works for NAND drivers, whereas the "mtd" LED trigger is more generic (in fact, "nand-disk" currently only works for certain NAND drivers). Signed-off-by: Ezequiel Garcia Acked-by: Boris Brezillon Signed-off-by: Jacek Anaszewski --- include/linux/leds.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index f203a8f89d30..19eb10278bea 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -329,6 +329,12 @@ extern void ledtrig_ide_activity(void); static inline void ledtrig_ide_activity(void) {} #endif +#ifdef CONFIG_LEDS_TRIGGER_MTD +extern void ledtrig_mtd_activity(void); +#else +static inline void ledtrig_mtd_activity(void) {} +#endif + #if defined(CONFIG_LEDS_TRIGGER_CAMERA) || defined(CONFIG_LEDS_TRIGGER_CAMERA_MODULE) extern void ledtrig_flash_ctrl(bool on); extern void ledtrig_torch_ctrl(bool on); -- cgit v1.2.3 From bc405cd69a728d0a82bae8395fe43ec7b0afd1c6 Mon Sep 17 00:00:00 2001 From: Alexandre Macabies Date: Tue, 12 Apr 2016 18:53:00 +0200 Subject: ieee802154: add security bit check function ieee802154_is_secen checks if the 802.15.4 security bit is set in the frame control field. Signed-off-by: Alexander Aring Signed-off-by: Alexandre Macabies Reviewed-by: Stefan Schmidt Acked-by: Alan Ott Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index d3e415674dac..56090f195339 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -218,6 +218,7 @@ enum { /* frame control handling */ #define IEEE802154_FCTL_FTYPE 0x0003 #define IEEE802154_FCTL_ACKREQ 0x0020 +#define IEEE802154_FCTL_SECEN 0x0004 #define IEEE802154_FCTL_INTRA_PAN 0x0040 #define IEEE802154_FTYPE_DATA 0x0001 @@ -232,6 +233,15 @@ static inline int ieee802154_is_data(__le16 fc) cpu_to_le16(IEEE802154_FTYPE_DATA); } +/** + * ieee802154_is_secen - check if Security bit is set + * @fc: frame control bytes in little-endian byteorder + */ +static inline bool ieee802154_is_secen(__le16 fc) +{ + return fc & cpu_to_le16(IEEE802154_FCTL_SECEN); +} + /** * ieee802154_is_ackreq - check if acknowledgment request bit is set * @fc: frame control bytes in little-endian byteorder -- cgit v1.2.3 From b7594148c73cb506487b5f00a6574beceea0e3a0 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 11 Apr 2016 11:04:14 +0200 Subject: ieee802154: cleanups for ieee802154.h This patch removes some const from non-pointer types and fixes the function name for the ieee802154_is_valid_extended_unicast_addr comment. Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Acked-by: Jukka Rissanen Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 56090f195339..9d84a924b747 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -270,17 +270,17 @@ static inline bool ieee802154_is_intra_pan(__le16 fc) * * @len: psdu len with (MHR + payload + MFR) */ -static inline bool ieee802154_is_valid_psdu_len(const u8 len) +static inline bool ieee802154_is_valid_psdu_len(u8 len) { return (len == IEEE802154_ACK_PSDU_LEN || (len >= IEEE802154_MIN_PSDU_LEN && len <= IEEE802154_MTU)); } /** - * ieee802154_is_valid_psdu_len - check if extended addr is valid + * ieee802154_is_valid_extended_unicast_addr - check if extended addr is valid * @addr: extended addr to check */ -static inline bool ieee802154_is_valid_extended_unicast_addr(const __le64 addr) +static inline bool ieee802154_is_valid_extended_unicast_addr(__le64 addr) { /* Bail out if the address is all zero, or if the group * address bit is set. -- cgit v1.2.3 From 118a5cf8ae236cdfa1eb4f21530843a8494722ef Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 11 Apr 2016 11:04:15 +0200 Subject: ieee802154: add short address helpers This patch introduce some short address handling functionality into ieee802154 headers. Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Acked-by: Jukka Rissanen Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 9d84a924b747..acedbb68a5a3 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -47,6 +47,7 @@ #define IEEE802154_ADDR_SHORT_UNSPEC 0xfffe #define IEEE802154_EXTENDED_ADDR_LEN 8 +#define IEEE802154_SHORT_ADDR_LEN 2 #define IEEE802154_LIFS_PERIOD 40 #define IEEE802154_SIFS_PERIOD 12 @@ -289,6 +290,34 @@ static inline bool ieee802154_is_valid_extended_unicast_addr(__le64 addr) !(addr & cpu_to_le64(0x0100000000000000ULL))); } +/** + * ieee802154_is_broadcast_short_addr - check if short addr is broadcast + * @addr: short addr to check + */ +static inline bool ieee802154_is_broadcast_short_addr(__le16 addr) +{ + return (addr == cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST)); +} + +/** + * ieee802154_is_unspec_short_addr - check if short addr is unspecified + * @addr: short addr to check + */ +static inline bool ieee802154_is_unspec_short_addr(__le16 addr) +{ + return (addr == cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC)); +} + +/** + * ieee802154_is_valid_src_short_addr - check if source short address is valid + * @addr: short addr to check + */ +static inline bool ieee802154_is_valid_src_short_addr(__le16 addr) +{ + return !(ieee802154_is_broadcast_short_addr(addr) || + ieee802154_is_unspec_short_addr(addr)); +} + /** * ieee802154_random_extended_addr - generates a random extended address * @addr: extended addr pointer to place the random address -- cgit v1.2.3 From f8e04d854506ddfeba9cb41b601972b28521f104 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 7 Apr 2016 17:12:21 +0200 Subject: locking/rwsem: Get rid of __down_write_nested() This is no longer used anywhere and all callers (__down_write()) use 0 as a subclass. Ditch __down_write_nested() to make the code easier to follow. This shouldn't introduce any functional change. Signed-off-by: Michal Hocko Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Chris Zankel Cc: David S. Miller Cc: Linus Torvalds Cc: Max Filippov Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Signed-off-by: Davidlohr Bueso Cc: Signed-off-by: Jason Low Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-alpha@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Cc: sparclinux@vger.kernel.org Link: http://lkml.kernel.org/r/1460041951-22347-2-git-send-email-mhocko@kernel.org Signed-off-by: Ingo Molnar --- include/linux/rwsem-spinlock.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index 561e8615528d..a733a5467e6c 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h @@ -34,7 +34,6 @@ struct rw_semaphore { extern void __down_read(struct rw_semaphore *sem); extern int __down_read_trylock(struct rw_semaphore *sem); extern void __down_write(struct rw_semaphore *sem); -extern void __down_write_nested(struct rw_semaphore *sem, int subclass); extern int __down_write_trylock(struct rw_semaphore *sem); extern void __up_read(struct rw_semaphore *sem); extern void __up_write(struct rw_semaphore *sem); -- cgit v1.2.3 From d47996082f52baa0ca8b48d26b3cbef5ede70a73 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 7 Apr 2016 17:12:26 +0200 Subject: locking/rwsem: Introduce basis for down_write_killable() Introduce a generic implementation necessary for down_write_killable(). This is a trivial extension of the already existing down_write() call which can be interrupted by SIGKILL. This patch doesn't provide down_write_killable() yet because arches have to provide the necessary pieces before. rwsem_down_write_failed() which is a generic slow path for the write lock is extended to take a task state and renamed to __rwsem_down_write_failed_common(). The return value is either a valid semaphore pointer or ERR_PTR(-EINTR). rwsem_down_write_failed_killable() is exported as a new way to wait for the lock and be killable. For rwsem-spinlock implementation the current __down_write() it updated in a similar way as __rwsem_down_write_failed_common() except it doesn't need new exports just visible __down_write_killable(). Architectures which are not using the generic rwsem implementation are supposed to provide their __down_write_killable() implementation and use rwsem_down_write_failed_killable() for the slow path. Signed-off-by: Michal Hocko Cc: Andrew Morton Cc: Chris Zankel Cc: David S. Miller Cc: Linus Torvalds Cc: Max Filippov Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Signed-off-by: Davidlohr Bueso Cc: Signed-off-by: Jason Low Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-alpha@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Cc: sparclinux@vger.kernel.org Link: http://lkml.kernel.org/r/1460041951-22347-7-git-send-email-mhocko@kernel.org Signed-off-by: Ingo Molnar --- include/linux/rwsem-spinlock.h | 1 + include/linux/rwsem.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index a733a5467e6c..ae0528b834cd 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h @@ -34,6 +34,7 @@ struct rw_semaphore { extern void __down_read(struct rw_semaphore *sem); extern int __down_read_trylock(struct rw_semaphore *sem); extern void __down_write(struct rw_semaphore *sem); +extern int __must_check __down_write_killable(struct rw_semaphore *sem); extern int __down_write_trylock(struct rw_semaphore *sem); extern void __up_read(struct rw_semaphore *sem); extern void __up_write(struct rw_semaphore *sem); diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 8f498cdde280..7d7ae029dac5 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef CONFIG_RWSEM_SPIN_ON_OWNER #include #endif @@ -43,6 +44,7 @@ struct rw_semaphore { extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem); extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); -- cgit v1.2.3 From 2c923e94cd9c6acff3b22f0ae29cfe65e2658b40 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 11 Apr 2016 16:38:34 +0200 Subject: sched/clock: Make local_clock()/cpu_clock() inline The local_clock/cpu_clock functions were changed to prevent a double identical test with sched_clock_cpu() when HAVE_UNSTABLE_SCHED_CLOCK is set. That resulted in one line functions. As these functions are in all the cases one line functions and in the hot path, it is useful to specify them as static inline in order to give a strong hint to the compiler. After verification, it appears the compiler does not inline them without this hint. Change those functions to static inline. sched_clock_cpu() is called via the inlined local_clock()/cpu_clock() functions from sched.h. So any module code including sched.h will reference sched_clock_cpu(). Thus it must be exported with the EXPORT_SYMBOL_GPL macro. Signed-off-by: Daniel Lezcano Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1460385514-14700-2-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 52c4847b05e2..13c1c1d07270 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2303,8 +2303,6 @@ extern unsigned long long notrace sched_clock(void); /* * See the comment in kernel/sched/clock.c */ -extern u64 cpu_clock(int cpu); -extern u64 local_clock(void); extern u64 running_clock(void); extern u64 sched_clock_cpu(int cpu); @@ -2323,6 +2321,16 @@ static inline void sched_clock_idle_sleep_event(void) static inline void sched_clock_idle_wakeup_event(u64 delta_ns) { } + +static inline u64 cpu_clock(int cpu) +{ + return sched_clock(); +} + +static inline u64 local_clock(void) +{ + return sched_clock(); +} #else /* * Architectures can set this to 1 if they have specified @@ -2337,6 +2345,26 @@ extern void clear_sched_clock_stable(void); extern void sched_clock_tick(void); extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); + +/* + * As outlined in clock.c, provides a fast, high resolution, nanosecond + * time source that is monotonic per cpu argument and has bounded drift + * between cpus. + * + * ######################### BIG FAT WARNING ########################## + * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can # + * # go backwards !! # + * #################################################################### + */ +static inline u64 cpu_clock(int cpu) +{ + return sched_clock_cpu(cpu); +} + +static inline u64 local_clock(void) +{ + return sched_clock_cpu(raw_smp_processor_id()); +} #endif #ifdef CONFIG_IRQ_TIME_ACCOUNTING -- cgit v1.2.3 From c422025c185fb2bb28df65b1bbed7953480c7f87 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 18 Mar 2016 16:24:41 +0200 Subject: dmaengine: dw: rename masters to reflect actual topology The source and destination masters are reflecting buses or their layers to where the different devices can be connected. The patch changes the master names to reflect which one is related to which independently on the transfer direction. The outcome of the change is that the memory data width is now always limited by a data width of the master which is dedicated to communicate to memory. The patch will not break anything since all current users have the same data width for all masters. Though it would be nice to revisit avr32 platforms to check what is the actual hardware topology in use there. It seems that it has one bus and two masters on it as stated by Table 8-2, that's why everything works independently on the master in use. The purpose of the sequential patch is to fix the driver for configuration of more than one bus. The change is done in the assumption that src_master and dst_master are reflecting a connection to the memory and peripheral correspondently on avr32 and otherwise on the rest. Acked-by: Hans-Christian Egtvedt Acked-by: Mark Brown Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul --- include/linux/platform_data/dma-dw.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index 03b6095d3b18..b881b978e486 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -21,15 +21,15 @@ * @dma_dev: required DMA master device * @src_id: src request line * @dst_id: dst request line - * @src_master: src master for transfers on allocated channel. - * @dst_master: dest master for transfers on allocated channel. + * @m_master: memory master for transfers on allocated channel + * @p_master: peripheral master for transfers on allocated channel */ struct dw_dma_slave { struct device *dma_dev; u8 src_id; u8 dst_id; - u8 src_master; - u8 dst_master; + u8 m_master; + u8 p_master; }; /** -- cgit v1.2.3 From d2d5437bdfdde20a75bdf59db1c1a77721613b22 Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 13 Apr 2016 15:29:45 +0530 Subject: regulator: max8973: add support for junction thermal warning The driver MAX8973 supports the driver for Maxim PMIC MAX77621. MAX77621 supports the junction temp warning at 120 degC and 140 degC which is configurable. It generates alert signal when junction temperature crosses these threshold. MAX77621 does not support the continuous temp monitoring of junction temperature. It just report whether junction temperature crossed the threshold or not. Add support to - Configure junction temp warning threshold via DT property to generate alert when it crosses the threshold. - Add support to interrupt the host from this device when alert occurred. - read the junction temp via thermal framework. Signed-off-by: Laxman Dewangan Signed-off-by: Mark Brown --- include/linux/regulator/max8973-regulator.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/max8973-regulator.h b/include/linux/regulator/max8973-regulator.h index f6a8a16a0d4d..2fcb9980262a 100644 --- a/include/linux/regulator/max8973-regulator.h +++ b/include/linux/regulator/max8973-regulator.h @@ -54,6 +54,10 @@ * @reg_init_data: The regulator init data. * @control_flags: Control flags which are ORed value of above flags to * configure device. + * @junction_temp_warning: Junction temp in millicelcius on which warning need + * to be set. Thermal functionality is only supported on + * MAX77621. The threshold warning supported by MAX77621 + * are 120C and 140C. * @enable_ext_control: Enable the voltage enable/disable through external * control signal from EN input pin. If it is false then * voltage output will be enabled/disabled through EN bit of @@ -67,6 +71,7 @@ struct max8973_regulator_platform_data { struct regulator_init_data *reg_init_data; unsigned long control_flags; + unsigned long junction_temp_warning; bool enable_ext_control; int enable_gpio; int dvs_gpio; -- cgit v1.2.3 From c888a8f95ae5b1067855235b3b71c1ebccf504f5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 13 Apr 2016 13:33:19 -0600 Subject: block: kill off q->flush_flags Now that we converted everything to the newer block write cache interface, kill off the queue flush_flags and queueable flush entries. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f3f232fa505d..57c085917da6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -433,8 +433,6 @@ struct request_queue { /* * for flush operations */ - unsigned int flush_flags; - unsigned int flush_not_queueable:1; struct blk_flush_queue *fq; struct list_head requeue_list; @@ -493,6 +491,7 @@ struct request_queue { #define QUEUE_FLAG_POLL 22 /* IO polling enabled if set */ #define QUEUE_FLAG_WC 23 /* Write back caching */ #define QUEUE_FLAG_FUA 24 /* device supports FUA writes */ +#define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -1365,7 +1364,7 @@ static inline unsigned int block_size(struct block_device *bdev) static inline bool queue_flush_queueable(struct request_queue *q) { - return !q->flush_not_queueable; + return !test_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags); } typedef struct {struct page *v;} Sector; -- cgit v1.2.3 From 7d35812c3214afa5b37a675113555259cfd67b98 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:23 +0200 Subject: netfilter: x_tables: add and use xt_check_entry_offsets Currently arp/ip and ip6tables each implement a short helper to check that the target offset is large enough to hold one xt_entry_target struct and that t->u.target_size fits within the current rule. Unfortunately these checks are not sufficient. To avoid adding new tests to all of ip/ip6/arptables move the current checks into a helper, then extend this helper in followup patches. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 80a305b85323..0de0862897a4 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -242,6 +242,10 @@ void xt_unregister_match(struct xt_match *target); int xt_register_matches(struct xt_match *match, unsigned int n); void xt_unregister_matches(struct xt_match *match, unsigned int n); +int xt_check_entry_offsets(const void *base, + unsigned int target_offset, + unsigned int next_offset); + int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, -- cgit v1.2.3 From fc1221b3a163d1386d1052184202d5dc50d302d1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:26 +0200 Subject: netfilter: x_tables: add compat version of xt_check_entry_offsets 32bit rulesets have different layout and alignment requirements, so once more integrity checks get added to xt_check_entry_offsets it will reject well-formed 32bit rulesets. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 0de0862897a4..08de48bbe92e 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -494,6 +494,9 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, unsigned int *size); int xt_compat_target_to_user(const struct xt_entry_target *t, void __user **dstptr, unsigned int *size); +int xt_compat_check_entry_offsets(const void *base, + unsigned int target_offset, + unsigned int next_offset); #endif /* CONFIG_COMPAT */ #endif /* _X_TABLES_H */ -- cgit v1.2.3 From ce683e5f9d045e5d67d1312a42b359cb2ab2a13c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:28 +0200 Subject: netfilter: x_tables: check for bogus target offset We're currently asserting that targetoff + targetsize <= nextoff. Extend it to also check that targetoff is >= sizeof(xt_entry). Since this is generic code, add an argument pointing to the start of the match/target, we can then derive the base structure size from the delta. We also need the e->elems pointer in a followup change to validate matches. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 08de48bbe92e..30cfb1e943fb 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -242,7 +242,7 @@ void xt_unregister_match(struct xt_match *target); int xt_register_matches(struct xt_match *match, unsigned int n); void xt_unregister_matches(struct xt_match *match, unsigned int n); -int xt_check_entry_offsets(const void *base, +int xt_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset); @@ -494,7 +494,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, unsigned int *size); int xt_compat_target_to_user(const struct xt_entry_target *t, void __user **dstptr, unsigned int *size); -int xt_compat_check_entry_offsets(const void *base, +int xt_compat_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset); -- cgit v1.2.3 From 0188346f21e6546498c2a0f84888797ad4063fc5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 14:17:33 +0200 Subject: netfilter: x_tables: xt_compat_match_from_user doesn't need a retval Always returned 0. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 30cfb1e943fb..e2da9b90f1b8 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -484,7 +484,7 @@ void xt_compat_init_offsets(u_int8_t af, unsigned int number); int xt_compat_calc_jump(u_int8_t af, unsigned int offset); int xt_compat_match_offset(const struct xt_match *match); -int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, +void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, unsigned int *size); int xt_compat_match_to_user(const struct xt_entry_match *m, void __user **dstptr, unsigned int *size); -- cgit v1.2.3 From d7591f0c41ce3e67600a982bab6989ef0f07b3ce Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Apr 2016 15:37:59 +0200 Subject: netfilter: x_tables: introduce and use xt_copy_counters_from_user The three variants use same copy&pasted code, condense this into a helper and use that. Make sure info.name is 0-terminated. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index e2da9b90f1b8..4dd9306c9d56 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -251,6 +251,9 @@ int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); +void *xt_copy_counters_from_user(const void __user *user, unsigned int len, + struct xt_counters_info *info, bool compat); + struct xt_table *xt_register_table(struct net *net, const struct xt_table *table, struct xt_table_info *bootstrap, -- cgit v1.2.3 From f9a7cbbf18f1640907d6ca345b8337e4b50ea56f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 8 Apr 2016 17:51:54 +0200 Subject: net: force inlining of netif_tx_start/stop_queue, sock_hold, __sock_put Sometimes gcc mysteriously doesn't inline very small functions we expect to be inlined. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66122 Arguably, gcc should do better, but gcc people aren't willing to invest time into it, asking to use __always_inline instead. With this .config: http://busybox.net/~vda/kernel_config_OPTIMIZE_INLINING_and_Os, the following functions get deinlined many times. netif_tx_stop_queue: 207 copies, 590 calls: 55 push %rbp 48 89 e5 mov %rsp,%rbp f0 80 8f e0 01 00 00 01 lock orb $0x1,0x1e0(%rdi) 5d pop %rbp c3 retq netif_tx_start_queue: 47 copies, 111 calls 55 push %rbp 48 89 e5 mov %rsp,%rbp f0 80 a7 e0 01 00 00 fe lock andb $0xfe,0x1e0(%rdi) 5d pop %rbp c3 retq sock_hold: 39 copies, 124 calls 55 push %rbp 48 89 e5 mov %rsp,%rbp f0 ff 87 80 00 00 00 lock incl 0x80(%rdi) 5d pop %rbp c3 retq __sock_put: 6 copies, 13 calls 55 push %rbp 48 89 e5 mov %rsp,%rbp f0 ff 8f 80 00 00 00 lock decl 0x80(%rdi) 5d pop %rbp c3 retq This patch fixes this via s/inline/__always_inline/. Code size decrease after the patch is ~2.5k: text data bss dec hex filename 56719876 56364551 36196352 149280779 8e5d80b vmlinux_before 56717440 56364551 36196352 149278343 8e5ce87 vmlinux Signed-off-by: Denys Vlasenko CC: David S. Miller CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org CC: netfilter-devel@vger.kernel.org Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 166402ae3324..e906c6570b38 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2787,7 +2787,7 @@ static inline void netif_tx_schedule_all(struct net_device *dev) netif_schedule_queue(netdev_get_tx_queue(dev, i)); } -static inline void netif_tx_start_queue(struct netdev_queue *dev_queue) +static __always_inline void netif_tx_start_queue(struct netdev_queue *dev_queue) { clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } @@ -2837,7 +2837,7 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev) } } -static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) +static __always_inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) { set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } -- cgit v1.2.3 From 743b03a83297690f0bd38c452a3bbb47d2be300a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 9 Apr 2016 11:29:58 -0700 Subject: net: remove netdevice gso_min_segs After introduction of ndo_features_check(), we believe that very specific checks for rare features should not be done in core networking stack. No driver uses gso_min_segs yet, so we revert this feature and save few instructions per tx packet in fast path. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e906c6570b38..9884fe9a6552 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1586,8 +1586,6 @@ enum netdev_priv_flags { * @gso_max_size: Maximum size of generic segmentation offload * @gso_max_segs: Maximum number of segments that can be passed to the * NIC for GSO - * @gso_min_segs: Minimum number of segments that can be passed to the - * NIC for GSO * * @dcbnl_ops: Data Center Bridging netlink ops * @num_tc: Number of traffic classes in the net device @@ -1858,7 +1856,7 @@ struct net_device { unsigned int gso_max_size; #define GSO_MAX_SEGS 65535 u16 gso_max_segs; - u16 gso_min_segs; + #ifdef CONFIG_DCB const struct dcbnl_rtnl_ops *dcbnl_ops; #endif -- cgit v1.2.3 From 95114344ea78649b1797d00ab6e88147bef66fa4 Mon Sep 17 00:00:00 2001 From: Rahul Verma Date: Sun, 10 Apr 2016 12:42:59 +0300 Subject: qed*: remove version dependency Inbox drivers don't need versioning scheme in order to guarantee compatibility, as both qed and qede are compiled from same codebase. Signed-off-by: Rahul Verma Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 2 +- include/linux/qed/qed_if.h | 9 --------- 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index e1d69834a11f..e00c8dbfc324 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -167,7 +167,7 @@ struct qed_eth_ops { struct qed_eth_stats *stats); }; -const struct qed_eth_ops *qed_get_eth_ops(u32 version); +const struct qed_eth_ops *qed_get_eth_ops(void); void qed_put_eth_ops(void); #endif diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 1f7599c77cd4..b007011e1c82 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -271,15 +271,6 @@ struct qed_common_ops { enum qed_led_mode mode); }; -/** - * @brief qed_get_protocol_version - * - * @param protocol - * - * @return version supported by qed for given protocol driver - */ -u32 qed_get_protocol_version(enum qed_protocol protocol); - #define MASK_FIELD(_name, _value) \ ((_value) &= (_name ## _MASK)) -- cgit v1.2.3 From 8c5ebd0c792a097fcc0e526debbe0887ee378ae5 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Sun, 10 Apr 2016 12:43:00 +0300 Subject: qed: add Rx flow hash/indirection support. Adds the required API for passing RSS-related configuration from qede. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 1 + include/linux/qed/qed_if.h | 11 +++++++++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index e00c8dbfc324..795c9902e02f 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -27,6 +27,7 @@ struct qed_dev_eth_info { struct qed_update_vport_rss_params { u16 rss_ind_table[128]; u32 rss_key[10]; + u8 rss_caps; }; struct qed_update_vport_params { diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index b007011e1c82..67e8c206b2c1 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -515,4 +515,15 @@ static inline void internal_ram_wr(void __iomem *addr, __internal_ram_wr(NULL, addr, size, data); } +enum qed_rss_caps { + QED_RSS_IPV4 = 0x1, + QED_RSS_IPV6 = 0x2, + QED_RSS_IPV4_TCP = 0x4, + QED_RSS_IPV6_TCP = 0x8, + QED_RSS_IPV4_UDP = 0x10, + QED_RSS_IPV6_UDP = 0x20, +}; + +#define QED_RSS_IND_TABLE_SIZE 128 +#define QED_RSS_KEY_SIZE 10 /* size in 32b chunks */ #endif -- cgit v1.2.3 From 04cf31a759ef575f750a63777cee95500e410994 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 24 Mar 2016 22:04:01 +1100 Subject: ftrace: Make ftrace_location_range() global In order to support live patching on powerpc we would like to call ftrace_location_range(), so make it global. Signed-off-by: Torsten Duwe Signed-off-by: Balbir Singh Signed-off-by: Michael Ellerman --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 81de7123959d..3481a8e405f9 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -455,6 +455,7 @@ int ftrace_update_record(struct dyn_ftrace *rec, int enable); int ftrace_test_record(struct dyn_ftrace *rec, int enable); void ftrace_run_stop_machine(int command); unsigned long ftrace_location(unsigned long ip); +unsigned long ftrace_location_range(unsigned long start, unsigned long end); unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec); unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec); -- cgit v1.2.3 From d17322feecf80152303426dd724577025d1fbd7e Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 9 Apr 2016 10:52:26 +0200 Subject: gpio: sx150x: move platform data into driver The sx150x has some platform data definition in but this file is only included from the driver in the whole kernel so move its contents into the driver. Cc: Wei Chen Cc: Peter Rosin Acked-by: Wolfram Sang Signed-off-by: Linus Walleij --- include/linux/i2c/sx150x.h | 82 ---------------------------------------------- 1 file changed, 82 deletions(-) delete mode 100644 include/linux/i2c/sx150x.h (limited to 'include/linux') diff --git a/include/linux/i2c/sx150x.h b/include/linux/i2c/sx150x.h deleted file mode 100644 index 52baa79d69a7..000000000000 --- a/include/linux/i2c/sx150x.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Driver for the Semtech SX150x I2C GPIO Expanders - * - * Copyright (c) 2010, Code Aurora Forum. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ -#ifndef __LINUX_I2C_SX150X_H -#define __LINUX_I2C_SX150X_H - -/** - * struct sx150x_platform_data - config data for SX150x driver - * @gpio_base: The index number of the first GPIO assigned to this - * GPIO expander. The expander will create a block of - * consecutively numbered gpios beginning at the given base, - * with the size of the block depending on the model of the - * expander chip. - * @oscio_is_gpo: If set to true, the driver will configure OSCIO as a GPO - * instead of as an oscillator, increasing the size of the - * GP(I)O pool created by this expander by one. The - * output-only GPO pin will be added at the end of the block. - * @io_pullup_ena: A bit-mask which enables or disables the pull-up resistor - * for each IO line in the expander. Setting the bit at - * position n will enable the pull-up for the IO at - * the corresponding offset. For chips with fewer than - * 16 IO pins, high-end bits are ignored. - * @io_pulldn_ena: A bit-mask which enables-or disables the pull-down - * resistor for each IO line in the expander. Setting the - * bit at position n will enable the pull-down for the IO at - * the corresponding offset. For chips with fewer than - * 16 IO pins, high-end bits are ignored. - * @io_open_drain_ena: A bit-mask which enables-or disables open-drain - * operation for each IO line in the expander. Setting the - * bit at position n enables open-drain operation for - * the IO at the corresponding offset. Clearing the bit - * enables regular push-pull operation for that IO. - * For chips with fewer than 16 IO pins, high-end bits - * are ignored. - * @io_polarity: A bit-mask which enables polarity inversion for each IO line - * in the expander. Setting the bit at position n inverts - * the polarity of that IO line, while clearing it results - * in normal polarity. For chips with fewer than 16 IO pins, - * high-end bits are ignored. - * @irq_summary: The 'summary IRQ' line to which the GPIO expander's INT line - * is connected, via which it reports interrupt events - * across all GPIO lines. This must be a real, - * pre-existing IRQ line. - * Setting this value < 0 disables the irq_chip functionality - * of the driver. - * @irq_base: The first 'virtual IRQ' line at which our block of GPIO-based - * IRQ lines will appear. Similarly to gpio_base, the expander - * will create a block of irqs beginning at this number. - * This value is ignored if irq_summary is < 0. - * @reset_during_probe: If set to true, the driver will trigger a full - * reset of the chip at the beginning of the probe - * in order to place it in a known state. - */ -struct sx150x_platform_data { - unsigned gpio_base; - bool oscio_is_gpo; - u16 io_pullup_ena; - u16 io_pulldn_ena; - u16 io_open_drain_ena; - u16 io_polarity; - int irq_summary; - unsigned irq_base; - bool reset_during_probe; -}; - -#endif /* __LINUX_I2C_SX150X_H */ -- cgit v1.2.3 From cbc53e08a793b073e79f42ca33f1f3568703540d Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sun, 10 Apr 2016 21:44:51 -0400 Subject: GSO: Add GSO type for fixed IPv4 ID This patch adds support for TSO using IPv4 headers with a fixed IP ID field. This is meant to allow us to do a lossless GRO in the case of TCP flows that use a fixed IP ID such as those that convert IPv6 header to IPv4 headers. In addition I am adding a feature that for now I am referring to TSO with IP ID mangling. Basically when this flag is enabled the device has the option to either output the flow with incrementing IP IDs or with a fixed IP ID regardless of what the original IP ID ordering was. This is useful in cases where the DF bit is set and we do not care if the original IP ID value is maintained. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 3 +++ include/linux/netdevice.h | 1 + include/linux/skbuff.h | 20 +++++++++++--------- 3 files changed, 15 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index a734bf43d190..7cf272a4b5c8 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -39,6 +39,7 @@ enum { NETIF_F_UFO_BIT, /* ... UDPv4 fragmentation */ NETIF_F_GSO_ROBUST_BIT, /* ... ->SKB_GSO_DODGY */ NETIF_F_TSO_ECN_BIT, /* ... TCP ECN support */ + NETIF_F_TSO_MANGLEID_BIT, /* ... IPV4 ID mangling allowed */ NETIF_F_TSO6_BIT, /* ... TCPv6 segmentation */ NETIF_F_FSO_BIT, /* ... FCoE segmentation */ NETIF_F_GSO_GRE_BIT, /* ... GRE with TSO */ @@ -120,6 +121,7 @@ enum { #define NETIF_F_GSO_SIT __NETIF_F(GSO_SIT) #define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL) #define NETIF_F_GSO_UDP_TUNNEL_CSUM __NETIF_F(GSO_UDP_TUNNEL_CSUM) +#define NETIF_F_TSO_MANGLEID __NETIF_F(TSO_MANGLEID) #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM) #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) @@ -147,6 +149,7 @@ enum { /* List of features with software fallbacks. */ #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ + NETIF_F_TSO_MANGLEID | \ NETIF_F_TSO6 | NETIF_F_UFO) /* List of IP checksum features. Note that NETIF_F_ HW_CSUM should not be diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9884fe9a6552..8e372d01b3c1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3992,6 +3992,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCPV6 != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_GRE != (NETIF_F_GSO_GRE >> NETIF_F_GSO_SHIFT)); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 007381270ff8..5fba16658f9d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -465,23 +465,25 @@ enum { /* This indicates the tcp segment has CWR set. */ SKB_GSO_TCP_ECN = 1 << 3, - SKB_GSO_TCPV6 = 1 << 4, + SKB_GSO_TCP_FIXEDID = 1 << 4, - SKB_GSO_FCOE = 1 << 5, + SKB_GSO_TCPV6 = 1 << 5, - SKB_GSO_GRE = 1 << 6, + SKB_GSO_FCOE = 1 << 6, - SKB_GSO_GRE_CSUM = 1 << 7, + SKB_GSO_GRE = 1 << 7, - SKB_GSO_IPIP = 1 << 8, + SKB_GSO_GRE_CSUM = 1 << 8, - SKB_GSO_SIT = 1 << 9, + SKB_GSO_IPIP = 1 << 9, - SKB_GSO_UDP_TUNNEL = 1 << 10, + SKB_GSO_SIT = 1 << 10, - SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11, + SKB_GSO_UDP_TUNNEL = 1 << 11, - SKB_GSO_TUNNEL_REMCSUM = 1 << 12, + SKB_GSO_UDP_TUNNEL_CSUM = 1 << 12, + + SKB_GSO_TUNNEL_REMCSUM = 1 << 13, }; #if BITS_PER_LONG > 32 -- cgit v1.2.3 From 1530545ed64b42e87acb43c0c16401bd1ebae6bf Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sun, 10 Apr 2016 21:44:57 -0400 Subject: GRO: Add support for TCP with fixed IPv4 ID field, limit tunnel IP ID values This patch does two things. First it allows TCP to aggregate TCP frames with a fixed IPv4 ID field. As a result we should now be able to aggregate flows that were converted from IPv6 to IPv4. In addition this allows us more flexibility for future implementations of segmentation as we may be able to use a fixed IP ID when segmenting the flow. The second thing this does is that it places limitations on the outer IPv4 ID header in the case of tunneled frames. Specifically it forces the IP ID to be incrementing by 1 unless the DF bit is set in the outer IPv4 header. This way we can avoid creating overlapping series of IP IDs that could possibly be fragmented if the frame goes through GRO and is then resegmented via GSO. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8e372d01b3c1..2d70c521d516 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2121,7 +2121,10 @@ struct napi_gro_cb { /* Used in GRE, set in fou/gue_gro_receive */ u8 is_fou:1; - /* 6 bit hole */ + /* Used to determine if flush_id can be ignored */ + u8 is_atomic:1; + + /* 5 bit hole */ /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; -- cgit v1.2.3 From 802ab55adc39a06940a1b384e9fd0387fc762d7e Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sun, 10 Apr 2016 21:45:03 -0400 Subject: GSO: Support partial segmentation offload This patch adds support for something I am referring to as GSO partial. The basic idea is that we can support a broader range of devices for segmentation if we use fixed outer headers and have the hardware only really deal with segmenting the inner header. The idea behind the naming is due to the fact that everything before csum_start will be fixed headers, and everything after will be the region that is handled by hardware. With the current implementation it allows us to add support for the following GSO types with an inner TSO_MANGLEID or TSO6 offload: NETIF_F_GSO_GRE NETIF_F_GSO_GRE_CSUM NETIF_F_GSO_IPIP NETIF_F_GSO_SIT NETIF_F_UDP_TUNNEL NETIF_F_UDP_TUNNEL_CSUM In the case of hardware that already supports tunneling we may be able to extend this further to support TSO_TCPV4 without TSO_MANGLEID if the hardware can support updating inner IPv4 headers. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 5 +++++ include/linux/netdevice.h | 2 ++ include/linux/skbuff.h | 9 +++++++-- 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 7cf272a4b5c8..9fc79df0e561 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -48,6 +48,10 @@ enum { NETIF_F_GSO_SIT_BIT, /* ... SIT tunnel with TSO */ NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */ NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT,/* ... UDP TUNNEL with TSO & CSUM */ + NETIF_F_GSO_PARTIAL_BIT, /* ... Only segment inner-most L4 + * in hardware and all other + * headers in software. + */ NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ NETIF_F_GSO_TUNNEL_REMCSUM_BIT, @@ -122,6 +126,7 @@ enum { #define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL) #define NETIF_F_GSO_UDP_TUNNEL_CSUM __NETIF_F(GSO_UDP_TUNNEL_CSUM) #define NETIF_F_TSO_MANGLEID __NETIF_F(TSO_MANGLEID) +#define NETIF_F_GSO_PARTIAL __NETIF_F(GSO_PARTIAL) #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM) #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2d70c521d516..a3bb534576a3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1654,6 +1654,7 @@ struct net_device { netdev_features_t vlan_features; netdev_features_t hw_enc_features; netdev_features_t mpls_features; + netdev_features_t gso_partial_features; int ifindex; int group; @@ -4004,6 +4005,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_SIT != (NETIF_F_GSO_SIT >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL != (NETIF_F_GSO_UDP_TUNNEL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5fba16658f9d..da0ace389fec 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -483,7 +483,9 @@ enum { SKB_GSO_UDP_TUNNEL_CSUM = 1 << 12, - SKB_GSO_TUNNEL_REMCSUM = 1 << 13, + SKB_GSO_PARTIAL = 1 << 13, + + SKB_GSO_TUNNEL_REMCSUM = 1 << 14, }; #if BITS_PER_LONG > 32 @@ -3591,7 +3593,10 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb) * Keeps track of level of encapsulation of network headers. */ struct skb_gso_cb { - int mac_offset; + union { + int mac_offset; + int data_offset; + }; int encap_level; __wsum csum; __u16 csum_start; -- cgit v1.2.3 From 435faee1aae9c1ac231f89e4faf0437bfe29f425 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 13 Apr 2016 00:10:51 +0200 Subject: bpf, verifier: add ARG_PTR_TO_RAW_STACK type When passing buffers from eBPF stack space into a helper function, we have ARG_PTR_TO_STACK argument type for helpers available. The verifier makes sure that such buffers are initialized, within boundaries, etc. However, the downside with this is that we have a couple of helper functions such as bpf_skb_load_bytes() that fill out the passed buffer in the expected success case anyway, so zero initializing them prior to the helper call is unneeded/wasted instructions in the eBPF program that can be avoided. Therefore, add a new helper function argument type called ARG_PTR_TO_RAW_STACK. The idea is to skip the STACK_MISC check in check_stack_boundary() and color the related stack slots as STACK_MISC after we checked all call arguments. Helper functions using ARG_PTR_TO_RAW_STACK must make sure that every path of the helper function will fill the provided buffer area, so that we cannot leak any uninitialized stack memory. This f.e. means that error paths need to memset() the buffers, but the expected fast-path doesn't have to do this anymore. Since there's no such helper needing more than at most one ARG_PTR_TO_RAW_STACK argument, we can keep it simple and don't need to check for multiple areas. Should in future such a use-case really appear, we have check_raw_mode() that will make sure we implement support for it first. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b2365a6eba3d..5fb3c610fa96 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -66,6 +66,11 @@ enum bpf_arg_type { * functions that access data on eBPF program stack */ ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */ + ARG_PTR_TO_RAW_STACK, /* any pointer to eBPF program stack, area does not + * need to be initialized, helper function must fill + * all bytes or clear them in error case. + */ + ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */ ARG_CONST_STACK_SIZE_OR_ZERO, /* number of bytes accessed from stack or 0 */ -- cgit v1.2.3 From 58bc67fc32b1c67fb045f4828a67134dc8fee631 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 10 Jul 2015 15:23:28 +0300 Subject: ARM: OMAP2+: gpmc: Add platform data Add a platform data structure for GPMC. It contains all the necessary platform information that needs to be passed from platform init code to GPMC driver. Signed-off-by: Roger Quadros Acked-by: Tony Lindgren --- include/linux/omap-gpmc.h | 3 +-- include/linux/platform_data/gpmc-omap.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 include/linux/platform_data/gpmc-omap.h (limited to 'include/linux') diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index d833eb4dd446..45d9075be1e5 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -7,8 +7,7 @@ * option) any later version. */ -/* Maximum Number of Chip Selects */ -#define GPMC_CS_NUM 8 +#include #define GPMC_CONFIG_WP 0x00000005 diff --git a/include/linux/platform_data/gpmc-omap.h b/include/linux/platform_data/gpmc-omap.h new file mode 100644 index 000000000000..6804a8b387d7 --- /dev/null +++ b/include/linux/platform_data/gpmc-omap.h @@ -0,0 +1,30 @@ +/* + * OMAP GPMC Platform data + * + * Copyright (C) 2014 Texas Instruments, Inc. - http://www.ti.com + * Roger Quadros + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + */ + +#ifndef _GPMC_OMAP_H_ +#define _GPMC_OMAP_H_ + +/* Maximum Number of Chip Selects */ +#define GPMC_CS_NUM 8 + +/* Data for each chip select */ +struct gpmc_omap_cs_data { + bool valid; /* data is valid */ + bool is_nand; /* device within this CS is NAND */ + struct platform_device *pdev; /* device within this CS region */ + unsigned int pdata_size; +}; + +struct gpmc_omap_platform_data { + struct gpmc_omap_cs_data cs[GPMC_CS_NUM]; +}; + +#endif /* _GPMC_OMAP_H */ -- cgit v1.2.3 From fabe7d7756d17f5da4bd80fa2373c4bd93ed39e5 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 10 Jul 2015 15:23:29 +0300 Subject: ARM: OMAP2+: gpmc: Add gpmc timings and settings to platform data Add device_timings, gpmc_timings and gpmc_setting to gpmc platform data. Signed-off-by: Roger Quadros Acked-by: Tony Lindgren --- include/linux/omap-gpmc.h | 139 ------------------------------- include/linux/platform_data/gpmc-omap.h | 142 ++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+), 139 deletions(-) (limited to 'include/linux') diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index 45d9075be1e5..2dcef1c8c8d4 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -14,145 +14,6 @@ #define GPMC_IRQ_FIFOEVENTENABLE 0x01 #define GPMC_IRQ_COUNT_EVENT 0x02 -#define GPMC_BURST_4 4 /* 4 word burst */ -#define GPMC_BURST_8 8 /* 8 word burst */ -#define GPMC_BURST_16 16 /* 16 word burst */ -#define GPMC_DEVWIDTH_8BIT 1 /* 8-bit device width */ -#define GPMC_DEVWIDTH_16BIT 2 /* 16-bit device width */ -#define GPMC_MUX_AAD 1 /* Addr-Addr-Data multiplex */ -#define GPMC_MUX_AD 2 /* Addr-Data multiplex */ - -/* bool type time settings */ -struct gpmc_bool_timings { - bool cycle2cyclediffcsen; - bool cycle2cyclesamecsen; - bool we_extra_delay; - bool oe_extra_delay; - bool adv_extra_delay; - bool cs_extra_delay; - bool time_para_granularity; -}; - -/* - * Note that all values in this struct are in nanoseconds except sync_clk - * (which is in picoseconds), while the register values are in gpmc_fck cycles. - */ -struct gpmc_timings { - /* Minimum clock period for synchronous mode (in picoseconds) */ - u32 sync_clk; - - /* Chip-select signal timings corresponding to GPMC_CS_CONFIG2 */ - u32 cs_on; /* Assertion time */ - u32 cs_rd_off; /* Read deassertion time */ - u32 cs_wr_off; /* Write deassertion time */ - - /* ADV signal timings corresponding to GPMC_CONFIG3 */ - u32 adv_on; /* Assertion time */ - u32 adv_rd_off; /* Read deassertion time */ - u32 adv_wr_off; /* Write deassertion time */ - u32 adv_aad_mux_on; /* ADV assertion time for AAD */ - u32 adv_aad_mux_rd_off; /* ADV read deassertion time for AAD */ - u32 adv_aad_mux_wr_off; /* ADV write deassertion time for AAD */ - - /* WE signals timings corresponding to GPMC_CONFIG4 */ - u32 we_on; /* WE assertion time */ - u32 we_off; /* WE deassertion time */ - - /* OE signals timings corresponding to GPMC_CONFIG4 */ - u32 oe_on; /* OE assertion time */ - u32 oe_off; /* OE deassertion time */ - u32 oe_aad_mux_on; /* OE assertion time for AAD */ - u32 oe_aad_mux_off; /* OE deassertion time for AAD */ - - /* Access time and cycle time timings corresponding to GPMC_CONFIG5 */ - u32 page_burst_access; /* Multiple access word delay */ - u32 access; /* Start-cycle to first data valid delay */ - u32 rd_cycle; /* Total read cycle time */ - u32 wr_cycle; /* Total write cycle time */ - - u32 bus_turnaround; - u32 cycle2cycle_delay; - - u32 wait_monitoring; - u32 clk_activation; - - /* The following are only on OMAP3430 */ - u32 wr_access; /* WRACCESSTIME */ - u32 wr_data_mux_bus; /* WRDATAONADMUXBUS */ - - struct gpmc_bool_timings bool_timings; -}; - -/* Device timings in picoseconds */ -struct gpmc_device_timings { - u32 t_ceasu; /* address setup to CS valid */ - u32 t_avdasu; /* address setup to ADV valid */ - /* XXX: try to combine t_avdp_r & t_avdp_w. Issue is - * of tusb using these timings even for sync whilst - * ideally for adv_rd/(wr)_off it should have considered - * t_avdh instead. This indirectly necessitates r/w - * variations of t_avdp as it is possible to have one - * sync & other async - */ - u32 t_avdp_r; /* ADV low time (what about t_cer ?) */ - u32 t_avdp_w; - u32 t_aavdh; /* address hold time */ - u32 t_oeasu; /* address setup to OE valid */ - u32 t_aa; /* access time from ADV assertion */ - u32 t_iaa; /* initial access time */ - u32 t_oe; /* access time from OE assertion */ - u32 t_ce; /* access time from CS asertion */ - u32 t_rd_cycle; /* read cycle time */ - u32 t_cez_r; /* read CS deassertion to high Z */ - u32 t_cez_w; /* write CS deassertion to high Z */ - u32 t_oez; /* OE deassertion to high Z */ - u32 t_weasu; /* address setup to WE valid */ - u32 t_wpl; /* write assertion time */ - u32 t_wph; /* write deassertion time */ - u32 t_wr_cycle; /* write cycle time */ - - u32 clk; - u32 t_bacc; /* burst access valid clock to output delay */ - u32 t_ces; /* CS setup time to clk */ - u32 t_avds; /* ADV setup time to clk */ - u32 t_avdh; /* ADV hold time from clk */ - u32 t_ach; /* address hold time from clk */ - u32 t_rdyo; /* clk to ready valid */ - - u32 t_ce_rdyz; /* XXX: description ?, or use t_cez instead */ - u32 t_ce_avd; /* CS on to ADV on delay */ - - /* XXX: check the possibility of combining - * cyc_aavhd_oe & cyc_aavdh_we - */ - u8 cyc_aavdh_oe;/* read address hold time in cycles */ - u8 cyc_aavdh_we;/* write address hold time in cycles */ - u8 cyc_oe; /* access time from OE assertion in cycles */ - u8 cyc_wpl; /* write deassertion time in cycles */ - u32 cyc_iaa; /* initial access time in cycles */ - - /* extra delays */ - bool ce_xdelay; - bool avd_xdelay; - bool oe_xdelay; - bool we_xdelay; -}; - -struct gpmc_settings { - bool burst_wrap; /* enables wrap bursting */ - bool burst_read; /* enables read page/burst mode */ - bool burst_write; /* enables write page/burst mode */ - bool device_nand; /* device is NAND */ - bool sync_read; /* enables synchronous reads */ - bool sync_write; /* enables synchronous writes */ - bool wait_on_read; /* monitor wait on reads */ - bool wait_on_write; /* monitor wait on writes */ - u32 burst_len; /* page/burst length */ - u32 device_width; /* device bus width (8 or 16 bit) */ - u32 mux_add_data; /* multiplex address & data */ - u32 wait_pin; /* wait-pin to be used */ -}; - extern int gpmc_calc_timings(struct gpmc_timings *gpmc_t, struct gpmc_settings *gpmc_s, struct gpmc_device_timings *dev_t); diff --git a/include/linux/platform_data/gpmc-omap.h b/include/linux/platform_data/gpmc-omap.h index 6804a8b387d7..67ccdb0e1606 100644 --- a/include/linux/platform_data/gpmc-omap.h +++ b/include/linux/platform_data/gpmc-omap.h @@ -15,10 +15,152 @@ /* Maximum Number of Chip Selects */ #define GPMC_CS_NUM 8 +/* bool type time settings */ +struct gpmc_bool_timings { + bool cycle2cyclediffcsen; + bool cycle2cyclesamecsen; + bool we_extra_delay; + bool oe_extra_delay; + bool adv_extra_delay; + bool cs_extra_delay; + bool time_para_granularity; +}; + +/* + * Note that all values in this struct are in nanoseconds except sync_clk + * (which is in picoseconds), while the register values are in gpmc_fck cycles. + */ +struct gpmc_timings { + /* Minimum clock period for synchronous mode (in picoseconds) */ + u32 sync_clk; + + /* Chip-select signal timings corresponding to GPMC_CS_CONFIG2 */ + u32 cs_on; /* Assertion time */ + u32 cs_rd_off; /* Read deassertion time */ + u32 cs_wr_off; /* Write deassertion time */ + + /* ADV signal timings corresponding to GPMC_CONFIG3 */ + u32 adv_on; /* Assertion time */ + u32 adv_rd_off; /* Read deassertion time */ + u32 adv_wr_off; /* Write deassertion time */ + u32 adv_aad_mux_on; /* ADV assertion time for AAD */ + u32 adv_aad_mux_rd_off; /* ADV read deassertion time for AAD */ + u32 adv_aad_mux_wr_off; /* ADV write deassertion time for AAD */ + + /* WE signals timings corresponding to GPMC_CONFIG4 */ + u32 we_on; /* WE assertion time */ + u32 we_off; /* WE deassertion time */ + + /* OE signals timings corresponding to GPMC_CONFIG4 */ + u32 oe_on; /* OE assertion time */ + u32 oe_off; /* OE deassertion time */ + u32 oe_aad_mux_on; /* OE assertion time for AAD */ + u32 oe_aad_mux_off; /* OE deassertion time for AAD */ + + /* Access time and cycle time timings corresponding to GPMC_CONFIG5 */ + u32 page_burst_access; /* Multiple access word delay */ + u32 access; /* Start-cycle to first data valid delay */ + u32 rd_cycle; /* Total read cycle time */ + u32 wr_cycle; /* Total write cycle time */ + + u32 bus_turnaround; + u32 cycle2cycle_delay; + + u32 wait_monitoring; + u32 clk_activation; + + /* The following are only on OMAP3430 */ + u32 wr_access; /* WRACCESSTIME */ + u32 wr_data_mux_bus; /* WRDATAONADMUXBUS */ + + struct gpmc_bool_timings bool_timings; +}; + +/* Device timings in picoseconds */ +struct gpmc_device_timings { + u32 t_ceasu; /* address setup to CS valid */ + u32 t_avdasu; /* address setup to ADV valid */ + /* XXX: try to combine t_avdp_r & t_avdp_w. Issue is + * of tusb using these timings even for sync whilst + * ideally for adv_rd/(wr)_off it should have considered + * t_avdh instead. This indirectly necessitates r/w + * variations of t_avdp as it is possible to have one + * sync & other async + */ + u32 t_avdp_r; /* ADV low time (what about t_cer ?) */ + u32 t_avdp_w; + u32 t_aavdh; /* address hold time */ + u32 t_oeasu; /* address setup to OE valid */ + u32 t_aa; /* access time from ADV assertion */ + u32 t_iaa; /* initial access time */ + u32 t_oe; /* access time from OE assertion */ + u32 t_ce; /* access time from CS asertion */ + u32 t_rd_cycle; /* read cycle time */ + u32 t_cez_r; /* read CS deassertion to high Z */ + u32 t_cez_w; /* write CS deassertion to high Z */ + u32 t_oez; /* OE deassertion to high Z */ + u32 t_weasu; /* address setup to WE valid */ + u32 t_wpl; /* write assertion time */ + u32 t_wph; /* write deassertion time */ + u32 t_wr_cycle; /* write cycle time */ + + u32 clk; + u32 t_bacc; /* burst access valid clock to output delay */ + u32 t_ces; /* CS setup time to clk */ + u32 t_avds; /* ADV setup time to clk */ + u32 t_avdh; /* ADV hold time from clk */ + u32 t_ach; /* address hold time from clk */ + u32 t_rdyo; /* clk to ready valid */ + + u32 t_ce_rdyz; /* XXX: description ?, or use t_cez instead */ + u32 t_ce_avd; /* CS on to ADV on delay */ + + /* XXX: check the possibility of combining + * cyc_aavhd_oe & cyc_aavdh_we + */ + u8 cyc_aavdh_oe;/* read address hold time in cycles */ + u8 cyc_aavdh_we;/* write address hold time in cycles */ + u8 cyc_oe; /* access time from OE assertion in cycles */ + u8 cyc_wpl; /* write deassertion time in cycles */ + u32 cyc_iaa; /* initial access time in cycles */ + + /* extra delays */ + bool ce_xdelay; + bool avd_xdelay; + bool oe_xdelay; + bool we_xdelay; +}; + +#define GPMC_BURST_4 4 /* 4 word burst */ +#define GPMC_BURST_8 8 /* 8 word burst */ +#define GPMC_BURST_16 16 /* 16 word burst */ +#define GPMC_DEVWIDTH_8BIT 1 /* 8-bit device width */ +#define GPMC_DEVWIDTH_16BIT 2 /* 16-bit device width */ +#define GPMC_MUX_AAD 1 /* Addr-Addr-Data multiplex */ +#define GPMC_MUX_AD 2 /* Addr-Data multiplex */ + +struct gpmc_settings { + bool burst_wrap; /* enables wrap bursting */ + bool burst_read; /* enables read page/burst mode */ + bool burst_write; /* enables write page/burst mode */ + bool device_nand; /* device is NAND */ + bool sync_read; /* enables synchronous reads */ + bool sync_write; /* enables synchronous writes */ + bool wait_on_read; /* monitor wait on reads */ + bool wait_on_write; /* monitor wait on writes */ + u32 burst_len; /* page/burst length */ + u32 device_width; /* device bus width (8 or 16 bit) */ + u32 mux_add_data; /* multiplex address & data */ + u32 wait_pin; /* wait-pin to be used */ +}; + /* Data for each chip select */ struct gpmc_omap_cs_data { bool valid; /* data is valid */ bool is_nand; /* device within this CS is NAND */ + struct gpmc_settings *settings; + struct gpmc_device_timings *device_timings; + struct gpmc_timings *gpmc_timings; struct platform_device *pdev; /* device within this CS region */ unsigned int pdata_size; }; -- cgit v1.2.3 From f47fcad63f6847ea677c6c7030f30fd6438e0052 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 5 Aug 2015 13:58:01 +0300 Subject: memory: omap-gpmc: Introduce GPMC to NAND interface The OMAP GPMC module has certain registers dedicated for NAND access and some NAND bits mixed with other GPMC functionality. For the NAND dedicated registers we have the struct gpmc_nand_regs. The NAND driver needs to access NAND specific bits from the following non-dedicated registers - EMPTYWRITEBUFFERSTATUS from GPMC_STATUS For accessing these bits we introduce the struct gpmc_nand_ops. Add gpmc_omap_get_nand_ops() that returns the gpmc_nand_ops along with updating the gpmc_nand_regs. This API will be called by the OMAP NAND driver to access the necessary bits in GPMC register space. Signed-off-by: Roger Quadros Acked-by: Tony Lindgren --- include/linux/omap-gpmc.h | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index 2dcef1c8c8d4..dc2ada6fb9b4 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -14,14 +14,45 @@ #define GPMC_IRQ_FIFOEVENTENABLE 0x01 #define GPMC_IRQ_COUNT_EVENT 0x02 +/** + * gpmc_nand_ops - Interface between NAND and GPMC + * @nand_write_buffer_empty: get the NAND write buffer empty status. + */ +struct gpmc_nand_ops { + bool (*nand_writebuffer_empty)(void); +}; + +struct gpmc_nand_regs; + +#if IS_ENABLED(CONFIG_OMAP_GPMC) +struct gpmc_nand_ops *gpmc_omap_get_nand_ops(struct gpmc_nand_regs *regs, + int cs); +#else +static inline gpmc_nand_ops *gpmc_omap_get_nand_ops(struct gpmc_nand_regs *regs, + int cs) +{ + return NULL; +} +#endif /* CONFIG_OMAP_GPMC */ + +/*--------------------------------*/ + +/* deprecated APIs */ +#if IS_ENABLED(CONFIG_OMAP_GPMC) +void gpmc_update_nand_reg(struct gpmc_nand_regs *reg, int cs); +#else +static inline void gpmc_update_nand_reg(struct gpmc_nand_regs *reg, int cs) +{ +} +#endif /* CONFIG_OMAP_GPMC */ +/*--------------------------------*/ + extern int gpmc_calc_timings(struct gpmc_timings *gpmc_t, struct gpmc_settings *gpmc_s, struct gpmc_device_timings *dev_t); -struct gpmc_nand_regs; struct device_node; -extern void gpmc_update_nand_reg(struct gpmc_nand_regs *reg, int cs); extern int gpmc_get_client_irq(unsigned irq_config); extern unsigned int gpmc_ticks_to_ns(unsigned int ticks); -- cgit v1.2.3 From 384258f252727c67772bbd48dad3185a30ba50d3 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 30 Jul 2015 14:49:23 +0300 Subject: memory: omap-gpmc: Implement IRQ domain for NAND IRQs GPMC provides 2 interrupts for NAND use. i.e. fifoevent and termcount. Use IRQ domain for this. NAND device tree node can then get the necessary interrupts by using gpmc as the interrupt parent. Legacy boot uses gpmc_get_client_irq to get the NAND interrupts from the GPMC IRQ domain. Get rid of custom bitmasks and use IRQ domain for that as well. Signed-off-by: Roger Quadros Acked-by: Rob Herring Acked-by: Tony Lindgren --- include/linux/omap-gpmc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index dc2ada6fb9b4..9e9d79e8efa5 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -11,8 +11,9 @@ #define GPMC_CONFIG_WP 0x00000005 -#define GPMC_IRQ_FIFOEVENTENABLE 0x01 -#define GPMC_IRQ_COUNT_EVENT 0x02 +/* IRQ numbers in GPMC IRQ domain for legacy boot use */ +#define GPMC_IRQ_FIFOEVENTENABLE 0 +#define GPMC_IRQ_COUNT_EVENT 1 /** * gpmc_nand_ops - Interface between NAND and GPMC -- cgit v1.2.3 From c509aefd75d026f4ef4aa306131d7a780c2eda7b Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 5 Aug 2015 14:01:50 +0300 Subject: mtd: nand: omap: Use gpmc_omap_get_nand_ops() to get NAND registers Deprecate nand register passing via platform data and use gpmc_omap_get_nand_ops() instead. Signed-off-by: Roger Quadros Acked-by: Brian Norris Acked-by: Tony Lindgren --- include/linux/platform_data/mtd-nand-omap2.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h index 090bbab0130a..a067f581e938 100644 --- a/include/linux/platform_data/mtd-nand-omap2.h +++ b/include/linux/platform_data/mtd-nand-omap2.h @@ -75,10 +75,12 @@ struct omap_nand_platform_data { enum nand_io xfer_type; int devsize; enum omap_ecc ecc_opt; - struct gpmc_nand_regs reg; /* for passing the partitions */ struct device_node *of_node; struct device_node *elm_of_node; + + /* deprecated */ + struct gpmc_nand_regs reg; }; #endif -- cgit v1.2.3 From c9711ec5250b22fd94e9b34c17c095e001a90e66 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 21 May 2014 07:29:03 +0300 Subject: mtd: nand: omap: Clean up device tree support Move NAND specific device tree parsing to NAND driver. The NAND controller node must have a compatible id, register space resource and interrupt resource. Signed-off-by: Roger Quadros Acked-by: Brian Norris Acked-by: Tony Lindgren --- include/linux/platform_data/mtd-nand-omap2.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h index a067f581e938..ff27e5a77e03 100644 --- a/include/linux/platform_data/mtd-nand-omap2.h +++ b/include/linux/platform_data/mtd-nand-omap2.h @@ -76,11 +76,10 @@ struct omap_nand_platform_data { int devsize; enum omap_ecc ecc_opt; - /* for passing the partitions */ - struct device_node *of_node; struct device_node *elm_of_node; /* deprecated */ struct gpmc_nand_regs reg; + struct device_node *of_node; }; #endif -- cgit v1.2.3 From 9e6946215dbd9803e8b511928c9f61f3a49e2c58 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 7 Aug 2015 10:38:13 +0300 Subject: memory: omap-gpmc: Prevent GPMC_STATUS from being accessed via gpmc_regs GPMC_STATUS register is private to the GPMC module and must not be accessed directly by NAND driver through the gpmc_regs. They must use gpmc_omap_get_nand_ops() instead. Signed-off-by: Roger Quadros Acked-by: Tony Lindgren --- include/linux/platform_data/mtd-nand-omap2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h index ff27e5a77e03..7f6de5377f80 100644 --- a/include/linux/platform_data/mtd-nand-omap2.h +++ b/include/linux/platform_data/mtd-nand-omap2.h @@ -45,7 +45,6 @@ enum omap_ecc { }; struct gpmc_nand_regs { - void __iomem *gpmc_status; void __iomem *gpmc_nand_command; void __iomem *gpmc_nand_address; void __iomem *gpmc_nand_data; @@ -64,6 +63,8 @@ struct gpmc_nand_regs { void __iomem *gpmc_bch_result4[GPMC_BCH_NUM_REMAINDER]; void __iomem *gpmc_bch_result5[GPMC_BCH_NUM_REMAINDER]; void __iomem *gpmc_bch_result6[GPMC_BCH_NUM_REMAINDER]; + /* Deprecated. Do not use */ + void __iomem *gpmc_status; }; struct omap_nand_platform_data { -- cgit v1.2.3 From 10f22ee367c4aff7841da6a83c10445d7d6328d9 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 6 Aug 2015 17:39:35 +0300 Subject: mtd: nand: omap2: Implement NAND ready using gpiolib The GPMC WAIT pin status are now available over gpiolib. Update the omap_dev_ready() function to use gpio instead of directly accessing GPMC register space. Signed-off-by: Roger Quadros Acked-by: Brian Norris Acked-by: Boris Brezillon Acked-by: Tony Lindgren --- include/linux/platform_data/mtd-nand-omap2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h index 7f6de5377f80..17d57a18bac5 100644 --- a/include/linux/platform_data/mtd-nand-omap2.h +++ b/include/linux/platform_data/mtd-nand-omap2.h @@ -71,7 +71,6 @@ struct omap_nand_platform_data { int cs; struct mtd_partition *parts; int nr_parts; - bool dev_ready; bool flash_bbt; enum nand_io xfer_type; int devsize; @@ -82,5 +81,6 @@ struct omap_nand_platform_data { /* deprecated */ struct gpmc_nand_regs reg; struct device_node *of_node; + bool dev_ready; }; #endif -- cgit v1.2.3 From 6eae29e7e7144d01a6d6af111d232b36cdd30f51 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 2 Apr 2016 10:54:56 -0500 Subject: crypto: doc - document correct return value for request allocation Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/linux/crypto.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 99c94899ad0f..6e28c895c376 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -948,8 +948,7 @@ static inline struct ablkcipher_request *ablkcipher_request_cast( * encrypt and decrypt API calls. During the allocation, the provided ablkcipher * handle is registered in the request data structure. * - * Return: allocated request handle in case of success; IS_ERR() is true in case - * of an error, PTR_ERR() returns the error code. + * Return: allocated request handle in case of success, or NULL if out of memory */ static inline struct ablkcipher_request *ablkcipher_request_alloc( struct crypto_ablkcipher *tfm, gfp_t gfp) -- cgit v1.2.3 From 675f10bde6cc3874632a8f684df2a8a2a8ace76e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 22 Feb 2016 18:29:18 +0800 Subject: f2fs: fix to convert inline directory correctly With below serials, we will lose parts of dirents: 1) mount f2fs with inline_dentry option 2) echo 1 > /sys/fs/f2fs/sdX/dir_level 3) mkdir dir 4) touch 180 files named [1-180] in dir 5) touch 181 in dir 6) echo 3 > /proc/sys/vm/drop_caches 7) ll dir ls: cannot access 2: No such file or directory ls: cannot access 4: No such file or directory ls: cannot access 5: No such file or directory ls: cannot access 6: No such file or directory ls: cannot access 8: No such file or directory ls: cannot access 9: No such file or directory ... total 360 drwxr-xr-x 2 root root 4096 Feb 19 15:12 ./ drwxr-xr-x 3 root root 4096 Feb 19 15:11 ../ -rw-r--r-- 1 root root 0 Feb 19 15:12 1 -rw-r--r-- 1 root root 0 Feb 19 15:12 10 -rw-r--r-- 1 root root 0 Feb 19 15:12 100 -????????? ? ? ? ? ? 101 -????????? ? ? ? ? ? 102 -????????? ? ? ? ? ? 103 ... The reason is: when doing the inline dir conversion, we didn't consider that directory has hierarchical hash structure which can be configured through sysfs interface 'dir_level'. By default, dir_level of directory inode is 0, it means we have one bucket in hash table located in first level, all dirents will be hashed in this bucket, so it has no problem for us to do the duplication simply between inline dentry page and converted normal dentry page. However, if we configured dir_level with the value N (greater than 0), it will expand the bucket number of first level hash table by 2^N - 1, it hashs dirents into different buckets according their hash value, if we still move all dirents to first bucket, it makes incorrent locating for inline dirents, the result is, although we can iterate all dirents through ->readdir, we can't stat some of them in ->lookup which based on hash table searching. This patch fixes this issue by rehashing dirents into correct position when converting inline directory. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index b90e9bdbd1dd..4c02c6521fef 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -508,4 +508,6 @@ enum { F2FS_FT_MAX }; +#define S_SHIFT 12 + #endif /* _LINUX_F2FS_FS_H */ -- cgit v1.2.3 From 298535c00a2cbcd59e38f8f1c0c9ae7b9911e946 Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 8 Apr 2016 15:50:25 -0700 Subject: of, numa: Add NUMA of binding implementation. Add device tree parsing for NUMA topology using device "numa-node-id" property in distance-map and cpu nodes. This is a complete rewrite of a previous patch by: Ganapatrao Kulkarni Signed-off-by: David Daney Acked-by: Rob Herring Signed-off-by: Will Deacon --- include/linux/of.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 7fcb681baadf..76f07c84040f 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -685,6 +685,15 @@ static inline int of_node_to_nid(struct device_node *device) } #endif +#ifdef CONFIG_OF_NUMA +extern int of_numa_init(void); +#else +static inline int of_numa_init(void) +{ + return -ENOSYS; +} +#endif + static inline struct device_node *of_find_matching_node( struct device_node *from, const struct of_device_id *matches) -- cgit v1.2.3 From 522566376a3f8373fbd5ff75bb8a7a2da701c1a7 Mon Sep 17 00:00:00 2001 From: Aviya Erenfeld Date: Thu, 14 Apr 2016 11:59:31 +0200 Subject: devcoredump: add scatterlist support Add scatterlist support (dev_coredumpsg) to allow drivers to avoid vmalloc() like dev_coredumpm(), while also avoiding the module reference that the latter function requires. This internally uses dev_coredumpm() with function inside the devcoredump module, requiring removing the const (which touches the driver using it.) Signed-off-by: Aviya Erenfeld Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- include/linux/devcoredump.h | 86 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 78 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devcoredump.h b/include/linux/devcoredump.h index c0a360e99f64..269521f143ac 100644 --- a/include/linux/devcoredump.h +++ b/include/linux/devcoredump.h @@ -1,3 +1,22 @@ +/* + * This file is provided under the GPLv2 license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Deutschland GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution + * in the file called COPYING. + */ #ifndef __DEVCOREDUMP_H #define __DEVCOREDUMP_H @@ -5,17 +24,62 @@ #include #include +#include +#include + +/* + * _devcd_free_sgtable - free all the memory of the given scatterlist table + * (i.e. both pages and scatterlist instances) + * NOTE: if two tables allocated and chained using the sg_chain function then + * this function should be called only once on the first table + * @table: pointer to sg_table to free + */ +static inline void _devcd_free_sgtable(struct scatterlist *table) +{ + int i; + struct page *page; + struct scatterlist *iter; + struct scatterlist *delete_iter; + + /* free pages */ + iter = table; + for_each_sg(table, iter, sg_nents(table), i) { + page = sg_page(iter); + if (page) + __free_page(page); + } + + /* then free all chained tables */ + iter = table; + delete_iter = table; /* always points on a head of a table */ + while (!sg_is_last(iter)) { + iter++; + if (sg_is_chain(iter)) { + iter = sg_chain_ptr(iter); + kfree(delete_iter); + delete_iter = iter; + } + } + + /* free the last table */ + kfree(delete_iter); +} + + #ifdef CONFIG_DEV_COREDUMP -void dev_coredumpv(struct device *dev, const void *data, size_t datalen, +void dev_coredumpv(struct device *dev, void *data, size_t datalen, gfp_t gfp); void dev_coredumpm(struct device *dev, struct module *owner, - const void *data, size_t datalen, gfp_t gfp, + void *data, size_t datalen, gfp_t gfp, ssize_t (*read)(char *buffer, loff_t offset, size_t count, - const void *data, size_t datalen), - void (*free)(const void *data)); + void *data, size_t datalen), + void (*free)(void *data)); + +void dev_coredumpsg(struct device *dev, struct scatterlist *table, + size_t datalen, gfp_t gfp); #else -static inline void dev_coredumpv(struct device *dev, const void *data, +static inline void dev_coredumpv(struct device *dev, void *data, size_t datalen, gfp_t gfp) { vfree(data); @@ -23,13 +87,19 @@ static inline void dev_coredumpv(struct device *dev, const void *data, static inline void dev_coredumpm(struct device *dev, struct module *owner, - const void *data, size_t datalen, gfp_t gfp, + void *data, size_t datalen, gfp_t gfp, ssize_t (*read)(char *buffer, loff_t offset, size_t count, - const void *data, size_t datalen), - void (*free)(const void *data)) + void *data, size_t datalen), + void (*free)(void *data)) { free(data); } + +static inline void dev_coredumpsg(struct device *dev, struct scatterlist *table, + size_t datalen, gfp_t gfp) +{ + _devcd_free_sgtable(table); +} #endif /* CONFIG_DEV_COREDUMP */ #endif /* __DEVCOREDUMP_H */ -- cgit v1.2.3 From 9b1d6c8950021ab007608d455fc9c398ecd25476 Mon Sep 17 00:00:00 2001 From: Ming Lin Date: Mon, 4 Apr 2016 14:48:11 -0700 Subject: lib: scatterlist: move SG pool code from SCSI driver to lib/sg_pool.c Now it's ready to move the mempool based SG chained allocator code from SCSI driver to lib/sg_pool.c, which will be compiled only based on a Kconfig symbol CONFIG_SG_POOL. SCSI selects CONFIG_SG_POOL. Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lin Reviewed-by: Sagi Grimberg Signed-off-by: Martin K. Petersen --- include/linux/scatterlist.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 556ec1ea2574..cb3c8fe6acd7 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -285,6 +285,31 @@ size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents, */ #define SG_MAX_SINGLE_ALLOC (PAGE_SIZE / sizeof(struct scatterlist)) +/* + * The maximum number of SG segments that we will put inside a + * scatterlist (unless chaining is used). Should ideally fit inside a + * single page, to avoid a higher order allocation. We could define this + * to SG_MAX_SINGLE_ALLOC to pack correctly at the highest order. The + * minimum value is 32 + */ +#define SG_CHUNK_SIZE 128 + +/* + * Like SG_CHUNK_SIZE, but for archs that have sg chaining. This limit + * is totally arbitrary, a setting of 2048 will get you at least 8mb ios. + */ +#ifdef CONFIG_ARCH_HAS_SG_CHAIN +#define SG_MAX_SEGMENTS 2048 +#else +#define SG_MAX_SEGMENTS SG_CHUNK_SIZE +#endif + +#ifdef CONFIG_SG_POOL +void sg_free_table_chained(struct sg_table *table, bool first_chunk); +int sg_alloc_table_chained(struct sg_table *table, int nents, + struct scatterlist *first_chunk); +#endif + /* * sg page iterator * -- cgit v1.2.3 From 464f664501816ef5fbbc00b8de96f4ae5a1c9325 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Thu, 14 Apr 2016 01:38:29 -0400 Subject: qed: Add infrastructure support for tunneling This patch adds various structure/APIs needed to configure/enable different tunnel [VXLAN/GRE/GENEVE] parameters on the adapter. Signed-off-by: Manish Chopra Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 795c9902e02f..3a4c806be156 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -112,6 +112,13 @@ struct qed_queue_start_common_params { u16 sb_idx; }; +struct qed_tunn_params { + u16 vxlan_port; + u8 update_vxlan_port; + u16 geneve_port; + u8 update_geneve_port; +}; + struct qed_eth_cb_ops { struct qed_common_cb_ops common; }; @@ -166,6 +173,9 @@ struct qed_eth_ops { void (*get_vport_stats)(struct qed_dev *cdev, struct qed_eth_stats *stats); + + int (*tunn_config)(struct qed_dev *cdev, + struct qed_tunn_params *params); }; const struct qed_eth_ops *qed_get_eth_ops(void); -- cgit v1.2.3 From e1c9c62b9a3a761b56359a7437215ae2e9253821 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 11 Apr 2016 23:10:21 +0300 Subject: net/mlx5: Fix mlx5 ifc cmd_hca_cap bad offsets All reserved fields after early_vf_enable are off by 1, since early_vf_enable was not explicitly declared as array of size 1. Reserved field before cqe_zip had a wrong size, it should be 0x80 + 0x3f. Fixes: b0844444590e ("net/mlx5_core: Introduce access function to read internal timer ") Fixes: b4ff3a36d3e4 ("net/mlx5: Use offset based reserved field names in the IFC header file") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Matan Barak Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc.h | 107 ++++++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c15b8a864937..c300e7491d80 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -750,21 +750,21 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 ets[0x1]; u8 nic_flow_table[0x1]; u8 eswitch_flow_table[0x1]; - u8 early_vf_enable; - u8 reserved_at_1a8[0x2]; + u8 early_vf_enable[0x1]; + u8 reserved_at_1a9[0x2]; u8 local_ca_ack_delay[0x5]; u8 reserved_at_1af[0x6]; u8 port_type[0x2]; u8 num_ports[0x8]; - u8 reserved_at_1bf[0x3]; + u8 reserved_at_1c0[0x3]; u8 log_max_msg[0x5]; - u8 reserved_at_1c7[0x4]; + u8 reserved_at_1c8[0x4]; u8 max_tc[0x4]; - u8 reserved_at_1cf[0x6]; + u8 reserved_at_1d0[0x6]; u8 rol_s[0x1]; u8 rol_g[0x1]; - u8 reserved_at_1d7[0x1]; + u8 reserved_at_1d8[0x1]; u8 wol_s[0x1]; u8 wol_g[0x1]; u8 wol_a[0x1]; @@ -774,47 +774,47 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 wol_p[0x1]; u8 stat_rate_support[0x10]; - u8 reserved_at_1ef[0xc]; + u8 reserved_at_1f0[0xc]; u8 cqe_version[0x4]; u8 compact_address_vector[0x1]; u8 reserved_at_200[0x3]; u8 ipoib_basic_offloads[0x1]; - u8 reserved_at_204[0xa]; + u8 reserved_at_205[0xa]; u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; - u8 reserved_at_212[0x1]; + u8 reserved_at_213[0x1]; u8 wq_signature[0x1]; u8 sctr_data_cqe[0x1]; - u8 reserved_at_215[0x1]; + u8 reserved_at_216[0x1]; u8 sho[0x1]; u8 tph[0x1]; u8 rf[0x1]; u8 dct[0x1]; - u8 reserved_at_21a[0x1]; + u8 reserved_at_21b[0x1]; u8 eth_net_offloads[0x1]; u8 roce[0x1]; u8 atomic[0x1]; - u8 reserved_at_21e[0x1]; + u8 reserved_at_21f[0x1]; u8 cq_oi[0x1]; u8 cq_resize[0x1]; u8 cq_moderation[0x1]; - u8 reserved_at_222[0x3]; + u8 reserved_at_223[0x3]; u8 cq_eq_remap[0x1]; u8 pg[0x1]; u8 block_lb_mc[0x1]; - u8 reserved_at_228[0x1]; + u8 reserved_at_229[0x1]; u8 scqe_break_moderation[0x1]; u8 reserved_at_22a[0x1]; u8 cd[0x1]; - u8 reserved_at_22c[0x1]; + u8 reserved_at_22d[0x1]; u8 apm[0x1]; u8 vector_calc[0x1]; u8 reserved_at_22f[0x1]; u8 imaicl[0x1]; - u8 reserved_at_231[0x4]; + u8 reserved_at_232[0x4]; u8 qkv[0x1]; u8 pkv[0x1]; u8 set_deth_sqpn[0x1]; @@ -824,98 +824,101 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 uc[0x1]; u8 rc[0x1]; - u8 reserved_at_23f[0xa]; + u8 reserved_at_240[0xa]; u8 uar_sz[0x6]; - u8 reserved_at_24f[0x8]; + u8 reserved_at_250[0x8]; u8 log_pg_sz[0x8]; u8 bf[0x1]; - u8 reserved_at_260[0x1]; + u8 reserved_at_261[0x1]; u8 pad_tx_eth_packet[0x1]; - u8 reserved_at_262[0x8]; + u8 reserved_at_263[0x8]; u8 log_bf_reg_size[0x5]; - u8 reserved_at_26f[0x10]; + u8 reserved_at_270[0x10]; - u8 reserved_at_27f[0x10]; + u8 reserved_at_280[0x10]; u8 max_wqe_sz_sq[0x10]; - u8 reserved_at_29f[0x10]; + u8 reserved_at_2a0[0x10]; u8 max_wqe_sz_rq[0x10]; - u8 reserved_at_2bf[0x10]; + u8 reserved_at_2c0[0x10]; u8 max_wqe_sz_sq_dc[0x10]; - u8 reserved_at_2df[0x7]; + u8 reserved_at_2e0[0x7]; u8 max_qp_mcg[0x19]; - u8 reserved_at_2ff[0x18]; + u8 reserved_at_300[0x18]; u8 log_max_mcg[0x8]; - u8 reserved_at_31f[0x3]; + u8 reserved_at_320[0x3]; u8 log_max_transport_domain[0x5]; - u8 reserved_at_327[0x3]; + u8 reserved_at_328[0x3]; u8 log_max_pd[0x5]; - u8 reserved_at_32f[0xb]; + u8 reserved_at_330[0xb]; u8 log_max_xrcd[0x5]; - u8 reserved_at_33f[0x20]; + u8 reserved_at_340[0x20]; - u8 reserved_at_35f[0x3]; + u8 reserved_at_360[0x3]; u8 log_max_rq[0x5]; - u8 reserved_at_367[0x3]; + u8 reserved_at_368[0x3]; u8 log_max_sq[0x5]; - u8 reserved_at_36f[0x3]; + u8 reserved_at_370[0x3]; u8 log_max_tir[0x5]; - u8 reserved_at_377[0x3]; + u8 reserved_at_378[0x3]; u8 log_max_tis[0x5]; u8 basic_cyclic_rcv_wqe[0x1]; - u8 reserved_at_380[0x2]; + u8 reserved_at_381[0x2]; u8 log_max_rmp[0x5]; - u8 reserved_at_387[0x3]; + u8 reserved_at_388[0x3]; u8 log_max_rqt[0x5]; - u8 reserved_at_38f[0x3]; + u8 reserved_at_390[0x3]; u8 log_max_rqt_size[0x5]; - u8 reserved_at_397[0x3]; + u8 reserved_at_398[0x3]; u8 log_max_tis_per_sq[0x5]; - u8 reserved_at_39f[0x3]; + u8 reserved_at_3a0[0x3]; u8 log_max_stride_sz_rq[0x5]; - u8 reserved_at_3a7[0x3]; + u8 reserved_at_3a8[0x3]; u8 log_min_stride_sz_rq[0x5]; - u8 reserved_at_3af[0x3]; + u8 reserved_at_3b0[0x3]; u8 log_max_stride_sz_sq[0x5]; - u8 reserved_at_3b7[0x3]; + u8 reserved_at_3b8[0x3]; u8 log_min_stride_sz_sq[0x5]; - u8 reserved_at_3bf[0x1b]; + u8 reserved_at_3c0[0x1b]; u8 log_max_wq_sz[0x5]; u8 nic_vport_change_event[0x1]; - u8 reserved_at_3e0[0xa]; + u8 reserved_at_3e1[0xa]; u8 log_max_vlan_list[0x5]; - u8 reserved_at_3ef[0x3]; + u8 reserved_at_3f0[0x3]; u8 log_max_current_mc_list[0x5]; - u8 reserved_at_3f7[0x3]; + u8 reserved_at_3f8[0x3]; u8 log_max_current_uc_list[0x5]; - u8 reserved_at_3ff[0x80]; + u8 reserved_at_400[0x80]; - u8 reserved_at_47f[0x3]; + u8 reserved_at_480[0x3]; u8 log_max_l2_table[0x5]; - u8 reserved_at_487[0x8]; + u8 reserved_at_488[0x8]; u8 log_uar_page_sz[0x10]; - u8 reserved_at_49f[0x20]; + u8 reserved_at_4a0[0x20]; u8 device_frequency_mhz[0x20]; u8 device_frequency_khz[0x20]; - u8 reserved_at_4ff[0x5f]; + + u8 reserved_at_500[0x80]; + + u8 reserved_at_580[0x3f]; u8 cqe_zip[0x1]; u8 cqe_zip_timeout[0x10]; u8 cqe_zip_max_num[0x10]; - u8 reserved_at_57f[0x220]; + u8 reserved_at_5e0[0x220]; }; enum mlx5_flow_destination_type { -- cgit v1.2.3 From 7d5e14237a551a5de3d287f2e8db2d044ee81a1a Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 11 Apr 2016 23:10:22 +0300 Subject: net/mlx5: Update mlx5_ifc hardware features Adding the needed mlx5_ifc hardware bits and structs for the following feature: * Add vport to steering commands for SRIOV ACL support * Add mlcr, pcmr and mcia registers for dump module EEPROM * Add support for FCS, baeacon led and disable_link bits to hca caps * Add CQE period mode bit in CQ context for CQE based CQ moderation support * Add umr SQ bit for fragmented memory registration * Add needed bits and caps for Striding RQ support Signed-off-by: Saeed Mahameed Signed-off-by: Matan Barak Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc.h | 146 +++++++++++++++++++++++++++++++++++------- 1 file changed, 124 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c300e7491d80..4ce4ea422a10 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -513,7 +513,9 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 max_lso_cap[0x5]; u8 reserved_at_10[0x4]; u8 rss_ind_tbl_cap[0x4]; - u8 reserved_at_18[0x3]; + u8 reg_umr_sq[0x1]; + u8 scatter_fcs[0x1]; + u8 reserved_at_1a[0x1]; u8 tunnel_lso_const_out_ip_id[0x1]; u8 reserved_at_1c[0x2]; u8 tunnel_statless_gre[0x1]; @@ -648,7 +650,7 @@ struct mlx5_ifc_vector_calc_cap_bits { enum { MLX5_WQ_TYPE_LINKED_LIST = 0x0, MLX5_WQ_TYPE_CYCLIC = 0x1, - MLX5_WQ_TYPE_STRQ = 0x2, + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ = 0x2, }; enum { @@ -753,7 +755,11 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 early_vf_enable[0x1]; u8 reserved_at_1a9[0x2]; u8 local_ca_ack_delay[0x5]; - u8 reserved_at_1af[0x6]; + u8 reserved_at_1af[0x2]; + u8 ports_check[0x1]; + u8 reserved_at_1b2[0x1]; + u8 disable_link_up[0x1]; + u8 beacon_led[0x1]; u8 port_type[0x2]; u8 num_ports[0x8]; @@ -778,7 +784,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 cqe_version[0x4]; u8 compact_address_vector[0x1]; - u8 reserved_at_200[0x3]; + u8 striding_rq[0x1]; + u8 reserved_at_201[0x2]; u8 ipoib_basic_offloads[0x1]; u8 reserved_at_205[0xa]; u8 drain_sigerr[0x1]; @@ -807,12 +814,12 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 block_lb_mc[0x1]; u8 reserved_at_229[0x1]; u8 scqe_break_moderation[0x1]; - u8 reserved_at_22a[0x1]; + u8 cq_period_start_from_cqe[0x1]; u8 cd[0x1]; u8 reserved_at_22d[0x1]; u8 apm[0x1]; u8 vector_calc[0x1]; - u8 reserved_at_22f[0x1]; + u8 umr_ptr_rlky[0x1]; u8 imaicl[0x1]; u8 reserved_at_232[0x4]; u8 qkv[0x1]; @@ -913,10 +920,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_500[0x80]; u8 reserved_at_580[0x3f]; - u8 cqe_zip[0x1]; + u8 cqe_compression[0x1]; - u8 cqe_zip_timeout[0x10]; - u8 cqe_zip_max_num[0x10]; + u8 cqe_compression_timeout[0x10]; + u8 cqe_compression_max_num[0x10]; u8 reserved_at_5e0[0x220]; }; @@ -1000,7 +1007,13 @@ struct mlx5_ifc_wq_bits { u8 reserved_at_118[0x3]; u8 log_wq_sz[0x5]; - u8 reserved_at_120[0x4e0]; + u8 reserved_at_120[0x15]; + u8 log_wqe_num_of_strides[0x3]; + u8 two_byte_shift_en[0x1]; + u8 reserved_at_139[0x4]; + u8 log_wqe_stride_size[0x3]; + + u8 reserved_at_140[0x4c0]; struct mlx5_ifc_cmd_pas_bits pas[0]; }; @@ -2199,7 +2212,8 @@ struct mlx5_ifc_sqc_bits { u8 flush_in_error_en[0x1]; u8 reserved_at_4[0x4]; u8 state[0x4]; - u8 reserved_at_c[0x14]; + u8 reg_umr[0x1]; + u8 reserved_at_d[0x13]; u8 reserved_at_20[0x8]; u8 user_index[0x18]; @@ -2247,7 +2261,8 @@ enum { struct mlx5_ifc_rqc_bits { u8 rlky[0x1]; - u8 reserved_at_1[0x2]; + u8 reserved_at_1[0x1]; + u8 scatter_fcs[0x1]; u8 vsd[0x1]; u8 mem_rq_type[0x4]; u8 state[0x4]; @@ -2604,6 +2619,11 @@ enum { MLX5_CQC_ST_FIRED = 0xa, }; +enum { + MLX5_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, + MLX5_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, +}; + struct mlx5_ifc_cqc_bits { u8 status[0x4]; u8 reserved_at_4[0x4]; @@ -2612,8 +2632,8 @@ struct mlx5_ifc_cqc_bits { u8 reserved_at_c[0x1]; u8 scqe_break_moderation_en[0x1]; u8 oi[0x1]; - u8 reserved_at_f[0x2]; - u8 cqe_zip_en[0x1]; + u8 cq_period_mode[0x2]; + u8 cqe_comp_en[0x1]; u8 mini_cqe_res_format[0x2]; u8 st[0x4]; u8 reserved_at_18[0x8]; @@ -2987,7 +3007,11 @@ struct mlx5_ifc_set_fte_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -5181,7 +5205,11 @@ struct mlx5_ifc_destroy_flow_table_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -5208,7 +5236,11 @@ struct mlx5_ifc_destroy_flow_group_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -5349,7 +5381,11 @@ struct mlx5_ifc_delete_fte_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -5795,7 +5831,11 @@ struct mlx5_ifc_create_flow_table_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -5839,7 +5879,11 @@ struct mlx5_ifc_create_flow_group_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -6372,6 +6416,17 @@ struct mlx5_ifc_ptys_reg_bits { u8 reserved_at_1a0[0x60]; }; +struct mlx5_ifc_mlcr_reg_bits { + u8 reserved_at_0[0x8]; + u8 local_port[0x8]; + u8 reserved_at_10[0x20]; + + u8 beacon_duration[0x10]; + u8 reserved_at_40[0x10]; + + u8 beacon_remain[0x10]; +}; + struct mlx5_ifc_ptas_reg_bits { u8 reserved_at_0[0x20]; @@ -6781,6 +6836,16 @@ struct mlx5_ifc_pamp_reg_bits { u8 index_data[18][0x10]; }; +struct mlx5_ifc_pcmr_reg_bits { + u8 reserved_at_0[0x8]; + u8 local_port[0x8]; + u8 reserved_at_10[0x2e]; + u8 fcs_cap[0x1]; + u8 reserved_at_3f[0x1f]; + u8 fcs_chk[0x1]; + u8 reserved_at_5f[0x1]; +}; + struct mlx5_ifc_lane_2_module_mapping_bits { u8 reserved_at_0[0x6]; u8 rx_lane[0x2]; @@ -7117,6 +7182,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_pspa_reg_bits pspa_reg; struct mlx5_ifc_ptas_reg_bits ptas_reg; struct mlx5_ifc_ptys_reg_bits ptys_reg; + struct mlx5_ifc_mlcr_reg_bits mlcr_reg; struct mlx5_ifc_pude_reg_bits pude_reg; struct mlx5_ifc_pvlc_reg_bits pvlc_reg; struct mlx5_ifc_slrg_reg_bits slrg_reg; @@ -7150,7 +7216,11 @@ struct mlx5_ifc_set_flow_table_root_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -7181,7 +7251,9 @@ struct mlx5_ifc_modify_flow_table_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x20]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; u8 reserved_at_60[0x10]; u8 modify_field_select[0x10]; @@ -7247,4 +7319,34 @@ struct mlx5_ifc_qtct_reg_bits { u8 tclass[0x3]; }; +struct mlx5_ifc_mcia_reg_bits { + u8 l[0x1]; + u8 reserved_at_1[0x7]; + u8 module[0x8]; + u8 reserved_at_10[0x8]; + u8 status[0x8]; + + u8 i2c_device_address[0x8]; + u8 page_number[0x8]; + u8 device_address[0x10]; + + u8 reserved_at_40[0x10]; + u8 size[0x10]; + + u8 reserved_at_60[0x20]; + + u8 dword_0[0x20]; + u8 dword_1[0x20]; + u8 dword_2[0x20]; + u8 dword_3[0x20]; + u8 dword_4[0x20]; + u8 dword_5[0x20]; + u8 dword_6[0x20]; + u8 dword_7[0x20]; + u8 dword_8[0x20]; + u8 dword_9[0x20]; + u8 dword_10[0x20]; + u8 dword_11[0x20]; +}; + #endif /* MLX5_IFC_H */ -- cgit v1.2.3 From 52c52a61a39fb319c14a582f8631619e5d5f55bf Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 14 Apr 2016 15:35:30 +0800 Subject: sctp: add sctp_info dump api for sctp_diag sctp_diag will dump some important details of sctp's assoc or ep, we use sctp_info to describe them, sctp_get_sctp_info to get them, and export it to sctp_diag.ko. v2->v3: - we will not use list_for_each_safe in sctp_get_sctp_info, cause all the callers of it will use lock_sock. - fix the holes in struct sctp_info with __reserved* field. because sctp_diag is a new feature, and sctp_info is just for now, it may be changed in the future. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index a9414fd49dc6..dacb5e711994 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -705,4 +705,71 @@ typedef struct sctp_auth_chunk { sctp_authhdr_t auth_hdr; } __packed sctp_auth_chunk_t; +struct sctp_info { + __u32 sctpi_tag; + __u32 sctpi_state; + __u32 sctpi_rwnd; + __u16 sctpi_unackdata; + __u16 sctpi_penddata; + __u16 sctpi_instrms; + __u16 sctpi_outstrms; + __u32 sctpi_fragmentation_point; + __u32 sctpi_inqueue; + __u32 sctpi_outqueue; + __u32 sctpi_overall_error; + __u32 sctpi_max_burst; + __u32 sctpi_maxseg; + __u32 sctpi_peer_rwnd; + __u32 sctpi_peer_tag; + __u8 sctpi_peer_capable; + __u8 sctpi_peer_sack; + __u16 __reserved1; + + /* assoc status info */ + __u64 sctpi_isacks; + __u64 sctpi_osacks; + __u64 sctpi_opackets; + __u64 sctpi_ipackets; + __u64 sctpi_rtxchunks; + __u64 sctpi_outofseqtsns; + __u64 sctpi_idupchunks; + __u64 sctpi_gapcnt; + __u64 sctpi_ouodchunks; + __u64 sctpi_iuodchunks; + __u64 sctpi_oodchunks; + __u64 sctpi_iodchunks; + __u64 sctpi_octrlchunks; + __u64 sctpi_ictrlchunks; + + /* primary transport info */ + struct sockaddr_storage sctpi_p_address; + __s32 sctpi_p_state; + __u32 sctpi_p_cwnd; + __u32 sctpi_p_srtt; + __u32 sctpi_p_rto; + __u32 sctpi_p_hbinterval; + __u32 sctpi_p_pathmaxrxt; + __u32 sctpi_p_sackdelay; + __u32 sctpi_p_sackfreq; + __u32 sctpi_p_ssthresh; + __u32 sctpi_p_partial_bytes_acked; + __u32 sctpi_p_flight_size; + __u16 sctpi_p_error; + __u16 __reserved2; + + /* sctp sock info */ + __u32 sctpi_s_autoclose; + __u32 sctpi_s_adaptation_ind; + __u32 sctpi_s_pd_point; + __u8 sctpi_s_nodelay; + __u8 sctpi_s_disable_fragments; + __u8 sctpi_s_v4mapped; + __u8 sctpi_s_frag_interleave; +}; + +struct sctp_infox { + struct sctp_info *sctpinfo; + struct sctp_association *asoc; +}; + #endif /* __LINUX_SCTP_H__ */ -- cgit v1.2.3 From c5cc2a0bc930f1ae00b198aeb752acc3bdd4d5a7 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Wed, 16 Mar 2016 21:54:55 +0200 Subject: clk: ti: dpll: add support for specifying max rate for DPLLs DPLLs typically have a maximum rate they can support, and this varies from DPLL to DPLL. Add support of the maximum rate value to the DPLL data struct, and also add check for this in the DPLL round_rate function. Signed-off-by: Tero Kristo Reviewed-by: Nishanth Menon Cc: Tomi Valkeinen Cc: Lokesh Vutla Signed-off-by: Stephen Boyd --- include/linux/clk/ti.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index dc5164a6df29..6110fe09ed18 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -37,6 +37,7 @@ * @last_rounded_n: cache of the last N result of omap2_dpll_round_rate() * @min_divider: minimum valid non-bypass divider value (actual) * @max_divider: maximum valid non-bypass divider value (actual) + * @max_rate: maximum clock rate for the DPLL * @modes: possible values of @enable_mask * @autoidle_reg: register containing the DPLL autoidle mode bitfield * @idlest_reg: register containing the DPLL idle status bitfield @@ -81,6 +82,7 @@ struct dpll_data { u8 last_rounded_n; u8 min_divider; u16 max_divider; + unsigned long max_rate; u8 modes; void __iomem *autoidle_reg; void __iomem *idlest_reg; -- cgit v1.2.3 From af8a41271b56f6d79cb4d7c7f3ca688a2d97a801 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Fri, 15 Apr 2016 16:59:38 +0200 Subject: iio:adis: Add support for manual self-test flag clear Some variants of the devices from the ADIS family don't auto-clear the self-test bit after the self-test has completed. Instead we have to manually clear. Add support for this to the ADIS library. Signed-off-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index fa2d01ef8f55..360da7d18a3d 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -41,6 +41,7 @@ struct adis_data { unsigned int diag_stat_reg; unsigned int self_test_mask; + bool self_test_no_autoclear; unsigned int startup_delay; const char * const *status_error_msgs; -- cgit v1.2.3 From 756ca874417695f77941948a77e9b8562635cc0a Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 14 Apr 2016 17:04:34 -0400 Subject: netdev_features: Add NETIF_F_TSO_MANGLEID to NETIF_F_ALL_TSO I realized that when I added NETIF_F_TSO_MANGLEID as a TSO type I forgot to add it to NETIF_F_ALL_TSO. This patch corrects that so the flag will be included correctly. The result should be minor as it was only used by a few drivers and in a few specific cases such as when NETIF_F_SG was not supported on a device so the TSO flags were cleared. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 9fc79df0e561..15eb0b12fff9 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -164,7 +164,8 @@ enum { #define NETIF_F_CSUM_MASK (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | \ NETIF_F_HW_CSUM) -#define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) +#define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | \ + NETIF_F_TSO_ECN | NETIF_F_TSO_MANGLEID) #define NETIF_F_ALL_FCOE (NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \ NETIF_F_FSO) -- cgit v1.2.3 From 94be46b9e5e2954b6d5962750f8aae8b5f099baa Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 18 Apr 2016 15:33:10 +0200 Subject: regulator: s2mps11: Set default ramp delay for S2MPS11 LDOs Driver did not provide default value for ramp delay for LDOs which lead to warning in dmesg, e.g. on Odroid XU4: [ 1.486076] vdd_ldo9: ramp_delay not set [ 1.506875] vddq_mmc2: ramp_delay not set [ 1.523766] vdd_ldo15: ramp_delay not set [ 1.544702] vdd_sd: ramp_delay not set The datasheet for all the S2MPS1x family is inconsistent here and does not specify unambiguously the value of ramp delay for LDO. It mentions 30 mV/us in one timing diagram but then omits it completely in LDO regulator characteristics table (it is specified for bucks). However the vendor kernels for Galaxy S5 and Odroid XU3 use values of 12 mV/us or 24 mV/us. Without the ramp delay value the consumers do not wait for voltage settle after changing it. Although the proper value of ramp delay for LDOs is unknown, it seems safer to use at least some value from reference kernel than to leave it unset. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Mark Brown --- include/linux/mfd/samsung/core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index 6bc4bcd488ac..5a23dd4df432 100644 --- a/include/linux/mfd/samsung/core.h +++ b/include/linux/mfd/samsung/core.h @@ -30,6 +30,9 @@ #define MIN_600_MV 600000 #define MIN_500_MV 500000 +/* Ramp delay in uV/us */ +#define RAMP_DELAY_12_MVUS 12000 + /* Macros to represent steps for LDO/BUCK */ #define STEP_50_MV 50000 #define STEP_25_MV 25000 -- cgit v1.2.3 From 6d62b4d5fac620ee0ca65dc6d99b0306d96bc541 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Fri, 15 Apr 2016 00:34:59 +0200 Subject: net: ethtool: export conversion function between u32 and link mode The function convert_legacy_u32_to_link_mode and convert_link_mode_to_legacy_u32 may be used outside of ethtool.c. We rename them to ethtool_convert_... and export them, so we could use them in others drivers and modules. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- include/linux/ethtool.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e2b7bf27c03e..9ded8c6d8176 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -150,6 +150,13 @@ extern int __ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *link_ksettings); +void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, + u32 legacy_u32); + +/* return false if src had higher bits set. lower bits always updated. */ +bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, + const unsigned long *src); + /** * struct ethtool_ops - optional netdev operations * @get_settings: DEPRECATED, use %get_link_ksettings/%set_link_ksettings -- cgit v1.2.3 From 2d55173e71b06c5a369489852d972304e14189fd Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Fri, 15 Apr 2016 00:35:00 +0200 Subject: phy: add generic function to support ksetting support The old ethtool api (get_setting and set_setting) has generic phy functions phy_ethtool_sset and phy_ethtool_gset. To supprt the new ethtool api (get_link_ksettings and set_link_ksettings), we add generic phy function phy_ethtool_ksettings_get and phy_ethtool_ksettings_set. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- include/linux/phy.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 2abd7918f64f..be3f83bbdc0b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -805,6 +805,10 @@ void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); +int phy_ethtool_ksettings_get(struct phy_device *phydev, + struct ethtool_link_ksettings *cmd); +int phy_ethtool_ksettings_set(struct phy_device *phydev, + const struct ethtool_link_ksettings *cmd); int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); int phy_start_interrupts(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); -- cgit v1.2.3 From 5ae74f2cc2f150c1a5cee54cabbd71dd0661285a Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Mon, 11 Apr 2016 10:13:18 +0800 Subject: ACPI / tables: Move table override mechanisms to tables.c This patch moves acpi_os_table_override() and acpi_os_physical_table_override() to tables.c. Along with the mechanisms, acpi_initrd_initialize_tables() is also moved to tables.c to form a static function. The following functions are renamed according to this change: 1. acpi_initrd_override() -> renamed to early_acpi_table_init(), which invokes acpi_table_initrd_init() 2. acpi_os_physical_table_override() -> which invokes acpi_table_initrd_override() 3. acpi_initialize_initrd_tables() -> renamed to acpi_table_initrd_scan() Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 06ed7e54033e..7718c04aa09f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -190,14 +190,6 @@ static inline int acpi_debugger_notify_command_complete(void) } #endif -#ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE -void acpi_initrd_override(void *data, size_t size); -#else -static inline void acpi_initrd_override(void *data, size_t size) -{ -} -#endif - #define BAD_MADT_ENTRY(entry, end) ( \ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ ((struct acpi_subtable_header *)entry)->length < sizeof(*entry)) @@ -216,6 +208,7 @@ void acpi_boot_table_init (void); int acpi_mps_check (void); int acpi_numa_init (void); +void early_acpi_table_init(void *data, size_t size); int acpi_table_init (void); int acpi_table_parse(char *id, acpi_tbl_table_handler handler); int __init acpi_parse_entries(char *id, unsigned long table_size, @@ -596,6 +589,7 @@ static inline const char *acpi_dev_name(struct acpi_device *adev) return NULL; } +static inline void early_acpi_table_init(void *data, size_t size) { } static inline void acpi_early_init(void) { } static inline void acpi_subsystem_init(void) { } -- cgit v1.2.3 From 20147f0d4f50f6f0d1fbe1815fe3d4d0a6444a70 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 29 Mar 2016 17:22:26 +0800 Subject: mfd: axp20x: Add support for AXP809 PMIC The X-Powers AXP809 is a new PMIC that is paired with Allwinner's A80 SoC, along with a slave AXP806 PMIC. This PMIC is quite similar to the earlier AXP223, though the interrupts and regulator have changed a bit. This patch adds support for the interrupts and power button of the PMIC. Signed-off-by: Chen-Yu Tsai Signed-off-by: Lee Jones --- include/linux/mfd/axp20x.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index d82e7d51372b..0be4982f08fe 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -20,6 +20,7 @@ enum { AXP221_ID, AXP223_ID, AXP288_ID, + AXP809_ID, NR_AXP20X_VARIANTS, }; @@ -264,6 +265,29 @@ enum { AXP22X_REG_ID_MAX, }; +enum { + AXP809_DCDC1 = 0, + AXP809_DCDC2, + AXP809_DCDC3, + AXP809_DCDC4, + AXP809_DCDC5, + AXP809_DC1SW, + AXP809_DC5LDO, + AXP809_ALDO1, + AXP809_ALDO2, + AXP809_ALDO3, + AXP809_ELDO1, + AXP809_ELDO2, + AXP809_ELDO3, + AXP809_DLDO1, + AXP809_DLDO2, + AXP809_RTC_LDO, + AXP809_LDO_IO0, + AXP809_LDO_IO1, + AXP809_SW, + AXP809_REG_ID_MAX, +}; + /* IRQs */ enum { AXP152_IRQ_LDO0IN_CONNECT = 1, @@ -390,6 +414,41 @@ enum axp288_irqs { AXP288_IRQ_BC_USB_CHNG, }; +enum axp809_irqs { + AXP809_IRQ_ACIN_OVER_V = 1, + AXP809_IRQ_ACIN_PLUGIN, + AXP809_IRQ_ACIN_REMOVAL, + AXP809_IRQ_VBUS_OVER_V, + AXP809_IRQ_VBUS_PLUGIN, + AXP809_IRQ_VBUS_REMOVAL, + AXP809_IRQ_VBUS_V_LOW, + AXP809_IRQ_BATT_PLUGIN, + AXP809_IRQ_BATT_REMOVAL, + AXP809_IRQ_BATT_ENT_ACT_MODE, + AXP809_IRQ_BATT_EXIT_ACT_MODE, + AXP809_IRQ_CHARG, + AXP809_IRQ_CHARG_DONE, + AXP809_IRQ_BATT_CHG_TEMP_HIGH, + AXP809_IRQ_BATT_CHG_TEMP_HIGH_END, + AXP809_IRQ_BATT_CHG_TEMP_LOW, + AXP809_IRQ_BATT_CHG_TEMP_LOW_END, + AXP809_IRQ_BATT_ACT_TEMP_HIGH, + AXP809_IRQ_BATT_ACT_TEMP_HIGH_END, + AXP809_IRQ_BATT_ACT_TEMP_LOW, + AXP809_IRQ_BATT_ACT_TEMP_LOW_END, + AXP809_IRQ_DIE_TEMP_HIGH, + AXP809_IRQ_LOW_PWR_LVL1, + AXP809_IRQ_LOW_PWR_LVL2, + AXP809_IRQ_TIMER, + AXP809_IRQ_PEK_RIS_EDGE, + AXP809_IRQ_PEK_FAL_EDGE, + AXP809_IRQ_PEK_SHORT, + AXP809_IRQ_PEK_LONG, + AXP809_IRQ_PEK_OVER_OFF, + AXP809_IRQ_GPIO1_INPUT, + AXP809_IRQ_GPIO0_INPUT, +}; + #define AXP288_TS_ADC_H 0x58 #define AXP288_TS_ADC_L 0x59 #define AXP288_GP_ADC_H 0x5a -- cgit v1.2.3 From a8f447be8056d9ce17bf7757d6de79426700bb8b Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Fri, 8 Apr 2016 00:12:55 +0530 Subject: mfd: Add resource managed APIs for mfd_add_devices Add resource managed API devm_mfd_add_devices() for the mfd_add_devices(). This helps in reducing code in error path as it is not required to call mfd_remove_devices() explicitly to remove all child-devices. In some cases, it also helps not to implement .remove() callback which get called during driver unbind. Signed-off-by: Laxman Dewangan Signed-off-by: Lee Jones --- include/linux/mfd/core.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index bc6f7e00fb3d..4a0268afe546 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -131,4 +131,8 @@ static inline int mfd_add_hotplug_devices(struct device *parent, extern void mfd_remove_devices(struct device *parent); +extern int devm_mfd_add_devices(struct device *dev, int id, + const struct mfd_cell *cells, int n_devs, + struct resource *mem_base, + int irq_base, struct irq_domain *irq_domain); #endif -- cgit v1.2.3 From 679ca39fc670a5a95c2b40d2cc8cf2cee2486f7a Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 18 Apr 2016 16:53:39 +0900 Subject: usb: gadget: udc: core: add usb_gadget_{un}map_request_by_dev() If the following environment, the first argument of DMA API should be set to a DMAC's device structure, not a udc controller's one. - A udc controller needs an external DMAC device (like a DMA Engine). - The external DMAC enables IOMMU. So, this patch add usb_gadget_{un}map_request_by_dev() API to set a DMAC's device structure by a udc controller driver. Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 5d4e151c49bf..457651bf45b0 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -1223,9 +1223,13 @@ int usb_otg_descriptor_init(struct usb_gadget *gadget, /* utility to simplify map/unmap of usb_requests to/from DMA */ +extern int usb_gadget_map_request_by_dev(struct device *dev, + struct usb_request *req, int is_in); extern int usb_gadget_map_request(struct usb_gadget *gadget, struct usb_request *req, int is_in); +extern void usb_gadget_unmap_request_by_dev(struct device *dev, + struct usb_request *req, int is_in); extern void usb_gadget_unmap_request(struct usb_gadget *gadget, struct usb_request *req, int is_in); -- cgit v1.2.3 From 7ef224d1d0e3a1ade02d02c01ce1dcffb736d2c3 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Thu, 3 Mar 2016 12:54:42 -0600 Subject: tracing: Add 'hist' event trigger command 'hist' triggers allow users to continually aggregate trace events, which can then be viewed afterwards by simply reading a 'hist' file containing the aggregation in a human-readable format. The basic idea is very simple and boils down to a mechanism whereby trace events, rather than being exhaustively dumped in raw form and viewed directly, are automatically 'compressed' into meaningful tables completely defined by the user. This is done strictly via single-line command-line commands and without the aid of any kind of programming language or interpreter. A surprising number of typical use cases can be accomplished by users via this simple mechanism. In fact, a large number of the tasks that users typically do using the more complicated script-based tracing tools, at least during the initial stages of an investigation, can be accomplished by simply specifying a set of keys and values to be used in the creation of a hash table. The Linux kernel trace event subsystem happens to provide an extensive list of keys and values ready-made for such a purpose in the form of the event format files associated with each trace event. By simply consulting the format file for field names of interest and by plugging them into the hist trigger command, users can create an endless number of useful aggregations to help with investigating various properties of the system. See Documentation/trace/events.txt for examples. hist triggers are implemented on top of the existing event trigger infrastructure, and as such are consistent with the existing triggers from a user's perspective as well. The basic syntax follows the existing trigger syntax. Users start an aggregation by writing a 'hist' trigger to the event of interest's trigger file: # echo hist:keys=xxx [ if filter] > event/trigger Once a hist trigger has been set up, by default it continually aggregates every matching event into a hash table using the event key and a value field named 'hitcount'. To view the aggregation at any point in time, simply read the 'hist' file in the same directory as the 'trigger' file: # cat event/hist The detailed syntax provides additional options for user control, and is described exhaustively in Documentation/trace/events.txt and in the virtual tracing/README file in the tracing subsystem. Link: http://lkml.kernel.org/r/72d263b5e1853fe9c314953b65833c3aa75479f2.1457029949.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Tested-by: Masami Hiramatsu Reviewed-by: Namhyung Kim Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 0810f81b6db2..404603720650 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -407,6 +407,7 @@ enum event_trigger_type { ETT_SNAPSHOT = (1 << 1), ETT_STACKTRACE = (1 << 2), ETT_EVENT_ENABLE = (1 << 3), + ETT_EVENT_HIST = (1 << 4), }; extern int filter_match_preds(struct event_filter *filter, void *rec); -- cgit v1.2.3 From 97865fe41322d83dac4373fe0a0de5b1a1b318c5 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 24 Mar 2016 14:18:05 +0100 Subject: iio: st_sensors: verify interrupt event to status This makes all ST sensor drivers check that they actually have new data available for the requested channel(s) before claiming an IRQ, by reading the status register (which is conveniently the same for all ST sensors) and check that the channel has new data before proceeding to read it and fill the buffer. This way sensors can share an interrupt line: it can be flaged as shared and then the sensor that did not fire will return NO_IRQ, and the sensor that fired will handle the IRQ and return IRQ_HANDLED. Cc: Giuseppe Barba Cc: Denis Ciocca Signed-off-by: Linus Walleij Signed-off-by: Jonathan Cameron --- include/linux/iio/common/st_sensors.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 6670c3d25c58..d8da075bfda0 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -37,6 +37,7 @@ #define ST_SENSORS_DEFAULT_AXIS_ADDR 0x20 #define ST_SENSORS_DEFAULT_AXIS_MASK 0x07 #define ST_SENSORS_DEFAULT_AXIS_N_BIT 3 +#define ST_SENSORS_DEFAULT_STAT_ADDR 0x27 #define ST_SENSORS_MAX_NAME 17 #define ST_SENSORS_MAX_4WAI 7 @@ -121,6 +122,7 @@ struct st_sensor_bdu { * @mask_int2: mask to enable/disable IRQ on INT2 pin. * @addr_ihl: address to enable/disable active low on the INT lines. * @mask_ihl: mask to enable/disable active low on the INT lines. + * @addr_stat_drdy: address to read status of DRDY (data ready) interrupt * struct ig1 - represents the Interrupt Generator 1 of sensors. * @en_addr: address of the enable ig1 register. * @en_mask: mask to write the on/off value for enable. @@ -131,6 +133,7 @@ struct st_sensor_data_ready_irq { u8 mask_int2; u8 addr_ihl; u8 mask_ihl; + u8 addr_stat_drdy; struct { u8 en_addr; u8 en_mask; -- cgit v1.2.3 From 0e6f6871a1591f4bb0971809c45bc91a991f1967 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 14 Apr 2016 10:45:21 +0200 Subject: iio: st_sensors: support open drain mode Some types of ST Sensors can be connected to the same IRQ line as other peripherals using open drain. Add a device tree binding and a sensor data property to flip the right bit in the interrupt control register to enable open drain mode on the INT line. If the line is set to be open drain, also tag on IRQF_SHARED to the IRQ flags when requesting the interrupt, as the whole point of using open drain interrupt lines is to share them with more than one peripheral (wire-or). Cc: devicetree@vger.kernel.org Cc: Giuseppe Barba Cc: Denis Ciocca Acked-by: Rob Herring Signed-off-by: Linus Walleij Signed-off-by: Jonathan Cameron --- include/linux/iio/common/st_sensors.h | 6 ++++++ include/linux/platform_data/st_sensors_pdata.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index d8da075bfda0..d029ffac0d69 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -122,6 +122,8 @@ struct st_sensor_bdu { * @mask_int2: mask to enable/disable IRQ on INT2 pin. * @addr_ihl: address to enable/disable active low on the INT lines. * @mask_ihl: mask to enable/disable active low on the INT lines. + * @addr_od: address to enable/disable Open Drain on the INT lines. + * @mask_od: mask to enable/disable Open Drain on the INT lines. * @addr_stat_drdy: address to read status of DRDY (data ready) interrupt * struct ig1 - represents the Interrupt Generator 1 of sensors. * @en_addr: address of the enable ig1 register. @@ -133,6 +135,8 @@ struct st_sensor_data_ready_irq { u8 mask_int2; u8 addr_ihl; u8 mask_ihl; + u8 addr_od; + u8 mask_od; u8 addr_stat_drdy; struct { u8 en_addr; @@ -215,6 +219,7 @@ struct st_sensor_settings { * @odr: Output data rate of the sensor [Hz]. * num_data_channels: Number of data channels used in buffer. * @drdy_int_pin: Redirect DRDY on pin 1 (1) or pin 2 (2). + * @int_pin_open_drain: Set the interrupt/DRDY to open drain. * @get_irq_data_ready: Function to get the IRQ used for data ready signal. * @tf: Transfer function structure used by I/O operations. * @tb: Transfer buffers and mutex used by I/O operations. @@ -236,6 +241,7 @@ struct st_sensor_data { unsigned int num_data_channels; u8 drdy_int_pin; + bool int_pin_open_drain; unsigned int (*get_irq_data_ready) (struct iio_dev *indio_dev); diff --git a/include/linux/platform_data/st_sensors_pdata.h b/include/linux/platform_data/st_sensors_pdata.h index 753839187ba0..79b0e4cdb814 100644 --- a/include/linux/platform_data/st_sensors_pdata.h +++ b/include/linux/platform_data/st_sensors_pdata.h @@ -16,9 +16,11 @@ * @drdy_int_pin: Redirect DRDY on pin 1 (1) or pin 2 (2). * Available only for accelerometer and pressure sensors. * Accelerometer DRDY on LSM330 available only on pin 1 (see datasheet). + * @open_drain: set the interrupt line to be open drain if possible. */ struct st_sensors_platform_data { u8 drdy_int_pin; + bool open_drain; }; #endif /* ST_SENSORS_PDATA_H */ -- cgit v1.2.3 From 8bf872d8d261feefcdf67027522e3f717cad2bfe Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 6 Apr 2016 16:01:06 +0530 Subject: iio: core: Add devm_ APIs for iio_channel_{get,release} Some of kernel driver uses the IIO framework to get the sensor value via ADC or IIO HW driver. The client driver get iio channel by iio_channel_get() and release it by calling iio_channel_release(). Add resource managed version (devm_*) of these APIs so that if client calls the devm_iio_channel_get() then it need not to release it explicitly, it can be done by managed device framework when driver get un-binded. This reduces the code in error path and also need of .remove callback in some cases. Signed-off-by: Laxman Dewangan Signed-off-by: Jonathan Cameron --- include/linux/iio/consumer.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index fad58671c49e..e1e033d6a81f 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -48,6 +48,33 @@ struct iio_channel *iio_channel_get(struct device *dev, */ void iio_channel_release(struct iio_channel *chan); +/** + * devm_iio_channel_get() - Resource managed version of iio_channel_get(). + * @dev: Pointer to consumer device. Device name must match + * the name of the device as provided in the iio_map + * with which the desired provider to consumer mapping + * was registered. + * @consumer_channel: Unique name to identify the channel on the consumer + * side. This typically describes the channels use within + * the consumer. E.g. 'battery_voltage' + * + * Returns a pointer to negative errno if it is not able to get the iio channel + * otherwise returns valid pointer for iio channel. + * + * The allocated iio channel is automatically released when the device is + * unbound. + */ +struct iio_channel *devm_iio_channel_get(struct device *dev, + const char *consumer_channel); +/** + * devm_iio_channel_release() - Resource managed version of + * iio_channel_release(). + * @dev: Pointer to consumer device for which resource + * is allocared. + * @chan: The channel to be released. + */ +void devm_iio_channel_release(struct device *dev, struct iio_channel *chan); + /** * iio_channel_get_all() - get all channels associated with a client * @dev: Pointer to consumer device. -- cgit v1.2.3 From efc2c0133f198bc65593a67015af358919b0c48f Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 6 Apr 2016 16:01:07 +0530 Subject: iio: core: Add devm_ APIs for iio_channel_{get,release}_all Some of kernel driver uses the IIO framework to get the sensor value via ADC or IIO HW driver. The client driver get iio channel by iio_channel_get_all() and release it by calling iio_channel_release_all(). Add resource managed version (devm_*) of these APIs so that if client calls the devm_iio_channel_get_all() then it need not to release it explicitly, it can be done by managed device framework when driver get un-binded. This reduces the code in error path and also need of .remove callback in some cases. Signed-off-by: Laxman Dewangan Signed-off-by: Jonathan Cameron --- include/linux/iio/consumer.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index e1e033d6a81f..3d672f72e7ec 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -92,6 +92,32 @@ struct iio_channel *iio_channel_get_all(struct device *dev); */ void iio_channel_release_all(struct iio_channel *chan); +/** + * devm_iio_channel_get_all() - Resource managed version of + * iio_channel_get_all(). + * @dev: Pointer to consumer device. + * + * Returns a pointer to negative errno if it is not able to get the iio channel + * otherwise returns an array of iio_channel structures terminated with one with + * null iio_dev pointer. + * + * This function is used by fairly generic consumers to get all the + * channels registered as having this consumer. + * + * The allocated iio channels are automatically released when the device is + * unbounded. + */ +struct iio_channel *devm_iio_channel_get_all(struct device *dev); + +/** + * devm_iio_channel_release_all() - Resource managed version of + * iio_channel_release_all(). + * @dev: Pointer to consumer device for which resource + * is allocared. + * @chan: Array channel to be released. + */ +void devm_iio_channel_release_all(struct device *dev, struct iio_channel *chan); + struct iio_cb_buffer; /** * iio_channel_get_all_cb() - register callback for triggered capture -- cgit v1.2.3 From b0fcd8ab7b3c89b5da7fff5224d06ed73e7a33cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 23 Mar 2016 11:19:00 +0100 Subject: mtd: nand: add new enum for storing ECC algorithm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our nand_ecc_modes_t is already a bit abused by value NAND_ECC_SOFT_BCH. This enum should store ECC mode only and putting algorithm details there is a bad idea. It would result in too many values impossible to support in a sane way. To solve this problem let's add a new enum. We'll have to modify all drivers to set it properly but once it's done it'll be possible to drop NAND_ECC_SOFT_BCH. That will result in a cleaner design and more possibilities like setting ECC algorithm for hardware ECC mode. Signed-off-by: Rafał Miłecki Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 56574ba36555..1b673e19667c 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -119,6 +119,12 @@ typedef enum { NAND_ECC_SOFT_BCH, } nand_ecc_modes_t; +enum nand_ecc_algo { + NAND_ECC_UNKNOWN, + NAND_ECC_HAMMING, + NAND_ECC_BCH, +}; + /* * Constants for Hardware ECC */ @@ -458,6 +464,7 @@ struct nand_hw_control { /** * struct nand_ecc_ctrl - Control structure for ECC * @mode: ECC mode + * @algo: ECC algorithm * @steps: number of ECC steps per page * @size: data bytes per ECC step * @bytes: ECC bytes per step @@ -508,6 +515,7 @@ struct nand_hw_control { */ struct nand_ecc_ctrl { nand_ecc_modes_t mode; + enum nand_ecc_algo algo; int steps; int size; int bytes; -- cgit v1.2.3 From dd2dcc004230b9d8fa809102cd326e3ee4bbdb2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 23 Mar 2016 11:19:01 +0100 Subject: of: mtd: prepare helper reading NAND ECC algo from DT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NAND subsystem is being slightly reworked to store ECC details in separated fields. In future we'll want to add support for more DT properties as specifying every possible setup with a single "nand-ecc-mode" is a pretty bad idea. To allow this let's add a helper that will support something like "nand-ecc-algo" in future. Right now we use it for keeping backward compatibility. Signed-off-by: Rafał Miłecki Signed-off-by: Boris Brezillon --- include/linux/of_mtd.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_mtd.h b/include/linux/of_mtd.h index e266caa36402..0f6aca5c6f2f 100644 --- a/include/linux/of_mtd.h +++ b/include/linux/of_mtd.h @@ -13,6 +13,7 @@ #include int of_get_nand_ecc_mode(struct device_node *np); +int of_get_nand_ecc_algo(struct device_node *np); int of_get_nand_ecc_step_size(struct device_node *np); int of_get_nand_ecc_strength(struct device_node *np); int of_get_nand_bus_width(struct device_node *np); @@ -25,6 +26,11 @@ static inline int of_get_nand_ecc_mode(struct device_node *np) return -ENOSYS; } +static inline int of_get_nand_ecc_algo(struct device_node *np) +{ + return -ENOSYS; +} + static inline int of_get_nand_ecc_step_size(struct device_node *np) { return -ENOSYS; -- cgit v1.2.3 From 7a654172161c8c9c7d59cbd0054d9e63c7411219 Mon Sep 17 00:00:00 2001 From: Raghav Dogra Date: Wed, 17 Feb 2016 16:54:18 +0530 Subject: mtd/ifc: Add support for IFC controller version 2.0 The new IFC controller version 2.0 has a different memory map page. Upto IFC 1.4 PAGE size is 4 KB and from IFC2.0 PAGE size is 64KB. This patch segregates the IFC global and runtime registers to appropriate PAGE sizes. Signed-off-by: Jaiprakash Singh Signed-off-by: Raghav Dogra Acked-by: Li Yang Signed-off-by: Raghav Dogra Acked-by: Scott Wood Acked-by: Brian Norris Signed-off-by: Boris Brezillon --- include/linux/fsl_ifc.h | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsl_ifc.h b/include/linux/fsl_ifc.h index 0023088b253b..3f9778cbc79d 100644 --- a/include/linux/fsl_ifc.h +++ b/include/linux/fsl_ifc.h @@ -39,6 +39,10 @@ #define FSL_IFC_VERSION_MASK 0x0F0F0000 #define FSL_IFC_VERSION_1_0_0 0x01000000 #define FSL_IFC_VERSION_1_1_0 0x01010000 +#define FSL_IFC_VERSION_2_0_0 0x02000000 + +#define PGOFFSET_64K (64*1024) +#define PGOFFSET_4K (4*1024) /* * CSPR - Chip Select Property Register @@ -723,20 +727,26 @@ struct fsl_ifc_nand { __be32 nand_evter_en; u32 res17[0x2]; __be32 nand_evter_intr_en; - u32 res18[0x2]; + __be32 nand_vol_addr_stat; + u32 res18; __be32 nand_erattr0; __be32 nand_erattr1; u32 res19[0x10]; __be32 nand_fsr; - u32 res20; - __be32 nand_eccstat[4]; - u32 res21[0x20]; + u32 res20[0x3]; + __be32 nand_eccstat[6]; + u32 res21[0x1c]; __be32 nanndcr; u32 res22[0x2]; __be32 nand_autoboot_trgr; u32 res23; __be32 nand_mdr; - u32 res24[0x5C]; + u32 res24[0x1C]; + __be32 nand_dll_lowcfg0; + __be32 nand_dll_lowcfg1; + u32 res25; + __be32 nand_dll_lowstat; + u32 res26[0x3c]; }; /* @@ -771,13 +781,12 @@ struct fsl_ifc_gpcm { __be32 gpcm_erattr1; __be32 gpcm_erattr2; __be32 gpcm_stat; - u32 res4[0x1F3]; }; /* * IFC Controller Registers */ -struct fsl_ifc_regs { +struct fsl_ifc_global { __be32 ifc_rev; u32 res1[0x2]; struct { @@ -803,21 +812,26 @@ struct fsl_ifc_regs { } ftim_cs[FSL_IFC_BANK_COUNT]; u32 res9[0x30]; __be32 rb_stat; - u32 res10[0x2]; + __be32 rb_map; + __be32 wb_map; __be32 ifc_gcr; - u32 res11[0x2]; + u32 res10[0x2]; __be32 cm_evter_stat; - u32 res12[0x2]; + u32 res11[0x2]; __be32 cm_evter_en; - u32 res13[0x2]; + u32 res12[0x2]; __be32 cm_evter_intr_en; - u32 res14[0x2]; + u32 res13[0x2]; __be32 cm_erattr0; __be32 cm_erattr1; - u32 res15[0x2]; + u32 res14[0x2]; __be32 ifc_ccr; __be32 ifc_csr; - u32 res16[0x2EB]; + __be32 ddr_ccr_low; +}; + + +struct fsl_ifc_runtime { struct fsl_ifc_nand ifc_nand; struct fsl_ifc_nor ifc_nor; struct fsl_ifc_gpcm ifc_gpcm; @@ -831,7 +845,8 @@ extern int fsl_ifc_find(phys_addr_t addr_base); struct fsl_ifc_ctrl { /* device info */ struct device *dev; - struct fsl_ifc_regs __iomem *regs; + struct fsl_ifc_global __iomem *gregs; + struct fsl_ifc_runtime __iomem *rregs; int irq; int nand_irq; spinlock_t lock; -- cgit v1.2.3 From 9d02fc2a5129449581c3108c260e96377cf35f7e Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 26 Aug 2015 16:08:12 +0200 Subject: mtd: nand: export default read/write oob functions Export the default read/write oob functions (for the standard and syndrome scheme), so that drivers can use them for their raw implementation and implement their own functions for the normal oob operation. This is required if your ECC engine is capable of fixing some of the OOB data. In this case you have to overload the ->read_oob() and ->write_oob(), but if you don't specify the ->read/write_oob_raw() functions they are assigned to the ->read/write_oob() implementation, which is not what you want. Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 1b673e19667c..7e06afb8552c 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1078,4 +1078,18 @@ int nand_check_erased_ecc_chunk(void *data, int datalen, void *ecc, int ecclen, void *extraoob, int extraooblen, int threshold); + +/* Default write_oob implementation */ +int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page); + +/* Default write_oob syndrome implementation */ +int nand_write_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, + int page); + +/* Default read_oob implementation */ +int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page); + +/* Default read_oob syndrome implementation */ +int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, + int page); #endif /* __LINUX_MTD_NAND_H */ -- cgit v1.2.3 From 75eb2cec251fda33c9bb716ecc372819abb9278a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 4 Feb 2016 09:52:30 +0100 Subject: mtd: add mtd_ooblayout_xxx() helper functions In order to make the ecclayout definition completely dynamic we need to rework the way the OOB layout are defined and iterated. Create a few mtd_ooblayout_xxx() helpers to ease OOB bytes manipulation and hide ecclayout internals to their users. Signed-off-by: Boris Brezillon --- include/linux/mtd/mtd.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index ef9fea4fc400..117ca1ff581d 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -108,6 +108,21 @@ struct nand_ecclayout { struct nand_oobfree oobfree[MTD_MAX_OOBFREE_ENTRIES_LARGE]; }; +/** + * struct mtd_oob_region - oob region definition + * @offset: region offset + * @length: region length + * + * This structure describes a region of the OOB area, and is used + * to retrieve ECC or free bytes sections. + * Each section is defined by an offset within the OOB area and a + * length. + */ +struct mtd_oob_region { + u32 offset; + u32 length; +}; + struct module; /* only needed for owner field in mtd_info */ struct mtd_info { @@ -253,6 +268,24 @@ struct mtd_info { int usecount; }; +int mtd_ooblayout_ecc(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobecc); +int mtd_ooblayout_find_eccregion(struct mtd_info *mtd, int eccbyte, + int *section, + struct mtd_oob_region *oobregion); +int mtd_ooblayout_get_eccbytes(struct mtd_info *mtd, u8 *eccbuf, + const u8 *oobbuf, int start, int nbytes); +int mtd_ooblayout_set_eccbytes(struct mtd_info *mtd, const u8 *eccbuf, + u8 *oobbuf, int start, int nbytes); +int mtd_ooblayout_free(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobfree); +int mtd_ooblayout_get_databytes(struct mtd_info *mtd, u8 *databuf, + const u8 *oobbuf, int start, int nbytes); +int mtd_ooblayout_set_databytes(struct mtd_info *mtd, const u8 *databuf, + u8 *oobbuf, int start, int nbytes); +int mtd_ooblayout_count_freebytes(struct mtd_info *mtd); +int mtd_ooblayout_count_eccbytes(struct mtd_info *mtd); + static inline void mtd_set_of_node(struct mtd_info *mtd, struct device_node *np) { -- cgit v1.2.3 From 036d6543f85319ffe96afad6de73d3a220917a63 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 3 Feb 2016 18:53:44 +0100 Subject: mtd: add mtd_set_ecclayout() helper function Add an mtd_set_ecclayout() helper function to avoid direct accesses to the mtd->ecclayout field. This will ease future reworks of ECC layout definition. Signed-off-by: Boris Brezillon --- include/linux/mtd/mtd.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 117ca1ff581d..e62da8462493 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -286,6 +286,12 @@ int mtd_ooblayout_set_databytes(struct mtd_info *mtd, const u8 *databuf, int mtd_ooblayout_count_freebytes(struct mtd_info *mtd); int mtd_ooblayout_count_eccbytes(struct mtd_info *mtd); +static inline void mtd_set_ecclayout(struct mtd_info *mtd, + struct nand_ecclayout *ecclayout) +{ + mtd->ecclayout = ecclayout; +} + static inline void mtd_set_of_node(struct mtd_info *mtd, struct device_node *np) { -- cgit v1.2.3 From adbbc3bc827eb1f43a932d783f09ba55c8ec8379 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 3 Feb 2016 19:01:31 +0100 Subject: mtd: create an mtd_ooblayout_ops struct to ease ECC layout definition ECC layout definitions are currently exposed using the nand_ecclayout struct which embeds oobfree and eccpos arrays with predefined size. This approach was acceptable when NAND chips were providing relatively small OOB regions, but MLC and TLC now provide OOB regions of several hundreds of bytes, which implies a non negligible overhead for everybody even those who only need to support legacy NANDs. Create an mtd_ooblayout_ops interface providing the same functionality (expose the ECC and oobfree layout) without the need for this huge structure. The mtd->ecclayout is now deprecated and should be replaced by the equivalent mtd_ooblayout_ops. In the meantime we provide a wrapper around the ->ecclayout field to ease migration to this new model. Signed-off-by: Boris Brezillon --- include/linux/mtd/mtd.h | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index e62da8462493..177bf314ad70 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -101,6 +101,9 @@ struct mtd_oob_ops { * similar, smaller struct nand_ecclayout_user (in mtd-abi.h) that is retained * for export to user-space via the ECCGETLAYOUT ioctl. * nand_ecclayout should be expandable in the future simply by the above macros. + * + * This structure is now deprecated, you should use struct nand_ecclayout_ops + * to describe your OOB layout. */ struct nand_ecclayout { __u32 eccbytes; @@ -123,6 +126,22 @@ struct mtd_oob_region { u32 length; }; +/* + * struct mtd_ooblayout_ops - NAND OOB layout operations + * @ecc: function returning an ECC region in the OOB area. + * Should return -ERANGE if %section exceeds the total number of + * ECC sections. + * @free: function returning a free region in the OOB area. + * Should return -ERANGE if %section exceeds the total number of + * free sections. + */ +struct mtd_ooblayout_ops { + int (*ecc)(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobecc); + int (*free)(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobfree); +}; + struct module; /* only needed for owner field in mtd_info */ struct mtd_info { @@ -181,9 +200,12 @@ struct mtd_info { const char *name; int index; - /* ECC layout structure pointer - read only! */ + /* [Deprecated] ECC layout structure pointer - read only! */ struct nand_ecclayout *ecclayout; + /* OOB layout description */ + const struct mtd_ooblayout_ops *ooblayout; + /* the ecc step size. */ unsigned int ecc_step_size; @@ -286,10 +308,12 @@ int mtd_ooblayout_set_databytes(struct mtd_info *mtd, const u8 *databuf, int mtd_ooblayout_count_freebytes(struct mtd_info *mtd); int mtd_ooblayout_count_eccbytes(struct mtd_info *mtd); -static inline void mtd_set_ecclayout(struct mtd_info *mtd, - struct nand_ecclayout *ecclayout) +void mtd_set_ecclayout(struct mtd_info *mtd, struct nand_ecclayout *ecclayout); + +static inline void mtd_set_ooblayout(struct mtd_info *mtd, + const struct mtd_ooblayout_ops *ooblayout) { - mtd->ecclayout = ecclayout; + mtd->ooblayout = ooblayout; } static inline void mtd_set_of_node(struct mtd_info *mtd, -- cgit v1.2.3 From 41b207a70d3a86b9e2eede155e87838234c7cbd5 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 3 Feb 2016 19:06:15 +0100 Subject: mtd: nand: implement the default mtd_ooblayout_ops Replace the default nand_ecclayout definitions for large and small page devices with the equivalent mtd_ooblayout_ops. Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 7e06afb8552c..f2ded7b1b3b8 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -748,6 +748,9 @@ struct nand_chip { void *priv; }; +extern const struct mtd_ooblayout_ops nand_ooblayout_sp_ops; +extern const struct mtd_ooblayout_ops nand_ooblayout_lp_ops; + static inline void nand_set_flash_node(struct nand_chip *chip, struct device_node *np) { -- cgit v1.2.3 From 74e1fbb1375a3ede3e17da22911761ce9bc8f53f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2016 17:49:17 +0200 Subject: of: Introduce struct of_phandle_iterator This struct carrys all necessary information to iterate over a list of phandles and extract the arguments. Add an init-function for the iterator and make use of it in __of_parse_phandle_with_args(). Signed-off-by: Joerg Roedel Signed-off-by: Rob Herring --- include/linux/of.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 7fcb681baadf..0f187dbb890b 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -75,6 +75,23 @@ struct of_phandle_args { uint32_t args[MAX_PHANDLE_ARGS]; }; +struct of_phandle_iterator { + /* Common iterator information */ + const char *cells_name; + int cell_count; + const struct device_node *parent; + + /* List size information */ + const __be32 *list_end; + const __be32 *phandle_end; + + /* Current position state */ + const __be32 *cur; + uint32_t cur_count; + phandle phandle; + struct device_node *node; +}; + struct of_reconfig_data { struct device_node *dn; struct property *prop; @@ -334,6 +351,13 @@ extern int of_parse_phandle_with_fixed_args(const struct device_node *np, extern int of_count_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name); +/* phandle iterator functions */ +extern int of_phandle_iterator_init(struct of_phandle_iterator *it, + const struct device_node *np, + const char *list_name, + const char *cells_name, + int cell_count); + extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)); extern int of_alias_get_id(struct device_node *np, const char *stem); extern int of_alias_get_highest_id(const char *stem); @@ -608,6 +632,15 @@ static inline int of_count_phandle_with_args(struct device_node *np, return -ENOSYS; } +static inline int of_phandle_iterator_init(struct of_phandle_iterator *it, + const struct device_node *np, + const char *list_name, + const char *cells_name, + int cell_count) +{ + return -ENOSYS; +} + static inline int of_alias_get_id(struct device_node *np, const char *stem) { return -ENOSYS; -- cgit v1.2.3 From cd209b412c8a5d632b51af1e45576f0d00b8105f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2016 17:49:18 +0200 Subject: of: Move phandle walking to of_phandle_iterator_next() Move the code to walk over the phandles out of the loop in __of_parse_phandle_with_args() to a separate function that just works with the iterator handle: of_phandle_iterator_next(). Signed-off-by: Joerg Roedel Signed-off-by: Rob Herring --- include/linux/of.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 0f187dbb890b..1f5e108f6716 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -358,6 +358,8 @@ extern int of_phandle_iterator_init(struct of_phandle_iterator *it, const char *cells_name, int cell_count); +extern int of_phandle_iterator_next(struct of_phandle_iterator *it); + extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)); extern int of_alias_get_id(struct device_node *np, const char *stem); extern int of_alias_get_highest_id(const char *stem); @@ -641,6 +643,11 @@ static inline int of_phandle_iterator_init(struct of_phandle_iterator *it, return -ENOSYS; } +static inline int of_phandle_iterator_next(struct of_phandle_iterator *it) +{ + return -ENOSYS; +} + static inline int of_alias_get_id(struct device_node *np, const char *stem) { return -ENOSYS; -- cgit v1.2.3 From f623ce95a51baee6a6638f0b025efc0229a9ac0d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2016 17:49:20 +0200 Subject: of: Introduce of_for_each_phandle() helper macro With this macro any user can easily iterate over a list of phandles. The patch also converts __of_parse_phandle_with_args() to make use of the macro. The of_count_phandle_with_args() function is not converted, because the macro hides the return value of of_phandle_iterator_init(), which is needed in there. Signed-off-by: Joerg Roedel Signed-off-by: Rob Herring --- include/linux/of.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 1f5e108f6716..b0b80716fbfb 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -908,6 +908,12 @@ static inline int of_property_read_s32(const struct device_node *np, return of_property_read_u32(np, propname, (u32*) out_value); } +#define of_for_each_phandle(it, err, np, ln, cn, cc) \ + for (of_phandle_iterator_init((it), (np), (ln), (cn), (cc)), \ + err = of_phandle_iterator_next(it); \ + err == 0; \ + err = of_phandle_iterator_next(it)) + #define of_property_for_each_u32(np, propname, prop, p, u) \ for (prop = of_find_property(np, propname, NULL), \ p = of_prop_next_u32(prop, NULL, &u); \ -- cgit v1.2.3 From abdaa77b18480361f3565d958a2acffad268c39c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 Apr 2016 17:49:21 +0200 Subject: of: Introduce of_phandle_iterator_args() This helper function can be used to copy the arguments of a phandle to an array. Signed-off-by: Joerg Roedel Signed-off-by: Rob Herring --- include/linux/of.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index b0b80716fbfb..71e1c35a5960 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -359,6 +359,9 @@ extern int of_phandle_iterator_init(struct of_phandle_iterator *it, int cell_count); extern int of_phandle_iterator_next(struct of_phandle_iterator *it); +extern int of_phandle_iterator_args(struct of_phandle_iterator *it, + uint32_t *args, + int size); extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)); extern int of_alias_get_id(struct device_node *np, const char *stem); @@ -648,6 +651,13 @@ static inline int of_phandle_iterator_next(struct of_phandle_iterator *it) return -ENOSYS; } +static inline int of_phandle_iterator_args(struct of_phandle_iterator *it, + uint32_t *args, + int size) +{ + return 0; +} + static inline int of_alias_get_id(struct device_node *np, const char *stem) { return -ENOSYS; -- cgit v1.2.3 From d0bad49bb0a094a1beb06640785f95cb256b7272 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Thu, 3 Mar 2016 12:54:55 -0600 Subject: tracing: Add enable_hist/disable_hist triggers Similar to enable_event/disable_event triggers, these triggers enable and disable the aggregation of events into maps rather than enabling and disabling their writing into the trace buffer. They can be used to automatically start and stop hist triggers based on a matching filter condition. If there's a paused hist trigger on system:event, the following would start it when the filter condition was hit: # echo enable_hist:system:event [ if filter] > event/trigger And the following would disable a running system:event hist trigger: # echo disable_hist:system:event [ if filter] > event/trigger See Documentation/trace/events.txt for real examples. Link: http://lkml.kernel.org/r/f812f086e52c8b7c8ad5443487375e03c96a601f.1457029949.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Tested-by: Masami Hiramatsu Reviewed-by: Namhyung Kim Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 404603720650..5f89a5b0c7e6 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -408,6 +408,7 @@ enum event_trigger_type { ETT_STACKTRACE = (1 << 2), ETT_EVENT_ENABLE = (1 << 3), ETT_EVENT_HIST = (1 << 4), + ETT_HIST_ENABLE = (1 << 5), }; extern int filter_match_preds(struct event_filter *filter, void *rec); -- cgit v1.2.3 From c1d61c9bb163e696bf06850bcabbd26386554489 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 31 Mar 2016 16:34:32 -0600 Subject: PCI: Reverse standard ACS vs device-specific ACS enabling The original thought was that if a device implemented ACS, then surely we want to use that... well, it turns out that devices can make an ACS capability so broken that we still need to fall back to quirks. Reverse the order of ACS enabling to give quirks first shot at it. Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 004b8133417d..aaec79aee805 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1663,7 +1663,7 @@ enum pci_fixup_pass { #ifdef CONFIG_PCI_QUIRKS void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev); int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags); -void pci_dev_specific_enable_acs(struct pci_dev *dev); +int pci_dev_specific_enable_acs(struct pci_dev *dev); #else static inline void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) { } @@ -1672,7 +1672,10 @@ static inline int pci_dev_specific_acs_enabled(struct pci_dev *dev, { return -ENOTTY; } -static inline void pci_dev_specific_enable_acs(struct pci_dev *dev) { } +static inline int pci_dev_specific_enable_acs(struct pci_dev *dev) +{ + return -ENOTTY; +} #endif void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen); -- cgit v1.2.3 From a14b9e0512404ed7d4415b888dc9f1f9785a4fa3 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 5 Feb 2016 16:40:47 -0800 Subject: clkdev: Remove clk_register_clkdevs() Now that we've converted the only caller over to another clkdev API, remove this one. Reviewed-by: Andy Shevchenko Cc: Russell King Signed-off-by: Stephen Boyd --- include/linux/clkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h index c2c04f7cbe8a..e6f8eb1d585f 100644 --- a/include/linux/clkdev.h +++ b/include/linux/clkdev.h @@ -45,7 +45,6 @@ void clkdev_add_table(struct clk_lookup *, size_t); int clk_add_alias(const char *, const char *, const char *, struct device *); int clk_register_clkdev(struct clk *, const char *, const char *); -int clk_register_clkdevs(struct clk *, struct clk_lookup *, size_t); #ifdef CONFIG_COMMON_CLK int __clk_get(struct clk *clk); -- cgit v1.2.3 From 4143804c4fdef40358c654d1fb2271a1a0f1fedf Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 5 Feb 2016 17:02:52 -0800 Subject: clk: Add {devm_}clk_hw_{register,unregister}() APIs We've largely split the clk consumer and provider APIs along struct clk and struct clk_hw, but clk_register() still returns a struct clk pointer for each struct clk_hw that's registered. Eventually we'd like to only allocate struct clks when there's a user, because struct clk is per-user now, so clk_register() needs to change. Let's add new APIs to register struct clk_hws, but this time we'll hide the struct clk from the caller by returning an int error code. Also add an unregistration API that takes the clk_hw structure that was passed to the registration API. This way provider drivers never have to deal with a struct clk pointer unless they're using the clk consumer APIs. Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index da95258127aa..bc6c8de1fac1 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -655,9 +655,15 @@ struct clk *clk_register_gpio_mux(struct device *dev, const char *name, struct clk *clk_register(struct device *dev, struct clk_hw *hw); struct clk *devm_clk_register(struct device *dev, struct clk_hw *hw); +int __must_check clk_hw_register(struct device *dev, struct clk_hw *hw); +int __must_check devm_clk_hw_register(struct device *dev, struct clk_hw *hw); + void clk_unregister(struct clk *clk); void devm_clk_unregister(struct device *dev, struct clk *clk); +void clk_hw_unregister(struct clk_hw *hw); +void devm_clk_hw_unregister(struct device *dev, struct clk_hw *hw); + /* helper functions */ const char *__clk_get_name(const struct clk *clk); const char *clk_hw_get_name(const struct clk_hw *hw); -- cgit v1.2.3 From 0861e5b8cf80038e91942f1005c8dfce79d18c38 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 5 Feb 2016 17:38:26 -0800 Subject: clk: Add clk_hw OF clk providers Now that we have a clk registration API that doesn't return struct clks, we need to have some way to hand out struct clks via the clk_get() APIs that doesn't involve associating struct clk pointers with an OF node. Currently we ask the OF provider to give us a struct clk pointer for some clkspec, turn that struct clk into a struct clk_hw and then allocate a new struct clk to return to the caller. Let's add a clk_hw based OF provider hook that returns a struct clk_hw directly, so that we skip the intermediate step of converting from struct clk to struct clk_hw. Eventually when we've converted all OF clk providers to struct clk_hw based APIs we can remove the struct clk based ones. It should also be noted that we change the onecell provider to have a flex array instead of a pointer for the array of clk_hw pointers. This allows providers to allocate one structure of the correct length in one step instead of two. Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index bc6c8de1fac1..bf8c8bb8c2cb 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -709,6 +709,11 @@ struct clk_onecell_data { unsigned int clk_num; }; +struct clk_hw_onecell_data { + size_t num; + struct clk_hw *hws[]; +}; + extern struct of_device_id __clk_of_table; #define CLK_OF_DECLARE(name, compat, fn) OF_DECLARE_1(clk, name, compat, fn) @@ -718,10 +723,18 @@ int of_clk_add_provider(struct device_node *np, struct clk *(*clk_src_get)(struct of_phandle_args *args, void *data), void *data); +int of_clk_add_hw_provider(struct device_node *np, + struct clk_hw *(*get)(struct of_phandle_args *clkspec, + void *data), + void *data); void of_clk_del_provider(struct device_node *np); struct clk *of_clk_src_simple_get(struct of_phandle_args *clkspec, void *data); +struct clk_hw *of_clk_hw_simple_get(struct of_phandle_args *clkspec, + void *data); struct clk *of_clk_src_onecell_get(struct of_phandle_args *clkspec, void *data); +struct clk_hw *of_clk_hw_onecell_get(struct of_phandle_args *clkspec, + void *data); unsigned int of_clk_get_parent_count(struct device_node *np); int of_clk_parent_fill(struct device_node *np, const char **parents, unsigned int size); @@ -738,17 +751,34 @@ static inline int of_clk_add_provider(struct device_node *np, { return 0; } +static inline int of_clk_add_hw_provider(struct device_node *np, + struct clk_hw *(*get)(struct of_phandle_args *clkspec, + void *data), + void *data) +{ + return 0; +} static inline void of_clk_del_provider(struct device_node *np) {} static inline struct clk *of_clk_src_simple_get( struct of_phandle_args *clkspec, void *data) { return ERR_PTR(-ENOENT); } +static inline struct clk_hw * +of_clk_hw_simple_get(struct of_phandle_args *clkspec, void *data) +{ + return ERR_PTR(-ENOENT); +} static inline struct clk *of_clk_src_onecell_get( struct of_phandle_args *clkspec, void *data) { return ERR_PTR(-ENOENT); } +static inline struct clk_hw * +of_clk_hw_onecell_get(struct of_phandle_args *clkspec, void *data) +{ + return ERR_PTR(-ENOENT); +} static inline int of_clk_get_parent_count(struct device_node *np) { return 0; -- cgit v1.2.3 From e4f1b49bda6d6aa2e13730ff7eeccbe65a6271f1 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 8 Feb 2016 14:59:49 -0800 Subject: clkdev: Add clk_hw based registration APIs Now that we have a clk registration API that doesn't return struct clks, we need to have some way to hand out struct clks via the clk_get() APIs that doesn't involve associating struct clk pointers with a struct clk_lookup. Luckily, clkdev already operates on struct clk_hw pointers, except for the registration facing APIs where it converts struct clk pointers into struct clk_hw pointers almost immediately. Let's add clk_hw based registration APIs so that we can skip the conversion step and provide a way for clk provider drivers to operate exclusively on clk_hw structs. This way we clearly split the API between consumers and providers. Cc: Russell King Signed-off-by: Stephen Boyd --- include/linux/clkdev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h index e6f8eb1d585f..2eabc862abdb 100644 --- a/include/linux/clkdev.h +++ b/include/linux/clkdev.h @@ -15,6 +15,7 @@ #include struct clk; +struct clk_hw; struct device; struct clk_lookup { @@ -34,17 +35,22 @@ struct clk_lookup { struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id, const char *dev_fmt, ...) __printf(3, 4); +struct clk_lookup *clkdev_hw_alloc(struct clk_hw *hw, const char *con_id, + const char *dev_fmt, ...) __printf(3, 4); void clkdev_add(struct clk_lookup *cl); void clkdev_drop(struct clk_lookup *cl); struct clk_lookup *clkdev_create(struct clk *clk, const char *con_id, const char *dev_fmt, ...) __printf(3, 4); +struct clk_lookup *clkdev_hw_create(struct clk_hw *hw, const char *con_id, + const char *dev_fmt, ...) __printf(3, 4); void clkdev_add_table(struct clk_lookup *, size_t); int clk_add_alias(const char *, const char *, const char *, struct device *); int clk_register_clkdev(struct clk *, const char *, const char *); +int clk_hw_register_clkdev(struct clk_hw *, const char *, const char *); #ifdef CONFIG_COMMON_CLK int __clk_get(struct clk *clk); -- cgit v1.2.3 From eb7d264f3bf9ca7c093efb77bdde557c6c6e826f Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sat, 6 Feb 2016 23:26:37 -0800 Subject: clk: divider: Add hw based registration APIs Add registration APIs in the clk divider code to return struct clk_hw pointers instead of struct clk pointers. This way we hide the struct clk pointer from providers unless they need to use consumer facing APIs. Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index bf8c8bb8c2cb..8885d0350596 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -407,12 +407,22 @@ struct clk *clk_register_divider(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, spinlock_t *lock); +struct clk_hw *clk_hw_register_divider(struct device *dev, const char *name, + const char *parent_name, unsigned long flags, + void __iomem *reg, u8 shift, u8 width, + u8 clk_divider_flags, spinlock_t *lock); struct clk *clk_register_divider_table(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, const struct clk_div_table *table, spinlock_t *lock); +struct clk_hw *clk_hw_register_divider_table(struct device *dev, + const char *name, const char *parent_name, unsigned long flags, + void __iomem *reg, u8 shift, u8 width, + u8 clk_divider_flags, const struct clk_div_table *table, + spinlock_t *lock); void clk_unregister_divider(struct clk *clk); +void clk_hw_unregister_divider(struct clk_hw *hw); /** * struct clk_mux - multiplexer clock -- cgit v1.2.3 From e270d8cb13763f58107198e879cf396511ba2867 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sat, 6 Feb 2016 23:54:45 -0800 Subject: clk: gate: Add hw based registration APIs Add registration APIs in the clk gate code to return struct clk_hw pointers instead of struct clk pointers. This way we hide the struct clk pointer from providers unless they need to use consumer facing APIs. Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 8885d0350596..bf12050aadd5 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -326,7 +326,12 @@ struct clk *clk_register_gate(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 bit_idx, u8 clk_gate_flags, spinlock_t *lock); +struct clk_hw *clk_hw_register_gate(struct device *dev, const char *name, + const char *parent_name, unsigned long flags, + void __iomem *reg, u8 bit_idx, + u8 clk_gate_flags, spinlock_t *lock); void clk_unregister_gate(struct clk *clk); +void clk_hw_unregister_gate(struct clk_hw *hw); struct clk_div_table { unsigned int val; -- cgit v1.2.3 From 264b31719735eb1fcbed47cecdb20f517e804856 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sun, 7 Feb 2016 00:05:48 -0800 Subject: clk: mux: Add hw based registration APIs Add registration APIs in the clk mux code to return struct clk_hw pointers instead of struct clk pointers. This way we hide the struct clk pointer from providers unless they need to use consumer facing APIs. Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index bf12050aadd5..d690d99b9c1c 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -478,14 +478,25 @@ struct clk *clk_register_mux(struct device *dev, const char *name, unsigned long flags, void __iomem *reg, u8 shift, u8 width, u8 clk_mux_flags, spinlock_t *lock); +struct clk_hw *clk_hw_register_mux(struct device *dev, const char *name, + const char * const *parent_names, u8 num_parents, + unsigned long flags, + void __iomem *reg, u8 shift, u8 width, + u8 clk_mux_flags, spinlock_t *lock); struct clk *clk_register_mux_table(struct device *dev, const char *name, const char * const *parent_names, u8 num_parents, unsigned long flags, void __iomem *reg, u8 shift, u32 mask, u8 clk_mux_flags, u32 *table, spinlock_t *lock); +struct clk_hw *clk_hw_register_mux_table(struct device *dev, const char *name, + const char * const *parent_names, u8 num_parents, + unsigned long flags, + void __iomem *reg, u8 shift, u32 mask, + u8 clk_mux_flags, u32 *table, spinlock_t *lock); void clk_unregister_mux(struct clk *clk); +void clk_hw_unregister_mux(struct clk_hw *hw); void of_fixed_factor_clk_setup(struct device_node *node); -- cgit v1.2.3 From 0759ac8a73dc2c8cc8ac697fbe5dbd8d67348d37 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sun, 7 Feb 2016 00:11:06 -0800 Subject: clk: fixed-factor: Add hw based registration APIs Add registration APIs in the clk fixed-factor code to return struct clk_hw pointers instead of struct clk pointers. This way we hide the struct clk pointer from providers unless they need to use consumer facing APIs. Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index d690d99b9c1c..79ad1a8a6831 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -525,6 +525,10 @@ struct clk *clk_register_fixed_factor(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned int mult, unsigned int div); void clk_unregister_fixed_factor(struct clk *clk); +struct clk_hw *clk_hw_register_fixed_factor(struct device *dev, + const char *name, const char *parent_name, unsigned long flags, + unsigned int mult, unsigned int div); +void clk_hw_unregister_fixed_factor(struct clk_hw *hw); /** * struct clk_fractional_divider - adjustable fractional divider clock -- cgit v1.2.3 From 39b44cff4ad4af6d7abd9dd2acb288b005c26503 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sun, 7 Feb 2016 00:15:09 -0800 Subject: clk: fractional-divider: Add hw based registration APIs Add registration APIs in the clk fractional divider code to return struct clk_hw pointers instead of struct clk pointers. This way we hide the struct clk pointer from providers unless they need to use consumer facing APIs. Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 79ad1a8a6831..bcbaf6c95d52 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -563,6 +563,11 @@ struct clk *clk_register_fractional_divider(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 mshift, u8 mwidth, u8 nshift, u8 nwidth, u8 clk_divider_flags, spinlock_t *lock); +struct clk_hw *clk_hw_register_fractional_divider(struct device *dev, + const char *name, const char *parent_name, unsigned long flags, + void __iomem *reg, u8 mshift, u8 mwidth, u8 nshift, u8 nwidth, + u8 clk_divider_flags, spinlock_t *lock); +void clk_hw_unregister_fractional_divider(struct clk_hw *hw); /** * struct clk_multiplier - adjustable multiplier clock -- cgit v1.2.3 From 49cb392d36397a296dcd51ec57cf83585a89a94a Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sun, 7 Feb 2016 00:20:31 -0800 Subject: clk: composite: Add hw based registration APIs Add registration APIs in the clk composite code to return struct clk_hw pointers instead of struct clk pointers. This way we hide the struct clk pointer from providers unless they need to use consumer facing APIs. Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index bcbaf6c95d52..456c3ced1ac9 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -638,6 +638,13 @@ struct clk *clk_register_composite(struct device *dev, const char *name, struct clk_hw *rate_hw, const struct clk_ops *rate_ops, struct clk_hw *gate_hw, const struct clk_ops *gate_ops, unsigned long flags); +struct clk_hw *clk_hw_register_composite(struct device *dev, const char *name, + const char * const *parent_names, int num_parents, + struct clk_hw *mux_hw, const struct clk_ops *mux_ops, + struct clk_hw *rate_hw, const struct clk_ops *rate_ops, + struct clk_hw *gate_hw, const struct clk_ops *gate_ops, + unsigned long flags); +void clk_hw_unregister_composite(struct clk_hw *hw); /*** * struct clk_gpio_gate - gpio gated clock -- cgit v1.2.3 From b120743a64a3ec68b8c5310a6009094329b4a33b Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sun, 7 Feb 2016 00:27:55 -0800 Subject: clk: gpio: Add hw based registration APIs Add registration APIs in the clk gpio code to return struct clk_hw pointers instead of struct clk pointers. This way we hide the struct clk pointer from providers unless they need to use consumer facing APIs. Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 456c3ced1ac9..6c36c5e8ccbe 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -667,6 +667,10 @@ extern const struct clk_ops clk_gpio_gate_ops; struct clk *clk_register_gpio_gate(struct device *dev, const char *name, const char *parent_name, unsigned gpio, bool active_low, unsigned long flags); +struct clk_hw *clk_hw_register_gpio_gate(struct device *dev, const char *name, + const char *parent_name, unsigned gpio, bool active_low, + unsigned long flags); +void clk_hw_unregister_gpio_gate(struct clk_hw *hw); /** * struct clk_gpio_mux - gpio controlled clock multiplexer @@ -682,6 +686,10 @@ extern const struct clk_ops clk_gpio_mux_ops; struct clk *clk_register_gpio_mux(struct device *dev, const char *name, const char * const *parent_names, u8 num_parents, unsigned gpio, bool active_low, unsigned long flags); +struct clk_hw *clk_hw_register_gpio_mux(struct device *dev, const char *name, + const char * const *parent_names, u8 num_parents, unsigned gpio, + bool active_low, unsigned long flags); +void clk_hw_unregister_gpio_mux(struct clk_hw *hw); /** * clk_register - allocate a new clock, register it and return an opaque cookie -- cgit v1.2.3 From 26ef56be9e0944a9b136169eb47140f309ce745b Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sun, 7 Feb 2016 00:34:13 -0800 Subject: clk: fixed-rate: Add hw based registration APIs Add registration APIs in the clk fixed-rate code to return struct clk_hw pointers instead of struct clk pointers. This way we hide the struct clk pointer from providers unless they need to use consumer facing APIs. Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 6c36c5e8ccbe..c3fc042d517c 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -282,10 +282,17 @@ extern const struct clk_ops clk_fixed_rate_ops; struct clk *clk_register_fixed_rate(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned long fixed_rate); +struct clk_hw *clk_hw_register_fixed_rate(struct device *dev, const char *name, + const char *parent_name, unsigned long flags, + unsigned long fixed_rate); struct clk *clk_register_fixed_rate_with_accuracy(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned long fixed_rate, unsigned long fixed_accuracy); void clk_unregister_fixed_rate(struct clk *clk); +struct clk_hw *clk_hw_register_fixed_rate_with_accuracy(struct device *dev, + const char *name, const char *parent_name, unsigned long flags, + unsigned long fixed_rate, unsigned long fixed_accuracy); + void of_fixed_clk_setup(struct device_node *np); /** -- cgit v1.2.3 From 607ea7cda6315be0ad8be2f98bc9de6f2d656ae6 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 18 Apr 2016 14:41:10 +0300 Subject: net/ipv6/addrconf: simplify sysctl registration Struct ctl_table_header holds pointer to sysctl table which could be used for freeing it after unregistration. IPv4 sysctls already use that. Remove redundant NULL assignment: ndev allocated using kzalloc. This also saves some bytes: sysctl table could be shorter than DEVCONF_MAX+1 if some options are disable in config. Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 7edc14fb66b6..58d6e158755f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -63,7 +63,8 @@ struct ipv6_devconf { } stable_secret; __s32 use_oif_addrs_only; __s32 keep_addr_on_down; - void *sysctl; + + struct ctl_table_header *sysctl_header; }; struct ipv6_params { -- cgit v1.2.3 From bd570ff970a54df653b48ed0cfb373f2ebed083d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 18 Apr 2016 21:01:24 +0200 Subject: bpf: add event output helper for notifications/sampling/logging This patch adds a new helper for cls/act programs that can push events to user space applications. For networking, this can be f.e. for sampling, debugging, logging purposes or pushing of arbitrary wake-up events. The idea is similar to a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") and 39111695b1b8 ("samples: bpf: add bpf_perf_event_output example"). The eBPF program utilizes a perf event array map that user space populates with fds from perf_event_open(), the eBPF program calls into the helper f.e. as skb_event_output(skb, &my_map, BPF_F_CURRENT_CPU, raw, sizeof(raw)) so that the raw data is pushed into the fd f.e. at the map index of the current CPU. User space can poll/mmap/etc on this and has a data channel for receiving events that can be post-processed. The nice thing is that since the eBPF program and user space application making use of it are tightly coupled, they can define their own arbitrary raw data format and what/when they want to push. While f.e. packet headers could be one part of the meta data that is being pushed, this is not a substitute for things like packet sockets as whole packet is not being pushed and push is only done in a single direction. Intention is more of a generically usable, efficient event pipe to applications. Workflow is that tc can pin the map and applications can attach themselves e.g. after cls/act setup to one or multiple map slots, demuxing is done by the eBPF program. Adding this facility is with minimal effort, it reuses the helper introduced in a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") and we get its functionality for free by overloading its BPF_FUNC_ identifier for cls/act programs, ctx is currently unused, but will be made use of in future. Example will be added to iproute2's BPF example files. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5fb3c610fa96..f63afdc43bec 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -169,7 +169,9 @@ u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); void bpf_fd_array_map_clear(struct bpf_map *map); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); + const struct bpf_func_proto *bpf_get_trace_printk_proto(void); +const struct bpf_func_proto *bpf_get_event_output_proto(void); #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); -- cgit v1.2.3 From b853cb9628bfbcc4017da46d5f5b46e3eba9d8c6 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 28 Mar 2016 21:35:22 -0700 Subject: soc: qcom: smd: Make callback pass channel reference By passing the smd channel reference to the callback, rather than the smd device, we can open additional smd channels from sub-devices of smd devices. Also updates the two smd clients today found in mainline. Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross --- include/linux/soc/qcom/smd.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/smd.h b/include/linux/soc/qcom/smd.h index bd51c8a9d807..cb2f81559bc0 100644 --- a/include/linux/soc/qcom/smd.h +++ b/include/linux/soc/qcom/smd.h @@ -26,7 +26,7 @@ struct qcom_smd_device { struct qcom_smd_channel *channel; }; -typedef int (*qcom_smd_cb_t)(struct qcom_smd_device *, const void *, size_t); +typedef int (*qcom_smd_cb_t)(struct qcom_smd_channel *, const void *, size_t); /** * struct qcom_smd_driver - smd driver struct @@ -50,13 +50,16 @@ struct qcom_smd_driver { int qcom_smd_driver_register(struct qcom_smd_driver *drv); void qcom_smd_driver_unregister(struct qcom_smd_driver *drv); +void *qcom_smd_get_drvdata(struct qcom_smd_channel *channel); +void qcom_smd_set_drvdata(struct qcom_smd_channel *channel, void *data); + #define module_qcom_smd_driver(__smd_driver) \ module_driver(__smd_driver, qcom_smd_driver_register, \ qcom_smd_driver_unregister) int qcom_smd_send(struct qcom_smd_channel *channel, const void *data, int len); -struct qcom_smd_channel *qcom_smd_open_channel(struct qcom_smd_device *sdev, +struct qcom_smd_channel *qcom_smd_open_channel(struct qcom_smd_channel *channel, const char *name, qcom_smd_cb_t cb); -- cgit v1.2.3 From 12524e348bcb2189a8cf43829e90256f7e7d4f3d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 13 Apr 2016 11:18:06 +0200 Subject: clk: renesas: mstp: Provide dummy attach/detach_dev callbacks Provide dummy cpg_mstp_{at,de}tach_dev() PM Domain callbacks if CPG/MSTP support is not included, so the rcar-sysc driver won't have to care about this. Signed-off-by: Geert Uytterhoeven Acked-by: Laurent Pinchart --- include/linux/clk/renesas.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk/renesas.h b/include/linux/clk/renesas.h index 095b1681daf4..ab57a298a374 100644 --- a/include/linux/clk/renesas.h +++ b/include/linux/clk/renesas.h @@ -25,7 +25,12 @@ void r8a7779_clocks_init(u32 mode); void rcar_gen2_clocks_init(u32 mode); void cpg_mstp_add_clk_domain(struct device_node *np); +#ifdef CONFIG_CLK_RENESAS_CPG_MSTP int cpg_mstp_attach_dev(struct generic_pm_domain *unused, struct device *dev); void cpg_mstp_detach_dev(struct generic_pm_domain *unused, struct device *dev); +#else +#define cpg_mstp_attach_dev NULL +#define cpg_mstp_detach_dev NULL +#endif #endif -- cgit v1.2.3 From 2066390ad47b374f3d35075a32325b47d15bf735 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 4 Mar 2016 17:03:46 +0100 Subject: clk: renesas: cpg-mssr: Export cpg_mssr_{at,de}tach_dev() The R-Car SYSC PM Domain driver has to power manage devices in power areas using clocks. To reuse code and to share knowledge of clocks suitable for power management, this is ideally done through the existing cpg_mssr_attach_dev() and cpg_mssr_detach_dev() callbacks. Hence these callbacks can no longer rely on their "domain" parameter pointing to the CPG/MSSR Clock Domain. To handle this, keep a pointer to the clock domain in a static variable. cpg_mssr_attach_dev() has to support probe deferral, as the R-Car SYSC PM Domain may be initialized, and devices may be added to it, before the CPG/MSSR Clock Domain is initialized. Dummy callbacks are provided for the case where CPG/MSTP support is not included, so the rcar-sysc driver won't have to care about this. Signed-off-by: Geert Uytterhoeven Acked-by: Laurent Pinchart --- include/linux/clk/renesas.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk/renesas.h b/include/linux/clk/renesas.h index ab57a298a374..ba6fa4148515 100644 --- a/include/linux/clk/renesas.h +++ b/include/linux/clk/renesas.h @@ -33,4 +33,11 @@ void cpg_mstp_detach_dev(struct generic_pm_domain *unused, struct device *dev); #define cpg_mstp_detach_dev NULL #endif +#ifdef CONFIG_CLK_RENESAS_CPG_MSSR +int cpg_mssr_attach_dev(struct generic_pm_domain *unused, struct device *dev); +void cpg_mssr_detach_dev(struct generic_pm_domain *unused, struct device *dev); +#else +#define cpg_mssr_attach_dev NULL +#define cpg_mssr_detach_dev NULL +#endif #endif -- cgit v1.2.3 From 58ea8abf490415c390e0cc671e875510c9b66318 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Mon, 18 Apr 2016 09:21:44 -0500 Subject: crypto: ccp - Register the CCP as a DMA resource The CCP has the ability to provide DMA services to the kernel using pass-through mode of the device. Register these services as general purpose DMA channels. Changes since v2: - Add a Signed-off-by Changes since v1: - Allocate memory for a string in ccp_dmaengine_register - Ensure register/unregister calls are properly ordered - Verified all changed files are listed in the diffstat - Undo some superfluous changes - Added a cc: Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- include/linux/ccp.h | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ccp.h b/include/linux/ccp.h index 915af3095b39..7c2bb27c067c 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -1,9 +1,10 @@ /* * AMD Cryptographic Coprocessor (CCP) driver * - * Copyright (C) 2013 Advanced Micro Devices, Inc. + * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. * * Author: Tom Lendacky + * Author: Gary R Hook * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -381,6 +382,35 @@ struct ccp_passthru_engine { u32 final; }; +/** + * struct ccp_passthru_nomap_engine - CCP pass-through operation + * without performing DMA mapping + * @bit_mod: bitwise operation to perform + * @byte_swap: byteswap operation to perform + * @mask: mask to be applied to data + * @mask_len: length in bytes of mask + * @src: data to be used for this operation + * @dst: data produced by this operation + * @src_len: length in bytes of data used for this operation + * @final: indicate final pass-through operation + * + * Variables required to be set when calling ccp_enqueue_cmd(): + * - bit_mod, byte_swap, src, dst, src_len + * - mask, mask_len if bit_mod is not CCP_PASSTHRU_BITWISE_NOOP + */ +struct ccp_passthru_nomap_engine { + enum ccp_passthru_bitwise bit_mod; + enum ccp_passthru_byteswap byte_swap; + + dma_addr_t mask; + u32 mask_len; /* In bytes */ + + dma_addr_t src_dma, dst_dma; + u64 src_len; /* In bytes */ + + u32 final; +}; + /***** ECC engine *****/ #define CCP_ECC_MODULUS_BYTES 48 /* 384-bits */ #define CCP_ECC_MAX_OPERANDS 6 @@ -522,7 +552,8 @@ enum ccp_engine { }; /* Flag values for flags member of ccp_cmd */ -#define CCP_CMD_MAY_BACKLOG 0x00000001 +#define CCP_CMD_MAY_BACKLOG 0x00000001 +#define CCP_CMD_PASSTHRU_NO_DMA_MAP 0x00000002 /** * struct ccp_cmd - CPP operation request @@ -562,6 +593,7 @@ struct ccp_cmd { struct ccp_sha_engine sha; struct ccp_rsa_engine rsa; struct ccp_passthru_engine passthru; + struct ccp_passthru_nomap_engine passthru_nomap; struct ccp_ecc_engine ecc; } u; -- cgit v1.2.3 From 2e4682ba2ed79d8082b78d292b3b80f54d970b7a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 10 Mar 2016 16:30:22 +0100 Subject: KVM: add missing memory barrier in kvm_{make,check}_request kvm_make_request and kvm_check_request imply a producer-consumer relationship; add implicit memory barriers to them. There was indeed already a place that was adding an explicit smp_mb() to order between kvm_check_request and the processing of the request. That memory barrier can be removed (as an added benefit, kvm_check_request can use smp_mb__after_atomic which is free on x86). Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5276fe0916fc..ad40d44784c7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1091,6 +1091,11 @@ static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; } static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) { + /* + * Ensure the rest of the request is published to kvm_check_request's + * caller. Paired with the smp_mb__after_atomic in kvm_check_request. + */ + smp_wmb(); set_bit(req, &vcpu->requests); } @@ -1098,6 +1103,12 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) { if (test_bit(req, &vcpu->requests)) { clear_bit(req, &vcpu->requests); + + /* + * Ensure the rest of the request is visible to kvm_check_request's + * caller. Paired with the smp_wmb in kvm_make_request. + */ + smp_mb__after_atomic(); return true; } else { return false; -- cgit v1.2.3 From de4a54c4dfaed0604565c1b27488dce56997acc0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 21 Jan 2016 20:19:41 +0000 Subject: regulator: core: Use a bitfield for continuous_voltage_range Using a bitfield enables the compiler to lay out the structure more efficiently when we have other boolean flags since multiple values can be included in a single byte. Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index cd271e89a7e6..9ac3f9879576 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -292,7 +292,7 @@ struct regulator_desc { const struct regulator_desc *, struct regulator_config *); int id; - bool continuous_voltage_range; + unsigned int continuous_voltage_range:1; unsigned n_voltages; const struct regulator_ops *ops; int irq; -- cgit v1.2.3 From 9257b4a206fc0229dd5f84b78e4d1ebf3f91d270 Mon Sep 17 00:00:00 2001 From: Omer Peleg Date: Wed, 20 Apr 2016 11:34:11 +0300 Subject: iommu/iova: introduce per-cpu caching to iova allocation IOVA allocation has two problems that impede high-throughput I/O. First, it can do a linear search over the allocated IOVA ranges. Second, the rbtree spinlock that serializes IOVA allocations becomes contended. Address these problems by creating an API for caching allocated IOVA ranges, so that the IOVA allocator isn't accessed frequently. This patch adds a per-CPU cache, from which CPUs can alloc/free IOVAs without taking the rbtree spinlock. The per-CPU caches are backed by a global cache, to avoid invoking the (linear-time) IOVA allocator without needing to make the per-CPU cache size excessive. This design is based on magazines, as described in "Magazines and Vmem: Extending the Slab Allocator to Many CPUs and Arbitrary Resources" (currently available at https://www.usenix.org/legacy/event/usenix01/bonwick.html) Adding caching on top of the existing rbtree allocator maintains the property that IOVAs are densely packed in the IO virtual address space, which is important for keeping IOMMU page table usage low. To keep the cache size reasonable, we bound the IOVA space a CPU can cache by 32 MiB (we cache a bounded number of IOVA ranges, and only ranges of size <= 128 KiB). The shared global cache is bounded at 4 MiB of IOVA space. Signed-off-by: Omer Peleg [mad@cs.technion.ac.il: rebased, cleaned up and reworded the commit message] Signed-off-by: Adam Morrison Reviewed-by: Shaohua Li Reviewed-by: Ben Serebrin [dwmw2: split out VT-d part into a separate patch] Signed-off-by: David Woodhouse --- include/linux/iova.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index 92f7177db2ce..f27bb2c62fca 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -19,8 +19,21 @@ /* iova structure */ struct iova { struct rb_node node; - unsigned long pfn_hi; /* IOMMU dish out addr hi */ - unsigned long pfn_lo; /* IOMMU dish out addr lo */ + unsigned long pfn_hi; /* Highest allocated pfn */ + unsigned long pfn_lo; /* Lowest allocated pfn */ +}; + +struct iova_magazine; +struct iova_cpu_rcache; + +#define IOVA_RANGE_CACHE_MAX_SIZE 6 /* log of max cached IOVA range size (in pages) */ +#define MAX_GLOBAL_MAGS 32 /* magazines per bin */ + +struct iova_rcache { + spinlock_t lock; + unsigned long depot_size; + struct iova_magazine *depot[MAX_GLOBAL_MAGS]; + struct iova_cpu_rcache __percpu *cpu_rcaches; }; /* holds all the iova translations for a domain */ @@ -31,6 +44,7 @@ struct iova_domain { unsigned long granule; /* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; + struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ }; static inline unsigned long iova_size(struct iova *iova) @@ -78,6 +92,10 @@ void __free_iova(struct iova_domain *iovad, struct iova *iova); struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, bool size_aligned); +void free_iova_fast(struct iova_domain *iovad, unsigned long pfn, + unsigned long size); +unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, + unsigned long limit_pfn); struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); @@ -87,5 +105,6 @@ struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); struct iova *split_and_remove_iova(struct iova_domain *iovad, struct iova *iova, unsigned long pfn_lo, unsigned long pfn_hi); +void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); #endif -- cgit v1.2.3 From 80e0f8d94d3090f0f7bf3faf3e6180e920ee0d22 Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 24 Feb 2016 14:12:59 +0530 Subject: pinctrl: Add devm_ apis for pinctrl_{register, unregister} Add device managed APIs devm_pinctrl_register() and devm_pinctrl_unregister() for the APIs pinctrl_register() and pinctrl_unregister(). This helps in reducing code in error path and sometimes removal of .remove callback for driver unbind. Signed-off-by: Laxman Dewangan Reviewed-by: Philipp Zabel Acked-by: Bjorn Andersson Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinctrl.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 9ba59fcba549..a42e57da270d 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -144,6 +144,12 @@ struct pinctrl_desc { extern struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc, struct device *dev, void *driver_data); extern void pinctrl_unregister(struct pinctrl_dev *pctldev); +extern struct pinctrl_dev *devm_pinctrl_register(struct device *dev, + struct pinctrl_desc *pctldesc, + void *driver_data); +extern void devm_pinctrl_unregister(struct device *dev, + struct pinctrl_dev *pctldev); + extern bool pin_is_valid(struct pinctrl_dev *pctldev, int pin); extern void pinctrl_add_gpio_range(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range); -- cgit v1.2.3 From b53f27e4fa0d0e72d897830cc4f3f83d2a25d952 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Apr 2016 15:46:23 -0700 Subject: string_helpers: add kstrdup_quotable Handle allocating and escaping a string safe for logging. Signed-off-by: Kees Cook Signed-off-by: James Morris --- include/linux/string_helpers.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index dabe643eb5fa..9de228af00c1 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -68,4 +68,6 @@ static inline int string_escape_str_any_np(const char *src, char *dst, return string_escape_str(src, dst, sz, ESCAPE_ANY_NP, only); } +char *kstrdup_quotable(const char *src, gfp_t gfp); + #endif -- cgit v1.2.3 From 0d0443288f2244d7054796086e481ddef6abdbba Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Apr 2016 15:46:24 -0700 Subject: string_helpers: add kstrdup_quotable_cmdline Provide an escaped (but readable: no inter-argument NULLs) commandline safe for logging. Signed-off-by: Kees Cook Signed-off-by: James Morris --- include/linux/string_helpers.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 9de228af00c1..684d2695fc36 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -69,5 +69,6 @@ static inline int string_escape_str_any_np(const char *src, char *dst, } char *kstrdup_quotable(const char *src, gfp_t gfp); +char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp); #endif -- cgit v1.2.3 From 21985319add60b55fc27230d9421a3e5af7e998a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Apr 2016 15:46:25 -0700 Subject: string_helpers: add kstrdup_quotable_file Allocate a NULL-terminated file path with special characters escaped, safe for logging. Signed-off-by: Kees Cook Signed-off-by: James Morris --- include/linux/string_helpers.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 684d2695fc36..5ce9538f290e 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -3,6 +3,8 @@ #include +struct file; + /* Descriptions of the types of units to * print in */ enum string_size_units { @@ -70,5 +72,6 @@ static inline int string_escape_str_any_np(const char *src, char *dst, char *kstrdup_quotable(const char *src, gfp_t gfp); char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp); +char *kstrdup_quotable_file(struct file *file, gfp_t gfp); #endif -- cgit v1.2.3 From 1284ab5b2dcb927d38e4f3fbc2e307f3d1af9262 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Wed, 20 Apr 2016 15:46:27 -0700 Subject: fs: define a string representation of the kernel_read_file_id enumeration A string representation of the kernel_read_file_id enumeration is needed for displaying messages (eg. pr_info, auditing) that can be used by multiple LSMs and the integrity subsystem. To simplify keeping the list of strings up to date with the enumeration, this patch defines two new preprocessing macros named __fid_enumify and __fid_stringify to create the enumeration and an array of strings. kernel_read_file_id_str() returns a string based on the enumeration. Signed-off-by: Mimi Zohar [kees: removed removal of my old version, constified pointer values] Signed-off-by: Kees Cook Signed-off-by: James Morris --- include/linux/fs.h | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 14a97194b34b..90477550b935 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2580,15 +2580,34 @@ static inline void i_readcount_inc(struct inode *inode) #endif extern int do_pipe_flags(int *, int); +#define __kernel_read_file_id(id) \ + id(UNKNOWN, unknown) \ + id(FIRMWARE, firmware) \ + id(MODULE, kernel-module) \ + id(KEXEC_IMAGE, kexec-image) \ + id(KEXEC_INITRAMFS, kexec-initramfs) \ + id(POLICY, security-policy) \ + id(MAX_ID, ) + +#define __fid_enumify(ENUM, dummy) READING_ ## ENUM, +#define __fid_stringify(dummy, str) #str, + enum kernel_read_file_id { - READING_FIRMWARE = 1, - READING_MODULE, - READING_KEXEC_IMAGE, - READING_KEXEC_INITRAMFS, - READING_POLICY, - READING_MAX_ID + __kernel_read_file_id(__fid_enumify) +}; + +static const char * const kernel_read_file_str[] = { + __kernel_read_file_id(__fid_stringify) }; +static inline const char * const kernel_read_file_id_str(enum kernel_read_file_id id) +{ + if (id < 0 || id >= READING_MAX_ID) + return kernel_read_file_str[READING_UNKNOWN]; + + return kernel_read_file_str[id]; +} + extern int kernel_read(struct file *, loff_t, char *, unsigned long); extern int kernel_read_file(struct file *, void **, loff_t *, loff_t, enum kernel_read_file_id); -- cgit v1.2.3 From 9b091556a073a9f5f93e2ad23d118f45c4796a84 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Apr 2016 15:46:28 -0700 Subject: LSM: LoadPin for kernel file loading restrictions This LSM enforces that kernel-loaded files (modules, firmware, etc) must all come from the same filesystem, with the expectation that such a filesystem is backed by a read-only device such as dm-verity or CDROM. This allows systems that have a verified and/or unchangeable filesystem to enforce module and firmware loading restrictions without needing to sign the files individually. Signed-off-by: Kees Cook Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index ae2537886177..6e466fc0666c 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1892,5 +1892,10 @@ extern void __init yama_add_hooks(void); #else static inline void __init yama_add_hooks(void) { } #endif +#ifdef CONFIG_SECURITY_LOADPIN +void __init loadpin_add_hooks(void); +#else +static inline void loadpin_add_hooks(void) { }; +#endif #endif /* ! __LINUX_LSM_HOOKS_H */ -- cgit v1.2.3 From 85b67bcb7e4a23ced05e7020bf5843b9857f6881 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 18 Apr 2016 20:11:50 -0700 Subject: perf, bpf: minimize the size of perf_trace_() tracepoint handler move trace_call_bpf() into helper function to minimize the size of perf_trace_*() tracepoint handlers. text data bss dec hex filename 10541679 5526646 2945024 19013349 1221ee5 vmlinux_before 10509422 5526646 2945024 18981092 121a0e4 vmlinux_after It may seem that perf_fetch_caller_regs() can also be moved, but that is incorrect, since ip/sp will be wrong. bpf+tracepoint performance is not affected, since perf_swevent_put_recursion_context() is now inlined. export_symbol_gpl can also be dropped. No measurable change in normal perf tracepoints. Suggested-by: Steven Rostedt Signed-off-by: Alexei Starovoitov Acked-by: Peter Zijlstra (Intel) Acked-by: Steven Rostedt Signed-off-by: David S. Miller --- include/linux/trace_events.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index fe6441203b59..222f6aa0418f 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -609,6 +609,11 @@ extern void ftrace_profile_free_filter(struct perf_event *event); void perf_trace_buf_update(void *record, u16 type); void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp); +void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, + struct trace_event_call *call, u64 count, + struct pt_regs *regs, struct hlist_head *head, + struct task_struct *task); + static inline void perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, u64 count, struct pt_regs *regs, void *head, -- cgit v1.2.3 From b1c20f0b97b4e565fa50cde1e6b44c2fd327a1e0 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 19 Apr 2016 14:02:19 -0400 Subject: netdev_features: Fold NETIF_F_ALL_TSO into NETIF_F_GSO_SOFTWARE This patch folds NETIF_F_ALL_TSO into the bitmask for NETIF_F_GSO_SOFTWARE. The idea is to avoid duplication of defines since the only difference between the two was the GSO_UDP bit. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 15eb0b12fff9..bc8736266749 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -152,11 +152,6 @@ enum { #define NETIF_F_GSO_MASK (__NETIF_F_BIT(NETIF_F_GSO_LAST + 1) - \ __NETIF_F_BIT(NETIF_F_GSO_SHIFT)) -/* List of features with software fallbacks. */ -#define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \ - NETIF_F_TSO_MANGLEID | \ - NETIF_F_TSO6 | NETIF_F_UFO) - /* List of IP checksum features. Note that NETIF_F_ HW_CSUM should not be * set in features when NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM are set-- * this would be contradictory @@ -170,6 +165,9 @@ enum { #define NETIF_F_ALL_FCOE (NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \ NETIF_F_FSO) +/* List of features with software fallbacks. */ +#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO) + /* * If one device supports one of these features, then enable them * for all in netdev_increment_features. -- cgit v1.2.3 From 237cd218099ce96edf2890a49aa191b38b84c2fc Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 20 Apr 2016 22:02:09 +0300 Subject: net/mlx5: Introduce device queue counters A queue counter can collect several statistics for one or more hardware queues (QPs, RQs, etc ..) that the counter is attached to. For Ethernet it will provide an "out of buffer" counter which collects the number of all packets that are dropped due to lack of software buffers. Here we add device commands to alloc/query/dealloc queue counters. Signed-off-by: Tariq Toukan Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/qp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index cf031a3f16c5..64221027bf1f 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -668,6 +668,12 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *sq); void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, struct mlx5_core_qp *sq); +int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id); +int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id); +int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, + int reset, void *out, int out_size); +int mlx5_core_query_out_of_buffer(struct mlx5_core_dev *dev, u16 counter_id, + u32 *out_of_buffer); static inline const char *mlx5_qp_type_str(int type) { -- cgit v1.2.3 From 461017cb006aa1b39b0f647ae0ee2d9d84eef05b Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 20 Apr 2016 22:02:13 +0300 Subject: net/mlx5e: Support RX multi-packet WQE (Striding RQ) Introduce the feature of multi-packet WQE (RX Work Queue Element) referred to as (MPWQE or Striding RQ), in which WQEs are larger and serve multiple packets each. Every WQE consists of many strides of the same size, every received packet is aligned to a beginning of a stride and is written to consecutive strides within a WQE. In the regular approach, each regular WQE is big enough to be capable of serving one received packet of any size up to MTU or 64K in case of device LRO is enabled, making it very wasteful when dealing with small packets or device LRO is enabled. For its flexibility, MPWQE allows a better memory utilization (implying improvements in CPU utilization and packet rate) as packets consume strides according to their size, preserving the rest of the WQE to be available for other packets. MPWQE default configuration: Num of WQEs = 16 Strides Per WQE = 2048 Stride Size = 64 byte The default WQEs memory footprint went from 1024*mtu (~1.5MB) to 16 * 2048 * 64 = 2MB per ring. However, HW LRO can now be supported at no additional cost in memory footprint, and hence we turn it on by default and get an even better performance. Performance tested on ConnectX4-Lx 50G. To isolate the feature under test, the numbers below were measured with HW LRO turned off. We verified that the performance just improves when LRO is turned back on. * Netperf single TCP stream: - BW raised by 10-15% for representative packet sizes: default, 64B, 1024B, 1478B, 65536B. * Netperf multi TCP stream: - No degradation, line rate reached. * Pktgen: packet rate raised by 2-10% for traffic of different message sizes: 64B, 128B, 256B, 1024B, and 1500B. * Pktgen: packet loss in bursts of small messages (64byte), single stream: - | num packets | packets loss before | packets loss after | 2K | ~ 1K | 0 | 8K | ~ 6K | 0 | 16K | ~13K | 0 | 32K | ~28K | 0 | 64K | ~57K | ~24K As expected as the driver can receive as many small packets (<=64B) as the number of total strides in the ring (default = 2048 * 16) vs. 1024 (default ring size regardless of packets size) before this feature. Signed-off-by: Tariq Toukan Signed-off-by: Achiad Shochat Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8156e3c9239c..03f8d719b680 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -644,7 +644,8 @@ struct mlx5_err_cqe { }; struct mlx5_cqe64 { - u8 rsvd0[4]; + u8 rsvd0[2]; + __be16 wqe_id; u8 lro_tcppsh_abort_dupack; u8 lro_min_ttl; __be16 lro_tcp_win; @@ -696,6 +697,42 @@ static inline u64 get_cqe_ts(struct mlx5_cqe64 *cqe) return (u64)lo | ((u64)hi << 32); } +struct mpwrq_cqe_bc { + __be16 filler_consumed_strides; + __be16 byte_cnt; +}; + +static inline u16 mpwrq_get_cqe_byte_cnt(struct mlx5_cqe64 *cqe) +{ + struct mpwrq_cqe_bc *bc = (struct mpwrq_cqe_bc *)&cqe->byte_cnt; + + return be16_to_cpu(bc->byte_cnt); +} + +static inline u16 mpwrq_get_cqe_bc_consumed_strides(struct mpwrq_cqe_bc *bc) +{ + return 0x7fff & be16_to_cpu(bc->filler_consumed_strides); +} + +static inline u16 mpwrq_get_cqe_consumed_strides(struct mlx5_cqe64 *cqe) +{ + struct mpwrq_cqe_bc *bc = (struct mpwrq_cqe_bc *)&cqe->byte_cnt; + + return mpwrq_get_cqe_bc_consumed_strides(bc); +} + +static inline bool mpwrq_is_filler_cqe(struct mlx5_cqe64 *cqe) +{ + struct mpwrq_cqe_bc *bc = (struct mpwrq_cqe_bc *)&cqe->byte_cnt; + + return 0x8000 & be16_to_cpu(bc->filler_consumed_strides); +} + +static inline u16 mpwrq_get_cqe_stride_index(struct mlx5_cqe64 *cqe) +{ + return be16_to_cpu(cqe->wqe_counter); +} + enum { CQE_L4_HDR_TYPE_NONE = 0x0, CQE_L4_HDR_TYPE_TCP_NO_ACK = 0x1, -- cgit v1.2.3 From b7aade15485a660cbf5161962c284131324a9534 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 18 Apr 2016 21:19:47 +0200 Subject: vxlan: break dependency with netdev drivers Currently all drivers depend and autoload the vxlan module because how vxlan_get_rx_port is linked into them. Remove this dependency: By using a new event type in the netdevice notifier call chain we proxy the request from the drivers to flush and resetup the vxlan ports not directly via function call but by the already existing netdevice notifier call chain. I added a separate new event type, NETDEV_OFFLOAD_PUSH_VXLAN, to do so. We don't need to save those ids, as the event type field is an unsigned long and using specialized event types for this purpose seemed to be a more elegant way. This also comes in beneficial if in future we want to add offloading knobs for vxlan. Cc: Jesse Gross Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a3bb534576a3..d4c8cd424f8d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2244,6 +2244,7 @@ struct netdev_lag_lower_state_info { #define NETDEV_BONDING_INFO 0x0019 #define NETDEV_PRECHANGEUPPER 0x001A #define NETDEV_CHANGELOWERSTATE 0x001B +#define NETDEV_OFFLOAD_PUSH_VXLAN 0x001C int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); -- cgit v1.2.3 From 681e683ff30ada19f73c17c38a528528dd8824f1 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 18 Apr 2016 21:19:48 +0200 Subject: geneve: break dependency with netdev drivers Equivalent to "vxlan: break dependency with netdev drivers", don't autoload geneve module in case the driver is loaded. Instead make the coupling weaker by using netdevice notifiers as proxy. Cc: Jesse Gross Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d4c8cd424f8d..1f6d5db471a2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2245,6 +2245,7 @@ struct netdev_lag_lower_state_info { #define NETDEV_PRECHANGEUPPER 0x001A #define NETDEV_CHANGELOWERSTATE 0x001B #define NETDEV_OFFLOAD_PUSH_VXLAN 0x001C +#define NETDEV_OFFLOAD_PUSH_GENEVE 0x001D int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); -- cgit v1.2.3 From 92a39d9043ba5ff98adb1c31491f00c7bea5466e Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 23 Mar 2016 17:38:24 +0100 Subject: clk: composite: Add unregister function The composite clock didn't have any unregistration function, which forced us to use clk_unregister directly on it. While it was already not great from an API point of view, it also meant that we were leaking the clk_composite structure allocated in clk_register_composite. Add a clk_unregister_composite function to fix this. Signed-off-by: Maxime Ripard Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index da95258127aa..26a8c9b7be71 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -603,6 +603,7 @@ struct clk *clk_register_composite(struct device *dev, const char *name, struct clk_hw *rate_hw, const struct clk_ops *rate_ops, struct clk_hw *gate_hw, const struct clk_ops *gate_ops, unsigned long flags); +void clk_unregister_composite(struct clk *clk); /*** * struct clk_gpio_gate - gpio gated clock -- cgit v1.2.3 From 372a12ed9d99c02f105278a9b75f0cb176d15cc1 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 8 Apr 2016 13:40:53 +0200 Subject: PM / Runtime: Move ignore_children flag under CONFIG_PM The ignore_children flag is used only when CONFIG_PM is set, so let's move it into that section within the struct dev_pm_info. Move also the corresponding pm_suspend_ignore_children() API out of device.h into pm_runtime.h, to be consistent with similar APIs. Unfortunate this causes the Toshiba PCI SD mmc host driver to fail to compile as it needs pm_runtime.h, so let's fix this here as well. Signed-off-by: Ulf Hansson Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- include/linux/device.h | 5 ----- include/linux/pm.h | 2 +- include/linux/pm_runtime.h | 6 ++++++ 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 002c59728dbe..b130304f9b1b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -956,11 +956,6 @@ static inline bool device_async_suspend_enabled(struct device *dev) return !!dev->power.async_suspend; } -static inline void pm_suspend_ignore_children(struct device *dev, bool enable) -{ - dev->power.ignore_children = enable; -} - static inline void dev_pm_syscore_device(struct device *dev, bool val) { #ifdef CONFIG_PM_SLEEP diff --git a/include/linux/pm.h b/include/linux/pm.h index 6a5d654f4447..06eb353182ab 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -563,7 +563,6 @@ struct dev_pm_info { bool is_suspended:1; /* Ditto */ bool is_noirq_suspended:1; bool is_late_suspended:1; - bool ignore_children:1; bool early_init:1; /* Owned by the PM core */ bool direct_complete:1; /* Owned by the PM core */ spinlock_t lock; @@ -591,6 +590,7 @@ struct dev_pm_info { unsigned int deferred_resume:1; unsigned int run_wake:1; unsigned int runtime_auto:1; + bool ignore_children:1; unsigned int no_callbacks:1; unsigned int irq_safe:1; unsigned int use_autosuspend:1; diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 7af093d6a4dd..2e14d2667b6c 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -56,6 +56,11 @@ extern void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns); extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable); +static inline void pm_suspend_ignore_children(struct device *dev, bool enable) +{ + dev->power.ignore_children = enable; +} + static inline bool pm_children_suspended(struct device *dev) { return dev->power.ignore_children @@ -156,6 +161,7 @@ static inline void __pm_runtime_disable(struct device *dev, bool c) {} static inline void pm_runtime_allow(struct device *dev) {} static inline void pm_runtime_forbid(struct device *dev) {} +static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {} static inline bool pm_children_suspended(struct device *dev) { return false; } static inline void pm_runtime_get_noresume(struct device *dev) {} static inline void pm_runtime_put_noidle(struct device *dev) {} -- cgit v1.2.3 From 9df3921e026532eb3bd2904745d990c0a9f0b4e4 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 31 Mar 2016 11:21:25 +0200 Subject: PM / Domains: Rename stop_ok to suspend_ok for the genpd governor The genpd governor validates the latency constraints to find out whether it's acceptable to runtime suspend a device. Earlier this validation was made to know whether it was okay to invoke the ->stop() callback for the device, hence the governor used the name "stop_ok" for the related variables. To clarify the code around this, let's rename these variables from "stop_ok" to "suspend_ok". Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 49cd8890b873..e91393954384 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -28,7 +28,7 @@ enum gpd_status { struct dev_power_governor { bool (*power_down_ok)(struct dev_pm_domain *domain); - bool (*stop_ok)(struct device *dev); + bool (*suspend_ok)(struct device *dev); }; struct gpd_dev_ops { @@ -94,7 +94,7 @@ struct gpd_timing_data { s64 resume_latency_ns; s64 effective_constraint_ns; bool constraint_changed; - bool cached_stop_ok; + bool cached_suspend_ok; }; struct pm_domain_data { -- cgit v1.2.3 From 54eeddbf92d0de297d78f7419dde00079d553dec Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 31 Mar 2016 11:21:27 +0200 Subject: PM / Domains: Remove ->save|restore_state() callbacks As a part of the ongoing consolidation of genpd, it's become questionable whether clients actually needs to be able to assign their own set of ->save|restore_state() callbacks. Currently all users copes fine with the default callbacks, so let's remove the configuration option and stick to the default ones. This enables further clarifications of the related code and let's also rename pm_genpd_default_save|restore_state() into __genpd_runtime_suspend|resume() to apply the rule of static functionnames in genpd. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index e91393954384..39285c7bd3f5 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -34,8 +34,6 @@ struct dev_power_governor { struct gpd_dev_ops { int (*start)(struct device *dev); int (*stop)(struct device *dev); - int (*save_state)(struct device *dev); - int (*restore_state)(struct device *dev); bool (*active_wakeup)(struct device *dev); }; -- cgit v1.2.3 From 916633a403702549d37ea353e63a68e5b0dc27ad Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 7 Apr 2016 17:12:31 +0200 Subject: locking/rwsem: Provide down_write_killable() Now that all the architectures implement the necessary glue code we can introduce down_write_killable(). The only difference wrt. regular down_write() is that the slow path waits in TASK_KILLABLE state and the interruption by the fatal signal is reported as -EINTR to the caller. Signed-off-by: Michal Hocko Cc: Andrew Morton Cc: Chris Zankel Cc: David S. Miller Cc: Linus Torvalds Cc: Max Filippov Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Signed-off-by: Davidlohr Bueso Cc: Signed-off-by: Jason Low Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-alpha@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Cc: sparclinux@vger.kernel.org Link: http://lkml.kernel.org/r/1460041951-22347-12-git-send-email-mhocko@kernel.org Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 15 +++++++++++++++ include/linux/rwsem.h | 1 + 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index d026b190c530..accfe56d8c51 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -444,6 +444,18 @@ do { \ lock_acquired(&(_lock)->dep_map, _RET_IP_); \ } while (0) +#define LOCK_CONTENDED_RETURN(_lock, try, lock) \ +({ \ + int ____err = 0; \ + if (!try(_lock)) { \ + lock_contended(&(_lock)->dep_map, _RET_IP_); \ + ____err = lock(_lock); \ + } \ + if (!____err) \ + lock_acquired(&(_lock)->dep_map, _RET_IP_); \ + ____err; \ +}) + #else /* CONFIG_LOCK_STAT */ #define lock_contended(lockdep_map, ip) do {} while (0) @@ -452,6 +464,9 @@ do { \ #define LOCK_CONTENDED(_lock, try, lock) \ lock(_lock) +#define LOCK_CONTENDED_RETURN(_lock, try, lock) \ + lock(_lock) + #endif /* CONFIG_LOCK_STAT */ #ifdef CONFIG_LOCKDEP diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 7d7ae029dac5..d1c12d160ace 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -118,6 +118,7 @@ extern int down_read_trylock(struct rw_semaphore *sem); * lock for writing */ extern void down_write(struct rw_semaphore *sem); +extern int __must_check down_write_killable(struct rw_semaphore *sem); /* * trylock for writing -- returns 1 if successful, 0 if contention -- cgit v1.2.3 From be32bcbbd18213803ff24e480340d4d2cca1df4f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 20 Apr 2016 14:02:36 +0200 Subject: soc: renesas: Move pm-rcar to drivers/soc/renesas/rcar-sysc Move the pm-rcar driver from arch/arm/mach-shmobile/ to drivers/soc/renesas/, and its header file to include/linux/soc/renesas/, so it can be shared between arm32 (R-Car H1 and Gen2) and arm64 (R-Car Gen3). Rename it to rcar-sysc as it's really a driver for the R-Car System Controller (SYSC). Kill the intermediate PM_RCAR config symbol, as it's not user configurable anymore, and to prepare for SoC-specific make rules. Add the missing #include to rcar-sysc.h, which was exposed by different include order. Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Signed-off-by: Simon Horman --- include/linux/soc/renesas/rcar-sysc.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 include/linux/soc/renesas/rcar-sysc.h (limited to 'include/linux') diff --git a/include/linux/soc/renesas/rcar-sysc.h b/include/linux/soc/renesas/rcar-sysc.h new file mode 100644 index 000000000000..96f30c288388 --- /dev/null +++ b/include/linux/soc/renesas/rcar-sysc.h @@ -0,0 +1,17 @@ +#ifndef __LINUX_SOC_RENESAS_RCAR_SYSC_H__ +#define __LINUX_SOC_RENESAS_RCAR_SYSC_H__ + +#include + +struct rcar_sysc_ch { + u16 chan_offs; + u8 chan_bit; + u8 isr_bit; +}; + +int rcar_sysc_power_down(const struct rcar_sysc_ch *sysc_ch); +int rcar_sysc_power_up(const struct rcar_sysc_ch *sysc_ch); +bool rcar_sysc_power_is_off(const struct rcar_sysc_ch *sysc_ch); +void __iomem *rcar_sysc_init(phys_addr_t base); + +#endif /* __LINUX_SOC_RENESAS_RCAR_SYSC_H__ */ -- cgit v1.2.3 From 2f024cef5b064d5498fe466dfe9492d46b5b12a4 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 20 Apr 2016 14:02:39 +0200 Subject: soc: renesas: rcar-sysc: Make rcar_sysc_power_is_off() static As of commit b12ff41658171f53 ("ARM: shmobile: r8a7779: Remove legacy PM Domain remainings"), rcar_sysc_power_is_off() is no longer used from SoC-specific code. Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Signed-off-by: Simon Horman --- include/linux/soc/renesas/rcar-sysc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/renesas/rcar-sysc.h b/include/linux/soc/renesas/rcar-sysc.h index 96f30c288388..92fc613ab23d 100644 --- a/include/linux/soc/renesas/rcar-sysc.h +++ b/include/linux/soc/renesas/rcar-sysc.h @@ -11,7 +11,6 @@ struct rcar_sysc_ch { int rcar_sysc_power_down(const struct rcar_sysc_ch *sysc_ch); int rcar_sysc_power_up(const struct rcar_sysc_ch *sysc_ch); -bool rcar_sysc_power_is_off(const struct rcar_sysc_ch *sysc_ch); void __iomem *rcar_sysc_init(phys_addr_t base); #endif /* __LINUX_SOC_RENESAS_RCAR_SYSC_H__ */ -- cgit v1.2.3 From 80dfd83dfab6e49a31ab8fc484a801aef1c567bd Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 13 Apr 2016 17:04:41 -0700 Subject: x86, drivers/pnpbios: Replace paravirt_enabled() check with legacy device check Since we are removing paravirt_enabled() replace it with a logical equivalent. Even though PNPBIOS is x86 specific we add an arch-specific type call, which can be implemented by any architecture to show how other legacy attribute devices can later be also checked for with other ACPI legacy attribute flags. This implicates the first ACPI 5.2.9.3 IA-PC Boot Architecture ACPI_FADT_LEGACY_DEVICES flag device, and shows how to add more. The reason pnpbios gets a defined structure and as such uses a different approach than the RTC legacy quirk is that ACPI has a respective RTC flag, while pnpbios does not. We fold the pnpbios quirk under ACPI_FADT_LEGACY_DEVICES ACPI flag use case, and use a struct of possible devices to enable future extensions of this. As per 0-day, this bumps the vmlinux size using i386-tinyconfig as follows: TOTAL TEXT init.text x86_early_init_platform_quirks() +32 +28 +28 +28 That's 4 byte overhead total, the rest is cleared out on init as its all __init text. v2: split out subarch handlng on switch to make it easier later to add other subarchs. The 'fall-through' switch handling can be confusing and we'll remove it later when we add handling for X86_SUBARCH_CE4100. v3: document vmlinux size impact as per 0-day, and also explain why pnpbios is treated differently than the RTC legacy feature. Signed-off-by: Luis R. Rodriguez Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: andrew.cooper3@citrix.com Cc: andriy.shevchenko@linux.intel.com Cc: bigeasy@linutronix.de Cc: boris.ostrovsky@oracle.com Cc: david.vrabel@citrix.com Cc: ffainelli@freebox.fr Cc: george.dunlap@citrix.com Cc: glin@suse.com Cc: jgross@suse.com Cc: jlee@suse.com Cc: josh@joshtriplett.org Cc: julien.grall@linaro.org Cc: konrad.wilk@oracle.com Cc: kozerkov@parallels.com Cc: lenb@kernel.org Cc: lguest@lists.ozlabs.org Cc: linux-acpi@vger.kernel.org Cc: lv.zheng@intel.com Cc: matt@codeblueprint.co.uk Cc: mbizon@freebox.fr Cc: rjw@rjwysocki.net Cc: robert.moore@intel.com Cc: rusty@rustcorp.com.au Cc: tiwai@suse.de Cc: toshi.kani@hp.com Cc: xen-devel@lists.xensource.com Link: http://lkml.kernel.org/r/1460592286-300-12-git-send-email-mcgrof@kernel.org Signed-off-by: Ingo Molnar --- include/linux/pnp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 5df733b8f704..2588ca6a9028 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -337,9 +337,11 @@ extern struct mutex pnp_res_mutex; #ifdef CONFIG_PNPBIOS extern struct pnp_protocol pnpbios_protocol; +extern bool arch_pnpbios_disabled(void); #define pnp_device_is_pnpbios(dev) ((dev)->protocol == (&pnpbios_protocol)) #else #define pnp_device_is_pnpbios(dev) 0 +#define arch_pnpbios_disabled() false #endif #ifdef CONFIG_PNPACPI -- cgit v1.2.3 From 1dff8083a024650c75a9c961c38082473ceae8cf Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 18 Apr 2016 18:04:57 +0200 Subject: mm: replace open coded page to virt conversion with page_to_virt() The open coded conversion from struct page address to virtual address in lowmem_page_address() involves an intermediate conversion step to pfn number/physical address. Since the placement of the struct page array relative to the linear mapping may be completely independent from the placement of physical RAM (as is that case for arm64 after commit dfd55ad85e 'arm64: vmemmap: use virtual projection of linear region'), the conversion to physical address and back again should factor out of the equation, but unfortunately, the shifting and pointer arithmetic involved prevent this from happening, and the resulting calculation essentially subtracts the address of the start of physical memory and adds it back again, in a way that prevents the compiler from optimizing it away. Since the start of physical memory is not a build time constant on arm64, the resulting conversion involves an unnecessary memory access, which we would like to get rid of. So replace the open coded conversion with a call to page_to_virt(), and use the open coded conversion as its default definition, to be overriden by the architecture, if desired. The existing arch specific definitions of page_to_virt are all equivalent to this default definition, so by itself this patch is a no-op. Acked-by: Andrew Morton Acked-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- include/linux/mm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ffcff53e3b2b..1c1e921edf93 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -72,6 +72,10 @@ extern int mmap_rnd_compat_bits __read_mostly; #define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0)) #endif +#ifndef page_to_virt +#define page_to_virt(x) __va(PFN_PHYS(page_to_pfn(x))) +#endif + /* * To prevent common memory management code establishing * a zero page mapping on a read fault. @@ -948,7 +952,7 @@ static inline struct mem_cgroup *page_memcg(struct page *page) static __always_inline void *lowmem_page_address(const struct page *page) { - return __va(PFN_PHYS(page_to_pfn(page))); + return page_to_virt(page); } #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) -- cgit v1.2.3 From a7ab72390b77062420fb50e4451f71c9321aae05 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 20 Apr 2016 08:38:50 +0200 Subject: i2c: mux: add common data for every i2c-mux instance All i2c-muxes have a parent adapter and one or many child adapters. A mux also has some means of selection. Previously, this was stored per child adapter, but it is only needed to keep track of this per mux. Add an i2c mux core, that keeps track of this consistently. Also add some glue for users of the old interface, which will create one implicit mux core per child adapter. Signed-off-by: Peter Rosin Tested-by: Antti Palosaari Tested-by: Crestez Dan Leonard Signed-off-by: Wolfram Sang --- include/linux/i2c-mux.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c-mux.h b/include/linux/i2c-mux.h index b5f9a007a3ab..71ac1b3f4f68 100644 --- a/include/linux/i2c-mux.h +++ b/include/linux/i2c-mux.h @@ -27,6 +27,31 @@ #ifdef __KERNEL__ +struct i2c_mux_core { + struct i2c_adapter *parent; + struct device *dev; + + void *priv; + + int (*select)(struct i2c_mux_core *, u32 chan_id); + int (*deselect)(struct i2c_mux_core *, u32 chan_id); + + int num_adapters; + int max_adapters; + struct i2c_adapter *adapter[0]; +}; + +struct i2c_mux_core *i2c_mux_alloc(struct i2c_adapter *parent, + struct device *dev, int max_adapters, + int sizeof_priv, u32 flags, + int (*select)(struct i2c_mux_core *, u32), + int (*deselect)(struct i2c_mux_core *, u32)); + +static inline void *i2c_mux_priv(struct i2c_mux_core *muxc) +{ + return muxc->priv; +} + /* * Called to create a i2c bus on a multiplexed bus segment. * The mux_dev and chan_id parameters are passed to the select @@ -41,8 +66,17 @@ struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent, void *mux_dev, u32 chan_id), int (*deselect) (struct i2c_adapter *, void *mux_dev, u32 chan_id)); +/* + * Called to create an i2c bus on a multiplexed bus segment. + * The chan_id parameter is passed to the select and deselect + * callback functions to perform hardware-specific mux control. + */ +int i2c_mux_add_adapter(struct i2c_mux_core *muxc, + u32 force_nr, u32 chan_id, + unsigned int class); void i2c_del_mux_adapter(struct i2c_adapter *adap); +void i2c_mux_del_adapters(struct i2c_mux_core *muxc); #endif /* __KERNEL__ */ -- cgit v1.2.3 From 23fe440c59b9f08afe108e7ec7b6714cb2a3b955 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 2 Mar 2016 15:14:22 +0100 Subject: i2c: mux: drop old unused i2c-mux api All i2c mux users are using an explicit i2c mux core, drop support for implicit i2c mux cores. Signed-off-by: Peter Rosin Signed-off-by: Wolfram Sang --- include/linux/i2c-mux.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c-mux.h b/include/linux/i2c-mux.h index 71ac1b3f4f68..2fa93fe1345e 100644 --- a/include/linux/i2c-mux.h +++ b/include/linux/i2c-mux.h @@ -52,20 +52,6 @@ static inline void *i2c_mux_priv(struct i2c_mux_core *muxc) return muxc->priv; } -/* - * Called to create a i2c bus on a multiplexed bus segment. - * The mux_dev and chan_id parameters are passed to the select - * and deselect callback functions to perform hardware-specific - * mux control. - */ -struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent, - struct device *mux_dev, - void *mux_priv, u32 force_nr, u32 chan_id, - unsigned int class, - int (*select) (struct i2c_adapter *, - void *mux_dev, u32 chan_id), - int (*deselect) (struct i2c_adapter *, - void *mux_dev, u32 chan_id)); /* * Called to create an i2c bus on a multiplexed bus segment. * The chan_id parameter is passed to the select and deselect @@ -75,7 +61,6 @@ int i2c_mux_add_adapter(struct i2c_mux_core *muxc, u32 force_nr, u32 chan_id, unsigned int class); -void i2c_del_mux_adapter(struct i2c_adapter *adap); void i2c_mux_del_adapters(struct i2c_mux_core *muxc); #endif /* __KERNEL__ */ -- cgit v1.2.3 From 02fad5e9b433da3829d39f0afb3c51b4b6409ed5 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Wed, 9 Mar 2016 18:16:54 -0600 Subject: clocksource: Add missing include of of.h. This header uses OF_DELCARE_1 which is defined in linux/of.h. This fixes getting unhelpful compiler error messages about missing ')' before a string constant. Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: David Lechner Signed-off-by: John Stultz --- include/linux/clocksource.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index a307bf62974f..44a1aff22566 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 457db29bfcfd1d9cc717587c446a89d60499d4a9 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Fri, 8 Apr 2016 14:02:11 +0800 Subject: security: Introduce security_settime64() security_settime() uses a timespec, which is not year 2038 safe on 32bit systems. Thus this patch introduces the security_settime64() function with timespec64 type. We also convert the cap_settime() helper function to use the 64bit types. This patch then moves security_settime() to the header file as an inline helper function so that existing users can be iteratively converted. None of the existing hooks is using the timespec argument and therefor the patch is not making any functional changes. Cc: Serge Hallyn , Cc: James Morris , Cc: "Serge E. Hallyn" , Cc: Paul Moore Cc: Stephen Smalley Cc: Kees Cook Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Cc: Ingo Molnar Reviewed-by: James Morris Signed-off-by: Baolin Wang [jstultz: Reworded commit message] Signed-off-by: John Stultz --- include/linux/lsm_hooks.h | 5 +++-- include/linux/security.h | 20 +++++++++++++++++--- 2 files changed, 20 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index cdee11cbcdf1..41ab4662f95c 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1190,7 +1190,8 @@ * Return 0 if permission is granted. * @settime: * Check permission to change the system time. - * struct timespec and timezone are defined in include/linux/time.h + * struct timespec64 is defined in include/linux/time64.h and timezone + * is defined in include/linux/time.h * @ts contains new time * @tz contains new timezone * Return 0 if permission is granted. @@ -1327,7 +1328,7 @@ union security_list_options { int (*quotactl)(int cmds, int type, int id, struct super_block *sb); int (*quota_on)(struct dentry *dentry); int (*syslog)(int type); - int (*settime)(const struct timespec *ts, const struct timezone *tz); + int (*settime)(const struct timespec64 *ts, const struct timezone *tz); int (*vm_enough_memory)(struct mm_struct *mm, long pages); int (*bprm_set_creds)(struct linux_binprm *bprm); diff --git a/include/linux/security.h b/include/linux/security.h index 157f0cb1e4d2..35ac8d9d4739 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -71,7 +71,7 @@ struct timezone; /* These functions are in security/commoncap.c */ extern int cap_capable(const struct cred *cred, struct user_namespace *ns, int cap, int audit); -extern int cap_settime(const struct timespec *ts, const struct timezone *tz); +extern int cap_settime(const struct timespec64 *ts, const struct timezone *tz); extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); @@ -208,7 +208,13 @@ int security_capable_noaudit(const struct cred *cred, struct user_namespace *ns, int security_quotactl(int cmds, int type, int id, struct super_block *sb); int security_quota_on(struct dentry *dentry); int security_syslog(int type); -int security_settime(const struct timespec *ts, const struct timezone *tz); +int security_settime64(const struct timespec64 *ts, const struct timezone *tz); +static inline int security_settime(const struct timespec *ts, const struct timezone *tz) +{ + struct timespec64 ts64 = timespec_to_timespec64(*ts); + + return security_settime64(&ts64, tz); +} int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); int security_bprm_set_creds(struct linux_binprm *bprm); int security_bprm_check(struct linux_binprm *bprm); @@ -462,10 +468,18 @@ static inline int security_syslog(int type) return 0; } +static inline int security_settime64(const struct timespec64 *ts, + const struct timezone *tz) +{ + return cap_settime(ts, tz); +} + static inline int security_settime(const struct timespec *ts, const struct timezone *tz) { - return cap_settime(ts, tz); + struct timespec64 ts64 = timespec_to_timespec64(*ts); + + return cap_settime(&ts64, tz); } static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) -- cgit v1.2.3 From 86d3473224b004f920c107206d181d37db735145 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Fri, 8 Apr 2016 14:02:12 +0800 Subject: time: Introduce do_sys_settimeofday64() The do_sys_settimeofday() function uses a timespec, which is not year 2038 safe on 32bit systems. Thus this patch introduces do_sys_settimeofday64(), which allows us to transition users of do_sys_settimeofday() to using 64bit time types. Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Baolin Wang [jstultz: Include errno-base.h to avoid build issue on some arches] Signed-off-by: John Stultz --- include/linux/timekeeping.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 96f37bee3bc1..37dbacf84849 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -1,6 +1,8 @@ #ifndef _LINUX_TIMEKEEPING_H #define _LINUX_TIMEKEEPING_H +#include + /* Included from linux/ktime.h */ void timekeeping_init(void); @@ -11,8 +13,19 @@ extern int timekeeping_suspended; */ extern void do_gettimeofday(struct timeval *tv); extern int do_settimeofday64(const struct timespec64 *ts); -extern int do_sys_settimeofday(const struct timespec *tv, - const struct timezone *tz); +extern int do_sys_settimeofday64(const struct timespec64 *tv, + const struct timezone *tz); +static inline int do_sys_settimeofday(const struct timespec *tv, + const struct timezone *tz) +{ + struct timespec64 ts64; + + if (!tv) + return -EINVAL; + + ts64 = timespec_to_timespec64(*tv); + return do_sys_settimeofday64(&ts64, tz); +} /* * Kernel time accessors -- cgit v1.2.3 From 9d90725ddca347450c4ab177ad680ed76063afd4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 18 Mar 2016 11:27:36 -0700 Subject: libnvdimm, blk: move i/o infrastructure to nd_namespace_blk Consolidate the information for issuing i/o to a blk-namespace, and eliminate some pointer chasing. Reviewed-by: Johannes Thumshirn Signed-off-by: Dan Williams --- include/linux/nd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nd.h b/include/linux/nd.h index 5489ab756d1a..5ea4aec7fd63 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -82,6 +82,7 @@ struct nd_namespace_pmem { * @uuid: namespace name supplied in the dimm label * @id: ida allocated id * @lbasize: blk namespaces have a native sector size when btt not present + * @size: sum of all the resource ranges allocated to this namespace * @num_resources: number of dpa extents to claim * @res: discontiguous dpa extents for given dimm */ @@ -91,6 +92,7 @@ struct nd_namespace_blk { u8 *uuid; int id; unsigned long lbasize; + resource_size_t size; int num_resources; struct resource **res; }; -- cgit v1.2.3 From 200c79da824c978fcf6eec1dc9c0a1e521133267 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 22 Mar 2016 00:22:16 -0700 Subject: libnvdimm, pmem, pfn: make pmem_rw_bytes generic and refactor pfn setup In preparation for providing an alternative (to block device) access mechanism to persistent memory, convert pmem_rw_bytes() to nsio_rw_bytes(). This allows ->rw_bytes() functionality without requiring a 'struct pmem_device' to be instantiated. In other words, when ->rw_bytes() is in use i/o is driven through 'struct nd_namespace_io', otherwise it is driven through 'struct pmem_device' and the block layer. This consolidates the disjoint calls to devm_exit_badblocks() and devm_memunmap() into a common devm_nsio_disable() and cleans up the init path to use a unified pmem_attach_disk() implementation. Reviewed-by: Johannes Thumshirn Signed-off-by: Dan Williams --- include/linux/nd.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nd.h b/include/linux/nd.h index 5ea4aec7fd63..aee2761d294c 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -15,6 +15,7 @@ #include #include #include +#include enum nvdimm_event { NVDIMM_REVALIDATE_POISON, @@ -55,13 +56,19 @@ static inline struct nd_namespace_common *to_ndns(struct device *dev) } /** - * struct nd_namespace_io - infrastructure for loading an nd_pmem instance + * struct nd_namespace_io - device representation of a persistent memory range * @dev: namespace device created by the nd region driver * @res: struct resource conversion of a NFIT SPA table + * @size: cached resource_size(@res) for fast path size checks + * @addr: virtual address to access the namespace range + * @bb: badblocks list for the namespace range */ struct nd_namespace_io { struct nd_namespace_common common; struct resource res; + resource_size_t size; + void __pmem *addr; + struct badblocks bb; }; /** -- cgit v1.2.3 From 9ecda41acb971ebd07c8fb35faf24005c0baea12 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 5 Apr 2016 14:11:18 +0000 Subject: perf/core: Add ::write_backward attribute to perf event This patch introduces 'write_backward' bit to perf_event_attr, which controls the direction of a ring buffer. After set, the corresponding ring buffer is written from end to beginning. This feature is design to support reading from overwritable ring buffer. Ring buffer can be created by mapping a perf event fd. Kernel puts event records into ring buffer, user tooling like perf fetch them from address returned by mmap(). To prevent racing between kernel and tooling, they communicate to each other through 'head' and 'tail' pointers. Kernel maintains 'head' pointer, points it to the next free area (tail of the last record). Tooling maintains 'tail' pointer, points it to the tail of last consumed record (record has already been fetched). Kernel determines the available space in a ring buffer using these two pointers to avoid overwrite unfetched records. By mapping without 'PROT_WRITE', an overwritable ring buffer is created. Different from normal ring buffer, tooling is unable to maintain 'tail' pointer because writing is forbidden. Therefore, for this type of ring buffers, kernel overwrite old records unconditionally, works like flight recorder. This feature would be useful if reading from overwritable ring buffer were as easy as reading from normal ring buffer. However, there's an obscure problem. The following figure demonstrates a full overwritable ring buffer. In this figure, the 'head' pointer points to the end of last record, and a long record 'E' is pending. For a normal ring buffer, a 'tail' pointer would have pointed to position (X), so kernel knows there's no more space in the ring buffer. However, for an overwritable ring buffer, kernel ignore the 'tail' pointer. (X) head . | . V +------+-------+----------+------+---+ |A....A|B.....B|C........C|D....D| | +------+-------+----------+------+---+ Record 'A' is overwritten by event 'E': head | V +--+---+-------+----------+------+---+ |.E|..A|B.....B|C........C|D....D|E..| +--+---+-------+----------+------+---+ Now tooling decides to read from this ring buffer. However, none of these two natural positions, 'head' and the start of this ring buffer, are pointing to the head of a record. Even the full ring buffer can be accessed by tooling, it is unable to find a position to start decoding. The first attempt tries to solve this problem AFAIK can be found from [1]. It makes kernel to maintain 'tail' pointer: updates it when ring buffer is half full. However, this approach introduces overhead to fast path. Test result shows a 1% overhead [2]. In addition, this method utilizes no more tham 50% records. Another attempt can be found from [3], which allows putting the size of an event at the end of each record. This approach allows tooling to find records in a backward manner from 'head' pointer by reading size of a record from its tail. However, because of alignment requirement, it needs 8 bytes to record the size of a record, which is a huge waste. Its performance is also not good, because more data need to be written. This approach also introduces some extra branch instructions to fast path. 'write_backward' is a better solution to this problem. Following figure demonstrates the state of the overwritable ring buffer when 'write_backward' is set before overwriting: head | V +---+------+----------+-------+------+ | |D....D|C........C|B.....B|A....A| +---+------+----------+-------+------+ and after overwriting: head | V +---+------+----------+-------+---+--+ |..E|D....D|C........C|B.....B|A..|E.| +---+------+----------+-------+---+--+ In each situation, 'head' points to the beginning of the newest record. From this record, tooling can iterate over the full ring buffer and fetch records one by one. The only limitation that needs to be considered is back-to-back reading. Due to the non-deterministic of user programs, it is impossible to ensure the ring buffer keeps stable during reading. Consider an extreme situation: tooling is scheduled out after reading record 'D', then a burst of events come, eat up the whole ring buffer (one or multiple rounds). When the tooling process comes back, reading after 'D' is incorrect now. To prevent this problem, we need to find a way to ensure the ring buffer is stable during reading. ioctl(PERF_EVENT_IOC_PAUSE_OUTPUT) is suggested because its overhead is lower than ioctl(PERF_EVENT_IOC_ENABLE). By carefully verifying 'header' pointer, reader can avoid pausing the ring-buffer. For example: /* A union of all possible events */ union perf_event event; p = head = perf_mmap__read_head(); while (true) { /* copy header of next event */ fetch(&event.header, p, sizeof(event.header)); /* read 'head' pointer */ head = perf_mmap__read_head(); /* check overwritten: is the header good? */ if (!verify(sizeof(event.header), p, head)) break; /* copy the whole event */ fetch(&event, p, event.header.size); /* read 'head' pointer again */ head = perf_mmap__read_head(); /* is the whole event good? */ if (!verify(event.header.size, p, head)) break; p += event.header.size; } However, the overhead is high because: a) In-place decoding is not safe. Copying-verifying-decoding is required. b) Fetching 'head' pointer requires additional synchronization. (From Alexei Starovoitov: Even when this trick works, pause is needed for more than stability of reading. When we collect the events into overwrite buffer we're waiting for some other trigger (like all cpu utilization spike or just one cpu running and all others are idle) and when it happens the buffer has valuable info from the past. At this point new events are no longer interesting and buffer should be paused, events read and unpaused until next trigger comes.) This patch utilizes event's default overflow_handler introduced previously. perf_event_output_backward() is created as the default overflow handler for backward ring buffers. To avoid extra overhead to fast path, original perf_event_output() becomes __perf_event_output() and marked '__always_inline'. In theory, there's no extra overhead introduced to fast path. Performance testing: Calling 3000000 times of 'close(-1)', use gettimeofday() to check duration. Use 'perf record -o /dev/null -e raw_syscalls:*' to capture system calls. In ns. Testing environment: CPU : Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz Kernel : v4.5.0 MEAN STDVAR BASE 800214.950 2853.083 PRE1 2253846.700 9997.014 PRE2 2257495.540 8516.293 POST 2250896.100 8933.921 Where 'BASE' is pure performance without capturing. 'PRE1' is test result of pure 'v4.5.0' kernel. 'PRE2' is test result before this patch. 'POST' is test result after this patch. See [4] for the detailed experimental setup. Considering the stdvar, this patch doesn't introduce performance overhead to the fast path. [1] http://lkml.iu.edu/hypermail/linux/kernel/1304.1/04584.html [2] http://lkml.iu.edu/hypermail/linux/kernel/1307.1/00535.html [3] http://lkml.iu.edu/hypermail/linux/kernel/1512.0/01265.html [4] http://lkml.kernel.org/g/56F89DCD.1040202@huawei.com Signed-off-by: Wang Nan Signed-off-by: Peter Zijlstra (Intel) Acked-by: Alexei Starovoitov Cc: Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Brendan Gregg Cc: He Kuang Cc: Jiri Olsa Cc: Jiri Olsa Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Zefan Li Link: http://lkml.kernel.org/r/1459865478-53413-1-git-send-email-wangnan0@huawei.com [ Fixed the changelog some more. ] Signed-off-by: Ingo Molnar Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b8b195fbe787..85749ae8cb5f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -834,14 +834,24 @@ extern int perf_event_overflow(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs); +extern void perf_event_output_forward(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs); +extern void perf_event_output_backward(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs); extern void perf_event_output(struct perf_event *event, - struct perf_sample_data *data, - struct pt_regs *regs); + struct perf_sample_data *data, + struct pt_regs *regs); static inline bool is_default_overflow_handler(struct perf_event *event) { - return (event->overflow_handler == perf_event_output); + if (likely(event->overflow_handler == perf_event_output_forward)) + return true; + if (unlikely(event->overflow_handler == perf_event_output_backward)) + return true; + return false; } extern void @@ -1051,8 +1061,20 @@ static inline bool has_aux(struct perf_event *event) return event->pmu->setup_aux; } +static inline bool is_write_backward(struct perf_event *event) +{ + return !!event->attr.write_backward; +} + extern int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size); +extern int perf_output_begin_forward(struct perf_output_handle *handle, + struct perf_event *event, + unsigned int size); +extern int perf_output_begin_backward(struct perf_output_handle *handle, + struct perf_event *event, + unsigned int size); + extern void perf_output_end(struct perf_output_handle *handle); extern unsigned int perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); -- cgit v1.2.3 From cee1afce3053e7aa0793fbd5f2e845fa2cef9e33 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 13 Apr 2016 15:56:50 +0200 Subject: sched/fair: Gather CPU load functions under a more conventional namespace The CPU load update related functions have a weak naming convention currently, starting with update_cpu_load_*() which isn't ideal as "update" is a very generic concept. Since two of these functions are public already (and a third is to come) that's enough to introduce a more conventional naming scheme. So let's do the following rename instead: update_cpu_load_*() -> cpu_load_update_*() Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Byungchul Park Cc: Chris Metcalf Cc: Christoph Lameter Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E . McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1460555812-25375-2-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 13c1c1d07270..0b7f6028a50b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -178,9 +178,9 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); extern void calc_global_load(unsigned long ticks); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void update_cpu_load_nohz(int active); +extern void cpu_load_update_nohz(int active); #else -static inline void update_cpu_load_nohz(int active) { } +static inline void cpu_load_update_nohz(int active) { } #endif extern void dump_cpu_task(int cpu); -- cgit v1.2.3 From 1f41906a6fda1114debd3898668bd7ab6470ee41 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 13 Apr 2016 15:56:51 +0200 Subject: sched/fair: Correctly handle nohz ticks CPU load accounting Ticks can happen while the CPU is in dynticks-idle or dynticks-singletask mode. In fact "nohz" or "dynticks" only mean that we exit the periodic mode and we try to minimize the ticks as much as possible. The nohz subsystem uses a confusing terminology with the internal state "ts->tick_stopped" which is also available through its public interface with tick_nohz_tick_stopped(). This is a misnomer as the tick is instead reduced with the best effort rather than stopped. In the best case the tick can indeed be actually stopped but there is no guarantee about that. If a timer needs to fire one second later, a tick will fire while the CPU is in nohz mode and this is a very common scenario. Now this confusion happens to be a problem with CPU load updates: cpu_load_update_active() doesn't handle nohz ticks correctly because it assumes that ticks are completely stopped in nohz mode and that cpu_load_update_active() can't be called in dynticks mode. When that happens, the whole previous tickless load is ignored and the function just records the load for the current tick, ignoring potentially long idle periods behind. In order to solve this, we could account the current load for the previous nohz time but there is a risk that we account the load of a task that got freshly enqueued for the whole nohz period. So instead, lets record the dynticks load on nohz frame entry so we know what to record in case of nohz ticks, then use this record to account the tickless load on nohz ticks and nohz frame end. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Byungchul Park Cc: Chris Metcalf Cc: Christoph Lameter Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E . McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1460555812-25375-3-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0b7f6028a50b..d894f2d61388 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -178,9 +178,11 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); extern void calc_global_load(unsigned long ticks); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void cpu_load_update_nohz(int active); +extern void cpu_load_update_nohz_start(void); +extern void cpu_load_update_nohz_stop(void); #else -static inline void cpu_load_update_nohz(int active) { } +static inline void cpu_load_update_nohz_start(void) { } +static inline void cpu_load_update_nohz_stop(void) { } #endif extern void dump_cpu_task(int cpu); -- cgit v1.2.3 From dfc57732ad38f93ae6232a3b4e64fd077383a0f1 Mon Sep 17 00:00:00 2001 From: Gregor Boirie Date: Wed, 20 Apr 2016 19:23:43 +0200 Subject: iio:core: mounting matrix support Expose a rotation matrix to indicate userspace the chip placement with respect to the overall hardware system. This is needed to adjust coordinates sampled from a sensor chip when its position deviates from the main hardware system. Final coordinates computation is delegated to userspace since: * computation may involve floating point arithmetics ; * it allows an application to combine adjustments with arbitrary transformations. This 3 dimentional space rotation matrix is expressed as 3x3 array of strings to support floating point numbers. It may be retrieved from a "[_][_]mount_matrix" sysfs attribute file. It is declared into a device / driver specific DTS property or platform data. Signed-off-by: Gregor Boirie Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 0b2773ada0ba..7c29cb0124ae 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -147,6 +147,37 @@ ssize_t iio_enum_write(struct iio_dev *indio_dev, .private = (uintptr_t)(_e), \ } +/** + * struct iio_mount_matrix - iio mounting matrix + * @rotation: 3 dimensional space rotation matrix defining sensor alignment with + * main hardware + */ +struct iio_mount_matrix { + const char *rotation[9]; +}; + +ssize_t iio_show_mount_matrix(struct iio_dev *indio_dev, uintptr_t priv, + const struct iio_chan_spec *chan, char *buf); +int of_iio_read_mount_matrix(const struct device *dev, const char *propname, + struct iio_mount_matrix *matrix); + +typedef const struct iio_mount_matrix * + (iio_get_mount_matrix_t)(const struct iio_dev *indio_dev, + const struct iio_chan_spec *chan); + +/** + * IIO_MOUNT_MATRIX() - Initialize mount matrix extended channel attribute + * @_shared: Whether the attribute is shared between all channels + * @_get: Pointer to an iio_get_mount_matrix_t accessor + */ +#define IIO_MOUNT_MATRIX(_shared, _get) \ +{ \ + .name = "mount_matrix", \ + .shared = (_shared), \ + .read = iio_show_mount_matrix, \ + .private = (uintptr_t)(_get), \ +} + /** * struct iio_event_spec - specification for a channel event * @type: Type of the event -- cgit v1.2.3 From 97eacb9166f4810368e180073dcbceeff0de34df Mon Sep 17 00:00:00 2001 From: Gregor Boirie Date: Wed, 20 Apr 2016 19:23:44 +0200 Subject: iio:ak8975: add mounting matrix support Expose a rotation matrix to indicate userspace the chip orientation with respect to the overall hardware system. Matrix is retrieved from "in_mount_matrix". It is declared into ak8975 DTS entry as a "mount-matrix" property. Signed-off-by: Gregor Boirie Acked-by: Rob Herring Signed-off-by: Jonathan Cameron --- include/linux/iio/magnetometer/ak8975.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/iio/magnetometer/ak8975.h (limited to 'include/linux') diff --git a/include/linux/iio/magnetometer/ak8975.h b/include/linux/iio/magnetometer/ak8975.h new file mode 100644 index 000000000000..c8400959d197 --- /dev/null +++ b/include/linux/iio/magnetometer/ak8975.h @@ -0,0 +1,16 @@ +#ifndef __IIO_MAGNETOMETER_AK8975_H__ +#define __IIO_MAGNETOMETER_AK8975_H__ + +#include + +/** + * struct ak8975_platform_data - AK8975 magnetometer driver platform data + * @eoc_gpio: data ready event gpio + * @orientation: mounting matrix relative to main hardware + */ +struct ak8975_platform_data { + int eoc_gpio; + struct iio_mount_matrix orientation; +}; + +#endif -- cgit v1.2.3 From eb3798463f71afc77abd25b2f62708be06f7173b Mon Sep 17 00:00:00 2001 From: Gregor Boirie Date: Wed, 20 Apr 2016 19:23:45 +0200 Subject: iio:imu:mpu6050: enhance mounting matrix support Add a new rotation matrix sysfs attribute compliant with IIO core mounting matrix API. Matrix is retrieved from "in_anglvel_mount_matrix" and "in_accel_mount_matrix" sysfs attributes. It is declared into mpu6050 DTS entry as a "mount-matrix" property. Old interface is kept for backward userspace compatibility and may be retrieved from legacy platform_data mechanism only. Signed-off-by: Gregor Boirie Acked-by: Rob Herring Signed-off-by: Jonathan Cameron --- include/linux/platform_data/invensense_mpu6050.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/invensense_mpu6050.h b/include/linux/platform_data/invensense_mpu6050.h index ad3aa7b95f35..554b59801aa8 100644 --- a/include/linux/platform_data/invensense_mpu6050.h +++ b/include/linux/platform_data/invensense_mpu6050.h @@ -16,13 +16,16 @@ /** * struct inv_mpu6050_platform_data - Platform data for the mpu driver - * @orientation: Orientation matrix of the chip + * @orientation: Orientation matrix of the chip (deprecated in favor of + * mounting matrix retrieved from device-tree) * * Contains platform specific information on how to configure the MPU6050 to * work on this platform. The orientation matricies are 3x3 rotation matricies * that are applied to the data to rotate from the mounting orientation to the * platform orientation. The values must be one of 0, 1, or -1 and each row and * column should have exactly 1 non-zero value. + * + * Deprecated in favor of mounting matrix retrieved from device-tree. */ struct inv_mpu6050_platform_data { __s8 orientation[9]; -- cgit v1.2.3 From e9bbe898cbe89b17ad3993c136aa13d0431cd537 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 22 Apr 2016 17:31:19 +0200 Subject: libnl: nla_put_net64(): align on a 64-bit area nla_data() is now aligned on a 64-bit area. The temporary function nla_put_be64_32bit() is removed in this patch. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/netfilter/ipset/ip_set.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index f48b8a664b0f..83b9a2e0d8d4 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -351,7 +351,8 @@ ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo) return ((skbinfo->skbmark || skbinfo->skbmarkmask) && nla_put_net64(skb, IPSET_ATTR_SKBMARK, cpu_to_be64((u64)skbinfo->skbmark << 32 | - skbinfo->skbmarkmask))) || + skbinfo->skbmarkmask), + IPSET_ATTR_PAD)) || (skbinfo->skbprio && nla_put_net32(skb, IPSET_ATTR_SKBPRIO, cpu_to_be32(skbinfo->skbprio))) || @@ -374,9 +375,11 @@ static inline bool ip_set_put_counter(struct sk_buff *skb, struct ip_set_counter *counter) { return nla_put_net64(skb, IPSET_ATTR_BYTES, - cpu_to_be64(ip_set_get_bytes(counter))) || + cpu_to_be64(ip_set_get_bytes(counter)), + IPSET_ATTR_PAD) || nla_put_net64(skb, IPSET_ATTR_PACKETS, - cpu_to_be64(ip_set_get_packets(counter))); + cpu_to_be64(ip_set_get_packets(counter)), + IPSET_ATTR_PAD); } static inline void -- cgit v1.2.3 From 41617e1a8dec9fe082ba5dec26bacb154eb55482 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 24 Apr 2016 00:56:07 -0400 Subject: jbd2: add support for avoiding data writes during transaction commits Currently when filesystem needs to make sure data is on permanent storage before committing a transaction it adds inode to transaction's inode list. During transaction commit, jbd2 writes back all dirty buffers that have allocated underlying blocks and waits for the IO to finish. However when doing writeback for delayed allocated data, we allocate blocks and immediately submit the data. Thus asking jbd2 to write dirty pages just unnecessarily adds more work to jbd2 possibly writing back other redirtied blocks. Add support to jbd2 to allow filesystem to ask jbd2 to only wait for outstanding data writes before committing a transaction and thus avoid unnecessary writes. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index fd1083c46c61..39511484ad10 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -403,11 +403,19 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) /* Flags in jbd_inode->i_flags */ #define __JI_COMMIT_RUNNING 0 -/* Commit of the inode data in progress. We use this flag to protect us from +#define __JI_WRITE_DATA 1 +#define __JI_WAIT_DATA 2 + +/* + * Commit of the inode data in progress. We use this flag to protect us from * concurrent deletion of inode. We cannot use reference to inode for this * since we cannot afford doing last iput() on behalf of kjournald */ #define JI_COMMIT_RUNNING (1 << __JI_COMMIT_RUNNING) +/* Write allocated dirty buffers in this inode before commit */ +#define JI_WRITE_DATA (1 << __JI_WRITE_DATA) +/* Wait for outstanding data writes for this inode before commit */ +#define JI_WAIT_DATA (1 << __JI_WAIT_DATA) /** * struct jbd_inode is the structure linking inodes in ordered mode @@ -1270,7 +1278,8 @@ extern int jbd2_journal_clear_err (journal_t *); extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); extern int jbd2_journal_force_commit(journal_t *); extern int jbd2_journal_force_commit_nested(journal_t *); -extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode); +extern int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *inode); +extern int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *inode); extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, struct jbd2_inode *inode, loff_t new_size); extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); -- cgit v1.2.3 From 4b1a9e6934ec6e38138c66c2f73cf6f3695a9c6c Mon Sep 17 00:00:00 2001 From: Ashok Kumar Date: Thu, 21 Apr 2016 05:58:44 -0700 Subject: arm64/perf: Filter common events based on PMCEIDn_EL0 The complete common architectural and micro-architectural event number structure is filtered based on PMCEIDn_EL0 and exposed to /sys using is_visibile function pointer in events attribute_group. To filter the events in is_visible function, pmceid based bitmap is stored in arm_pmu structure and the id field from perf_pmu_events_attr is used to check against the bitmap. The function which derives event bitmap from PMCEIDn_EL0 is executed in the cpus, which has the pmu being initialized, for heterogeneous pmu support. Acked-by: Mark Rutland Signed-off-by: Ashok Kumar Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 4196c90a3c88..d28ac05c7f92 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -105,6 +105,8 @@ struct arm_pmu { struct mutex reserve_mutex; u64 max_period; bool secure_access; /* 32-bit ARM only */ +#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 + DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); struct platform_device *plat_device; struct pmu_hw_events __percpu *hw_events; struct notifier_block hotplug_nb; -- cgit v1.2.3 From 642aa8cee7b84eee1dc4897e301611cffa6d5775 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 21 Apr 2016 14:28:55 +0530 Subject: PM / OPP: dev_pm_opp_set_sharing_cpus() doesn't depend on CONFIG_OF dev_pm_opp_set_sharing_cpus() doesn't do any DT specific stuff and its declarations are added within the CONFIG_OF ifdef by mistake. Take them out of that. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index cccaf4a29e9f..5b6ad31403a5 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -65,6 +65,7 @@ void dev_pm_opp_put_prop_name(struct device *dev); int dev_pm_opp_set_regulator(struct device *dev, const char *name); void dev_pm_opp_put_regulator(struct device *dev); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); +int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask); #else static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { @@ -178,6 +179,11 @@ static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_f return -EINVAL; } +static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) +{ + return -ENOSYS; +} + #endif /* CONFIG_PM_OPP */ #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) @@ -186,7 +192,6 @@ void dev_pm_opp_of_remove_table(struct device *dev); int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask); void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask); int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask); -int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask); #else static inline int dev_pm_opp_of_add_table(struct device *dev) { @@ -210,11 +215,6 @@ static inline int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask { return -ENOSYS; } - -static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) -{ - return -ENOSYS; -} #endif #endif /* __LINUX_OPP_H__ */ -- cgit v1.2.3 From 7ba2f2757d84eae533679306f03c93c118437a87 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 23 Apr 2016 22:47:08 +0200 Subject: spi: core: add hook flash_read_supported to spi_master If hook spi_flash_read is implemented the fast flash read feature is enabled for all devices attached to the respective master. In most cases there is just one flash chip, however there are also devices with more than one flash chip, namely some WiFi routers. Then the fast flash read feature can be used for the first chip only. OpenWRT implemented an own handling of this case, using controller_data element of spi_device to hold the information whether fast flash read can be used for a device. This patch adds hook flash_read_supported to spi_master which is used to extend spi_flash_read_supported() by checking whether the fast flash read feature can be used for a specific spi_device. If the hook is not implemented the default behavior is to allow fast flash read for all devices (if spi_flash_read is implemented). Signed-off-by: Heiner Kallweit Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 857a9a1d82b5..1f03483f61e5 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -372,6 +372,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @unprepare_message: undo any work done by prepare_message(). * @spi_flash_read: to support spi-controller hardwares that provide * accelerated interface to read from flash devices. + * @flash_read_supported: spi device supports flash read * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS * number. Any individual value may be -ENOENT for CS lines that * are not GPIOs (driven by the SPI controller itself). @@ -529,6 +530,7 @@ struct spi_master { struct spi_message *message); int (*spi_flash_read)(struct spi_device *spi, struct spi_flash_read_message *msg); + bool (*flash_read_supported)(struct spi_device *spi); /* * These hooks are for drivers that use a generic implementation @@ -1158,7 +1160,9 @@ struct spi_flash_read_message { /* SPI core interface for flash read support */ static inline bool spi_flash_read_supported(struct spi_device *spi) { - return spi->master->spi_flash_read ? true : false; + return spi->master->spi_flash_read && + (!spi->master->flash_read_supported || + spi->master->flash_read_supported(spi)); } int spi_flash_read(struct spi_device *spi, -- cgit v1.2.3 From a558da0916b90c330940a106105d0a6a67cb77f7 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 25 Apr 2016 10:25:20 +0200 Subject: ieee802154: use nla_put_u64_64bit() Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/nl802154.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index 167342c2ce6b..0f6f6607f592 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -92,6 +92,8 @@ enum { IEEE802154_ATTR_LLSEC_DEV_OVERRIDE, IEEE802154_ATTR_LLSEC_DEV_KEY_MODE, + IEEE802154_ATTR_PAD, + __IEEE802154_ATTR_MAX, }; -- cgit v1.2.3 From d4967cf38fbd62467b8fb5cab63d7da1f5907ed7 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 22 Apr 2016 08:41:01 +0300 Subject: qed*: Align statistics names There's a difference in statsitics' names starting at qed and propagating to qede, where egress counters indicate ranges while ingress counters indiciate high-end. Align all statistcs to follow the same conventions - name indicates range. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 67e8c206b2c1..82a7fe011068 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -384,16 +384,16 @@ struct qed_eth_stats { /* port */ u64 rx_64_byte_packets; - u64 rx_127_byte_packets; - u64 rx_255_byte_packets; - u64 rx_511_byte_packets; - u64 rx_1023_byte_packets; - u64 rx_1518_byte_packets; - u64 rx_1522_byte_packets; - u64 rx_2047_byte_packets; - u64 rx_4095_byte_packets; - u64 rx_9216_byte_packets; - u64 rx_16383_byte_packets; + u64 rx_65_to_127_byte_packets; + u64 rx_128_to_255_byte_packets; + u64 rx_256_to_511_byte_packets; + u64 rx_512_to_1023_byte_packets; + u64 rx_1024_to_1518_byte_packets; + u64 rx_1519_to_1522_byte_packets; + u64 rx_1519_to_2047_byte_packets; + u64 rx_2048_to_4095_byte_packets; + u64 rx_4096_to_9216_byte_packets; + u64 rx_9217_to_16383_byte_packets; u64 rx_crc_errors; u64 rx_mac_crtl_frames; u64 rx_pause_frames; -- cgit v1.2.3 From fe7cd2bfdac4d8739bc8665eef040e668e6b428f Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 22 Apr 2016 08:41:03 +0300 Subject: qed*: Conditions for changing link There's some inconsistency in current logic determining whether the link settings of a given interface can be changed; I.e., in all modes other than the so-called `deault' mode the interfaces are forbidden from changing the configuration - but even this rule is not applied to all user APIs that may change the configuration. Instead, let the core-module [qed] decide whether an interface can change the configuration by supporting a new API function. We also revise the current rule, allowing all interfaces to change their configurations while laying the infrastructure for future modes where an interface would be blocked from making such a configuration. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 82a7fe011068..e5de42b62976 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -211,6 +211,16 @@ struct qed_common_ops { void (*simd_handler_clean)(struct qed_dev *cdev, int index); + +/** + * @brief can_link_change - can the instance change the link or not + * + * @param cdev + * + * @return true if link-change is allowed, false otherwise. + */ + bool (*can_link_change)(struct qed_dev *cdev); + /** * @brief set_link - set links according to params * -- cgit v1.2.3 From 6fa01ccd883021105e9f8af7d04b9f156fa3494a Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Fri, 22 Apr 2016 18:36:35 -0700 Subject: skbuff: Add pskb_extract() helper function A pattern of skb usage seen in modules such as RDS-TCP is to extract `to_copy' bytes from the received TCP segment, starting at some offset `off' into a new skb `clone'. This is done in the ->data_ready callback, where the clone skb is queued up for rx on the PF_RDS socket, while the parent TCP segment is returned unchanged back to the TCP engine. The existing code uses the sequence clone = skb_clone(..); pskb_pull(clone, off, ..); pskb_trim(clone, to_copy, ..); with the intention of discarding the first `off' bytes. However, skb_clone() + pskb_pull() implies pksb_expand_head(), which ends up doing a redundant memcpy of bytes that will then get discarded in __pskb_pull_tail(). To avoid this inefficiency, this commit adds pskb_extract() that creates the clone, and memcpy's only the relevant header/frag/frag_list to the start of `clone'. pskb_trim() is then invoked to trim clone down to the requested to_copy bytes. Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index da0ace389fec..a1ce63979ad8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2986,6 +2986,8 @@ struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, int write_len); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); +struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy, + gfp_t gfp); static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len) { -- cgit v1.2.3 From 21e8868e661ededd2c45c8ec27f6fd5354b88b71 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 20 Apr 2016 11:20:28 +0100 Subject: drivers: firmware: psci: make two helper functions static psci_power_state_loses_context() and psci_power_state_is_valid are only used internally now, so make them static. Signed-off-by: Jisheng Zhang Signed-off-by: Lorenzo Pieralisi Signed-off-by: Arnd Bergmann --- include/linux/psci.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/psci.h b/include/linux/psci.h index 393efe2edf9a..bdea1cb5e1db 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -21,8 +21,6 @@ #define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 bool psci_tos_resident_on(int cpu); -bool psci_power_state_loses_context(u32 state); -bool psci_power_state_is_valid(u32 state); int psci_cpu_init_idle(unsigned int cpu); int psci_cpu_suspend_enter(unsigned long index); -- cgit v1.2.3 From 1fcbcc333f1fae6e11cc0839a6e72bc1a3e830bf Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 25 Apr 2016 16:50:14 -0700 Subject: block: copy NOMERGE flag from bio to request bio might have NOMERGE flag set, for example blk_queue_split sets it. When we initiate request, copy this flag too. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 86a38ea1823f..77e5d81f07aa 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -208,7 +208,7 @@ enum rq_flag_bits { #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \ REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \ - REQ_SECURE | REQ_INTEGRITY) + REQ_SECURE | REQ_INTEGRITY | REQ_NOMERGE) #define REQ_CLONE_MASK REQ_COMMON_MASK #define BIO_NO_ADVANCE_ITER_MASK (REQ_DISCARD|REQ_WRITE_SAME) -- cgit v1.2.3 From 4899f78a3dccda41ffdaa1a2cbf78209753e0f70 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 6 Apr 2016 12:37:37 -0500 Subject: mailbox/omap: drop legacy platform device support OMAP mailbox devices can no longer be created in legacy non-DT mode, all the relevant code has been cleaned up. The OMAP mailbox driver will only support devices created from DT going forward, so drop the legacy platform device support from the driver. Signed-off-by: Suman Anna Signed-off-by: Jassi Brar --- include/linux/platform_data/mailbox-omap.h | 58 ------------------------------ 1 file changed, 58 deletions(-) delete mode 100644 include/linux/platform_data/mailbox-omap.h (limited to 'include/linux') diff --git a/include/linux/platform_data/mailbox-omap.h b/include/linux/platform_data/mailbox-omap.h deleted file mode 100644 index 4631dbb4255e..000000000000 --- a/include/linux/platform_data/mailbox-omap.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * mailbox-omap.h - * - * Copyright (C) 2013 Texas Instruments, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef _PLAT_MAILBOX_H -#define _PLAT_MAILBOX_H - -/* Interrupt register configuration types */ -#define MBOX_INTR_CFG_TYPE1 (0) -#define MBOX_INTR_CFG_TYPE2 (1) - -/** - * struct omap_mbox_dev_info - OMAP mailbox device attribute info - * @name: name of the mailbox device - * @tx_id: mailbox queue id used for transmitting messages - * @rx_id: mailbox queue id on which messages are received - * @irq_id: irq identifier number to use from the hwmod data - * @usr_id: mailbox user id for identifying the interrupt into - * the MPU interrupt controller. - */ -struct omap_mbox_dev_info { - const char *name; - u32 tx_id; - u32 rx_id; - u32 irq_id; - u32 usr_id; -}; - -/** - * struct omap_mbox_pdata - OMAP mailbox platform data - * @intr_type: type of interrupt configuration registers used - while programming mailbox queue interrupts - * @num_users: number of users (processor devices) that the mailbox - * h/w block can interrupt - * @num_fifos: number of h/w fifos within the mailbox h/w block - * @info_cnt: number of mailbox devices for the platform - * @info: array of mailbox device attributes - */ -struct omap_mbox_pdata { - u32 intr_type; - u32 num_users; - u32 num_fifos; - u32 info_cnt; - struct omap_mbox_dev_info *info; -}; - -#endif /* _PLAT_MAILBOX_H */ -- cgit v1.2.3 From dd28216528cf67339cd4f5854166fbd4eefd7fa8 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 6 Apr 2016 18:37:20 -0500 Subject: mailbox/omap: kill omap_mbox_{save/restore}_ctx() functions The omap_mbox_save_ctx() and omap_mbox_restore_ctx() API were previously provided to OMAP mailbox clients to save and restore the mailbox context during system suspend/resume. The save and restore functionality is now implemented through System PM driver callbacks, and there is no need for these functions, so kill these API. Signed-off-by: Suman Anna Signed-off-by: Jassi Brar --- include/linux/omap-mailbox.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/omap-mailbox.h b/include/linux/omap-mailbox.h index 587bbdd31f5a..c726bd833761 100644 --- a/include/linux/omap-mailbox.h +++ b/include/linux/omap-mailbox.h @@ -21,8 +21,6 @@ struct mbox_client; struct mbox_chan *omap_mbox_request_channel(struct mbox_client *cl, const char *chan_name); -void omap_mbox_save_ctx(struct mbox_chan *chan); -void omap_mbox_restore_ctx(struct mbox_chan *chan); void omap_mbox_enable_irq(struct mbox_chan *chan, omap_mbox_irq_t irq); void omap_mbox_disable_irq(struct mbox_chan *chan, omap_mbox_irq_t irq); -- cgit v1.2.3 From f0cdf76c103ffa34ca5ac87dcdef7edffc722cbf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 24 Apr 2016 21:38:14 +0200 Subject: net: remove NETDEV_TX_LOCKED support No more users in the tree, remove NETDEV_TX_LOCKED support. Adds another hole in softnet_stats struct, but better than keeping the unused collision counter around. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1f6d5db471a2..18d8394f2e5d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -106,7 +106,6 @@ enum netdev_tx { __NETDEV_TX_MIN = INT_MIN, /* make sure enum is signed */ NETDEV_TX_OK = 0x00, /* driver took care of packet */ NETDEV_TX_BUSY = 0x10, /* driver tx path was busy*/ - NETDEV_TX_LOCKED = 0x20, /* driver tx lock was already taken */ }; typedef enum netdev_tx netdev_tx_t; @@ -831,7 +830,6 @@ struct tc_to_netdev { * the queue before that can happen; it's for obsolete devices and weird * corner cases, but the stack really does a non-trivial amount * of useless work if you return NETDEV_TX_BUSY. - * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) * Required; cannot be NULL. * * netdev_features_t (*ndo_fix_features)(struct net_device *dev, @@ -2737,7 +2735,6 @@ struct softnet_data { /* stats */ unsigned int processed; unsigned int time_squeeze; - unsigned int cpu_collision; unsigned int received_rps; #ifdef CONFIG_RPS struct softnet_data *rps_ipi_list; -- cgit v1.2.3 From 9218b44dcc059e08e249f6f7614b8e391eb041d8 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 24 Apr 2016 22:51:47 +0300 Subject: net/mlx5e: Statistics handling refactoring Redesign ethtool statistics handling and reporting in the driver: 1. Move counters to a separate file (en_stats.h). 2. Remove unnecessary dependencies between stats and strings. 3. Use counter descriptors which hold a name and offset for each counter, and will be used to decide which counters will be exposed. For example when adding a new software counter to ethtool, instead of: 1. Add to stats struct. 2. Add to strings struct in the same order. 3. Change macro defining number of software counters. The only thing needed is to link the new counter to a counter descriptor. VPort counters are a set of hardware traffic counters created automatically for each virtual port opened. PPort counters are a set of counters describing per physical port performance statistics. These counters are gathered from hardware register and divided to groups according to different protocols. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 03f8d719b680..8be44ca777ed 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -59,6 +59,7 @@ #define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8) #define MLX5_ST_SZ_BYTES(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8) #define MLX5_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32) +#define MLX5_ST_SZ_QW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 64) #define MLX5_UN_SZ_BYTES(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 8) #define MLX5_UN_SZ_DW(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 32) #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8) -- cgit v1.2.3 From 121fcdc84d8240d4dfe1f737befd5814b12623ee Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 24 Apr 2016 22:51:50 +0300 Subject: net/mlx5e: Add link down events counter Expose link_down_events counter through ethtool -S. This counter is read from PPort statistics, then proccessed and stored as a special handling software counter. This counter is stored along software counters since it is the only PPort counter that it's size is not 64 bits. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8be44ca777ed..942bccacd7b7 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1369,6 +1369,7 @@ enum { MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP = 0x5, MLX5_PER_PRIORITY_COUNTERS_GROUP = 0x10, MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11, + MLX5_PHYSICAL_LAYER_COUNTERS_GROUP = 0x12, MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, }; -- cgit v1.2.3 From 94cb1ebbafd509210887eea6ced55c40da7b4baa Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Sun, 24 Apr 2016 22:51:52 +0300 Subject: net/mlx5e: Add support for RXALL netdev feature Introduce new access register named Ports Check Mask Register (PCMR) to control all HW checks on port. With this register, the driver can enable/disable Hardware FCS validation. When RXALL is enabled/disabled using ndo_set_features, enable/disable fcs check at HW. User can change HW configuration using rx-all flag at ethtool. Signed-off-by: Eran Ben Elisha Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/port.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index dcd5ac8d3b14..497a4dbd91b0 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -112,6 +112,7 @@ enum { MLX5_REG_PMPE = 0x5010, MLX5_REG_PELC = 0x500e, MLX5_REG_PVLC = 0x500f, + MLX5_REG_PCMR = 0x5041, MLX5_REG_PMLP = 0, /* TBD */ MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index a1d145abd4eb..577e953d0aa7 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -84,4 +84,8 @@ int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev, int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode); int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode); +int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable); +void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, + bool *enabled); + #endif /* __MLX5_PORT_H__ */ -- cgit v1.2.3 From da54d24ec3ef736de04c61a01653776a9750334f Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 24 Apr 2016 22:51:53 +0300 Subject: net/mlx5e: Add ethtool support for interface identify (LED blinking) Add the needed hardware command and mlx5_ifc structs for managing LED control. Add set_phys_id ethtool callback to support ethtool -p flag. Signed-off-by: Gal Pressman Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/port.h | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 497a4dbd91b0..2e8758d1b19e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -116,6 +116,7 @@ enum { MLX5_REG_PMLP = 0, /* TBD */ MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, + MLX5_REG_MLCR = 0x902b, }; enum { diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 577e953d0aa7..a364ab1737a0 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -35,6 +35,11 @@ #include +enum mlx5_beacon_duration { + MLX5_BEACON_DURATION_OFF = 0x0, + MLX5_BEACON_DURATION_INF = 0xffff, +}; + int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port); @@ -53,6 +58,7 @@ int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status *status); +int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration); int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port); void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 port); -- cgit v1.2.3 From bb64143eee8c036a89b31daa4e9bf8360a8bded1 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 24 Apr 2016 22:51:54 +0300 Subject: net/mlx5e: Add ethtool support for dump module EEPROM Add query MCIA, PMLP registers infrastructure and commands. Add ethtool support for get_module_info() and get_module_eeprom() callbacks. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 3 ++- include/linux/mlx5/port.h | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2e8758d1b19e..1a170672c656 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -113,9 +113,10 @@ enum { MLX5_REG_PELC = 0x500e, MLX5_REG_PVLC = 0x500f, MLX5_REG_PCMR = 0x5041, - MLX5_REG_PMLP = 0, /* TBD */ + MLX5_REG_PMLP = 0x5002, MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, + MLX5_REG_MCIA = 0x9014, MLX5_REG_MLCR = 0x902b, }; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index a364ab1737a0..7391eb833253 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -40,6 +40,19 @@ enum mlx5_beacon_duration { MLX5_BEACON_DURATION_INF = 0xffff, }; +enum mlx5_module_id { + MLX5_MODULE_ID_SFP = 0x3, + MLX5_MODULE_ID_QSFP = 0xC, + MLX5_MODULE_ID_QSFP_PLUS = 0xD, + MLX5_MODULE_ID_QSFP28 = 0x11, +}; + +#define MLX5_EEPROM_MAX_BYTES 32 +#define MLX5_EEPROM_IDENTIFIER_BYTE_MASK 0x000000ff +#define MLX5_I2C_ADDR_LOW 0x50 +#define MLX5_I2C_ADDR_HIGH 0x51 +#define MLX5_EEPROM_PAGE_LENGTH 256 + int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port); @@ -93,5 +106,7 @@ int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode); int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable); void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, bool *enabled); +int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, + u16 offset, u16 size, u8 *data); #endif /* __MLX5_PORT_H__ */ -- cgit v1.2.3 From 363501145e3faa650193722fe7047b767ed87172 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 24 Apr 2016 22:51:55 +0300 Subject: net/mlx5e: Add ethtool support for rxvlan-offload (vlan stripping) Use ethtool -K rxvlan to enable/disable C-TAG vlan stripping by hardware. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1a170672c656..2cc5e9fd5913 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -45,6 +45,10 @@ #include #include +enum { + MLX5_RQ_BITMASK_VSD = 1 << 1, +}; + enum { MLX5_BOARD_ID_LEN = 64, MLX5_MAX_NAME_LEN = 16, -- cgit v1.2.3 From 1b223dd391622fde05e03829d813c3c6cc998685 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sun, 24 Apr 2016 22:51:56 +0300 Subject: net/mlx5e: Fix checksum handling for non-stripped vlan packets Now as rx-vlan offload can be disabled, packets can be received with vlan tag not stripped, which means is_first_ethertype_ip will return false, for that we need to check if the hardware reported csum OK so we will report CHECKSUM_UNNECESSARY for those packets. Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 942bccacd7b7..6bd429b53b77 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -645,8 +645,9 @@ struct mlx5_err_cqe { }; struct mlx5_cqe64 { - u8 rsvd0[2]; - __be16 wqe_id; + u8 outer_l3_tunneled; + u8 rsvd0; + __be16 wqe_id; u8 lro_tcppsh_abort_dupack; u8 lro_min_ttl; __be16 lro_tcp_win; @@ -659,7 +660,7 @@ struct mlx5_cqe64 { __be16 slid; __be32 flags_rqpn; u8 hds_ip_ext; - u8 l4_hdr_type_etc; + u8 l4_l3_hdr_type; __be16 vlan_info; __be32 srqn; /* [31:24]: lro_num_seg, [23:0]: srqn */ __be32 imm_inval_pkey; @@ -680,12 +681,22 @@ static inline int get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) static inline u8 get_cqe_l4_hdr_type(struct mlx5_cqe64 *cqe) { - return (cqe->l4_hdr_type_etc >> 4) & 0x7; + return (cqe->l4_l3_hdr_type >> 4) & 0x7; +} + +static inline u8 get_cqe_l3_hdr_type(struct mlx5_cqe64 *cqe) +{ + return (cqe->l4_l3_hdr_type >> 2) & 0x3; +} + +static inline u8 cqe_is_tunneled(struct mlx5_cqe64 *cqe) +{ + return cqe->outer_l3_tunneled & 0x1; } static inline int cqe_has_vlan(struct mlx5_cqe64 *cqe) { - return !!(cqe->l4_hdr_type_etc & 0x1); + return !!(cqe->l4_l3_hdr_type & 0x1); } static inline u64 get_cqe_ts(struct mlx5_cqe64 *cqe) -- cgit v1.2.3 From db0a6fb5d97afe01fd9c47d37c6daa82d4d4001d Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Thu, 21 Apr 2016 14:14:01 -0400 Subject: audit: add tty field to LOGIN event The tty field was missing from AUDIT_LOGIN events. Refactor code to create a new function audit_get_tty(), using it to replace the call in audit_log_task_info() and to add it to audit_log_set_loginuid(). Lock and bump the kref to protect it, adding audit_put_tty() alias to decrement it. Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/linux/audit.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index b40ed5df5542..32cdafb312d8 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -26,6 +26,7 @@ #include #include #include +#include #define AUDIT_INO_UNSET ((unsigned long)-1) #define AUDIT_DEV_UNSET ((dev_t)-1) @@ -343,6 +344,23 @@ static inline unsigned int audit_get_sessionid(struct task_struct *tsk) return tsk->sessionid; } +static inline struct tty_struct *audit_get_tty(struct task_struct *tsk) +{ + struct tty_struct *tty = NULL; + unsigned long flags; + + spin_lock_irqsave(&tsk->sighand->siglock, flags); + if (tsk->signal) + tty = tty_kref_get(tsk->signal->tty); + spin_unlock_irqrestore(&tsk->sighand->siglock, flags); + return tty; +} + +static inline void audit_put_tty(struct tty_struct *tty) +{ + tty_kref_put(tty); +} + extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp); extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode); extern void __audit_bprm(struct linux_binprm *bprm); @@ -500,6 +518,12 @@ static inline unsigned int audit_get_sessionid(struct task_struct *tsk) { return -1; } +static inline struct tty_struct *audit_get_tty(struct task_struct *tsk) +{ + return NULL; +} +static inline void audit_put_tty(struct tty_struct *tty) +{ } static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { } static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, -- cgit v1.2.3 From dd80b54b18db3d76a43558daaa6ea3aa67f5aacd Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 20 Apr 2016 15:39:11 +0200 Subject: USB: LTM also for USB 3.1 LTM is also defined for SS+. The correct test is to check for anything slower than SS not exactly SS. Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 6a9a0c28415d..29aba76017ee 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -720,7 +720,7 @@ extern void usb_enable_ltm(struct usb_device *udev); static inline bool usb_device_supports_ltm(struct usb_device *udev) { - if (udev->speed != USB_SPEED_SUPER || !udev->bos || !udev->bos->ss_cap) + if (udev->speed < USB_SPEED_SUPER || !udev->bos || !udev->bos->ss_cap) return false; return udev->bos->ss_cap->bmAttributes & USB_LTM_SUPPORT; } -- cgit v1.2.3 From fca504f6054f2a62d966afde23a0cfbf9e3dc32f Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 20 Apr 2016 15:39:12 +0200 Subject: USB: correct intervals for SS+ SS+ also expresses intervals in units of 125ms. Testing must be for SS or faster, not SS exactly. Signed-off-by: Oliver neukum Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 29aba76017ee..7824f4557d50 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1569,7 +1569,7 @@ static inline void usb_fill_bulk_urb(struct urb *urb, * Initializes a interrupt urb with the proper information needed to submit * it to a device. * - * Note that High Speed and SuperSpeed interrupt endpoints use a logarithmic + * Note that High Speed and SuperSpeed(+) interrupt endpoints use a logarithmic * encoding of the endpoint interval, and express polling intervals in * microframes (eight per millisecond) rather than in frames (one per * millisecond). @@ -1595,7 +1595,7 @@ static inline void usb_fill_int_urb(struct urb *urb, urb->complete = complete_fn; urb->context = context; - if (dev->speed == USB_SPEED_HIGH || dev->speed == USB_SPEED_SUPER) { + if (dev->speed == USB_SPEED_HIGH || dev->speed >= USB_SPEED_SUPER) { /* make sure interval is within allowed range */ interval = clamp(interval, 1, 16); -- cgit v1.2.3 From dad56ee742a3abbb5d9e8108f8537d412bff3f57 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 26 Apr 2016 21:22:22 -0400 Subject: tracing: Move event_trigger_unlock_commit{_regs}() to local header The functions event_trigger_unlock_commit() and event_trigger_unlock_commit_regs() are no longer used outside the tracing system. Move them out of the generic headers and into the local one. Along with __event_trigger_test_discard() that is only used by them. Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 94 -------------------------------------------- 1 file changed, 94 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 5f89a5b0c7e6..70a181cb3585 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -452,100 +452,6 @@ trace_trigger_soft_disabled(struct trace_event_file *file) return false; } -/* - * Helper function for event_trigger_unlock_commit{_regs}(). - * If there are event triggers attached to this event that requires - * filtering against its fields, then they wil be called as the - * entry already holds the field information of the current event. - * - * It also checks if the event should be discarded or not. - * It is to be discarded if the event is soft disabled and the - * event was only recorded to process triggers, or if the event - * filter is active and this event did not match the filters. - * - * Returns true if the event is discarded, false otherwise. - */ -static inline bool -__event_trigger_test_discard(struct trace_event_file *file, - struct ring_buffer *buffer, - struct ring_buffer_event *event, - void *entry, - enum event_trigger_type *tt) -{ - unsigned long eflags = file->flags; - - if (eflags & EVENT_FILE_FL_TRIGGER_COND) - *tt = event_triggers_call(file, entry); - - if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags)) - ring_buffer_discard_commit(buffer, event); - else if (!filter_check_discard(file, entry, buffer, event)) - return false; - - return true; -} - -/** - * event_trigger_unlock_commit - handle triggers and finish event commit - * @file: The file pointer assoctiated to the event - * @buffer: The ring buffer that the event is being written to - * @event: The event meta data in the ring buffer - * @entry: The event itself - * @irq_flags: The state of the interrupts at the start of the event - * @pc: The state of the preempt count at the start of the event. - * - * This is a helper function to handle triggers that require data - * from the event itself. It also tests the event against filters and - * if the event is soft disabled and should be discarded. - */ -static inline void -event_trigger_unlock_commit(struct trace_event_file *file, - struct ring_buffer *buffer, - struct ring_buffer_event *event, - void *entry, unsigned long irq_flags, int pc) -{ - enum event_trigger_type tt = ETT_NONE; - - if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) - trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); - - if (tt) - event_triggers_post_call(file, tt, entry); -} - -/** - * event_trigger_unlock_commit_regs - handle triggers and finish event commit - * @file: The file pointer assoctiated to the event - * @buffer: The ring buffer that the event is being written to - * @event: The event meta data in the ring buffer - * @entry: The event itself - * @irq_flags: The state of the interrupts at the start of the event - * @pc: The state of the preempt count at the start of the event. - * - * This is a helper function to handle triggers that require data - * from the event itself. It also tests the event against filters and - * if the event is soft disabled and should be discarded. - * - * Same as event_trigger_unlock_commit() but calls - * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit(). - */ -static inline void -event_trigger_unlock_commit_regs(struct trace_event_file *file, - struct ring_buffer *buffer, - struct ring_buffer_event *event, - void *entry, unsigned long irq_flags, int pc, - struct pt_regs *regs) -{ - enum event_trigger_type tt = ETT_NONE; - - if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) - trace_buffer_unlock_commit_regs(file->tr, buffer, event, - irq_flags, pc, regs); - - if (tt) - event_triggers_post_call(file, tt, entry); -} - #ifdef CONFIG_BPF_EVENTS unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx); #else -- cgit v1.2.3 From da20dfe6b50ea4c1a82797b7ee8655a370535d73 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 21 Apr 2016 12:53:29 -0700 Subject: fs: fix over-zealous use of "const" When I was fixing up const recommendations from checkpatch.pl, I went overboard. This fixes the warning (during a W=1 build): include/linux/fs.h:2627:74: warning: type qualifiers ignored on function return type [-Wignored-qualifiers] static inline const char * const kernel_read_file_id_str(enum kernel_read_file_id id) Reported-by: Andy Shevchenko Signed-off-by: Kees Cook Signed-off-by: James Morris --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 90477550b935..9847d5c49a0e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2600,7 +2600,7 @@ static const char * const kernel_read_file_str[] = { __kernel_read_file_id(__fid_stringify) }; -static inline const char * const kernel_read_file_id_str(enum kernel_read_file_id id) +static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id) { if (id < 0 || id >= READING_MAX_ID) return kernel_read_file_str[READING_UNKNOWN]; -- cgit v1.2.3 From c5dfd78eb79851e278b7973031b9ca363da87a7e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 21 Apr 2016 12:28:50 -0300 Subject: perf core: Allow setting up max frame stack depth via sysctl The default remains 127, which is good for most cases, and not even hit most of the time, but then for some cases, as reported by Brendan, 1024+ deep frames are appearing on the radar for things like groovy, ruby. And in some workloads putting a _lower_ cap on this may make sense. One that is per event still needs to be put in place tho. The new file is: # cat /proc/sys/kernel/perf_event_max_stack 127 Chaging it: # echo 256 > /proc/sys/kernel/perf_event_max_stack # cat /proc/sys/kernel/perf_event_max_stack 256 But as soon as there is some event using callchains we get: # echo 512 > /proc/sys/kernel/perf_event_max_stack -bash: echo: write error: Device or resource busy # Because we only allocate the callchain percpu data structures when there is a user, which allows for changing the max easily, its just a matter of having no callchain users at that point. Reported-and-Tested-by: Brendan Gregg Reviewed-by: Frederic Weisbecker Acked-by: Alexei Starovoitov Acked-by: David Ahern Cc: Adrian Hunter Cc: Alexander Shishkin Cc: He Kuang Cc: Jiri Olsa Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Wang Nan Cc: Zefan Li Link: http://lkml.kernel.org/r/20160426002928.GB16708@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 85749ae8cb5f..a090700cccca 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -58,7 +58,7 @@ struct perf_guest_info_callbacks { struct perf_callchain_entry { __u64 nr; - __u64 ip[PERF_MAX_STACK_DEPTH]; + __u64 ip[0]; /* /proc/sys/kernel/perf_event_max_stack */ }; struct perf_raw_record { @@ -993,9 +993,11 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, extern int get_callchain_buffers(void); extern void put_callchain_buffers(void); +extern int sysctl_perf_event_max_stack; + static inline int perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) { - if (entry->nr < PERF_MAX_STACK_DEPTH) { + if (entry->nr < sysctl_perf_event_max_stack) { entry->ip[entry->nr++] = ip; return 0; } else { @@ -1017,6 +1019,8 @@ extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +int perf_event_max_stack_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); static inline bool perf_paranoid_tracepoint_raw(void) { -- cgit v1.2.3 From 65da9a0a3bf2202c2432f42d41eb908f2fa30579 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 27 Apr 2016 10:13:46 -0400 Subject: tracing: Make filter_check_discard() local Nothing outside of the tracing directory calls filter_check_discard() or check_filter_check_discard(). They should not be called by modules. Move their prototypes into the local tracing header and remove their EXPORT_SYMBOL() macros. Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 70a181cb3585..bb383af35cc7 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -413,12 +413,6 @@ enum event_trigger_type { extern int filter_match_preds(struct event_filter *filter, void *rec); -extern int filter_check_discard(struct trace_event_file *file, void *rec, - struct ring_buffer *buffer, - struct ring_buffer_event *event); -extern int call_filter_check_discard(struct trace_event_call *call, void *rec, - struct ring_buffer *buffer, - struct ring_buffer_event *event); extern enum event_trigger_type event_triggers_call(struct trace_event_file *file, void *rec); extern void event_triggers_post_call(struct trace_event_file *file, -- cgit v1.2.3 From 188e3c5cd2b672620291e64a21f1598fe91e40b6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 Apr 2016 11:56:04 +0200 Subject: tty: provide tty_name() even without CONFIG_TTY The audit subsystem just started printing the name of the tty, but that causes a build failure when CONFIG_TTY is disabled: kernel/built-in.o: In function `audit_log_task_info': memremap.c:(.text+0x5e34c): undefined reference to `tty_name' kernel/built-in.o: In function `audit_set_loginuid': memremap.c:(.text+0x63b34): undefined reference to `tty_name' This adds tty_name() to the list of functions that are provided as trivial stubs in that configuration. Signed-off-by: Arnd Bergmann Fixes: db0a6fb5d97a ("audit: add tty field to LOGIN event") Signed-off-by: Paul Moore --- include/linux/tty.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index d9fb4b043f56..a93cce297832 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -371,6 +371,7 @@ extern void proc_clear_tty(struct task_struct *p); extern struct tty_struct *get_current_tty(void); /* tty_io.c */ extern int __init tty_init(void); +extern const char *tty_name(const struct tty_struct *tty); #else static inline void console_init(void) { } @@ -391,6 +392,8 @@ static inline struct tty_struct *get_current_tty(void) /* tty_io.c */ static inline int __init tty_init(void) { return 0; } +static inline const char *tty_name(const struct tty_struct *tty) +{ return "(none)"; } #endif extern struct ktermios tty_std_termios; @@ -415,7 +418,6 @@ static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) return tty; } -extern const char *tty_name(const struct tty_struct *tty); extern const char *tty_driver_name(const struct tty_struct *tty); extern void tty_wait_until_sent(struct tty_struct *tty, long timeout); extern int __tty_check_change(struct tty_struct *tty, int sig); -- cgit v1.2.3 From 501e7ef569f4ea2dc7e50773cf6a5d757c94f9b4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Apr 2016 15:30:07 -0700 Subject: net-rfs: fix false sharing accessing sd->input_queue_head sd->input_queue_head is incremented for each processed packet in process_backlog(), and read from other cpus performing Out Of Order avoidance in get_rps_cpu() Moving this field in a separate cache line keeps it mostly hot for the cpu in process_backlog(), as other cpus will only read it. In a stress test, process_backlog() was consuming 6.80 % of cpu cycles, and the patch reduced the cost to 0.65 % Signed-off-by: Eric Dumazet Acked-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 18d8394f2e5d..934ca866562d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2747,11 +2747,15 @@ struct softnet_data { struct sk_buff *completion_queue; #ifdef CONFIG_RPS - /* Elements below can be accessed between CPUs for RPS */ + /* input_queue_head should be written by cpu owning this struct, + * and only read by other cpus. Worth using a cache line. + */ + unsigned int input_queue_head ____cacheline_aligned_in_smp; + + /* Elements below can be accessed between CPUs for RPS/RFS */ struct call_single_data csd ____cacheline_aligned_in_smp; struct softnet_data *rps_ipi_next; unsigned int cpu; - unsigned int input_queue_head; unsigned int input_queue_tail; #endif unsigned int dropped; -- cgit v1.2.3 From c5b591e96db9d99d0126acf93f24e1fb8b368343 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 25 Apr 2016 21:06:33 +0100 Subject: efi: Get rid of the EFI_SYSTEM_TABLES status bit The EFI_SYSTEM_TABLES status bit is set by all EFI supporting architectures upon discovery of the EFI system table, but the bit is never tested in any code we have in the tree. So remove it. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: Leif Lindholm Cc: Luck, Tony Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-2-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/efi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 1626474567ac..1545098b0565 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1000,7 +1000,6 @@ extern int __init efi_setup_pcdp_console(char *); * possible, remove EFI-related code altogether. */ #define EFI_BOOT 0 /* Were we booted from EFI? */ -#define EFI_SYSTEM_TABLES 1 /* Can we use EFI system tables? */ #define EFI_CONFIG_TABLES 2 /* Can we use EFI config tables? */ #define EFI_RUNTIME_SERVICES 3 /* Can we use runtime services? */ #define EFI_MEMMAP 4 /* Can we use EFI memory map? */ -- cgit v1.2.3 From 78ce248faa3c46e24e9bd42db3ab3650659f16dd Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 25 Apr 2016 21:06:38 +0100 Subject: efi: Iterate over efi.memmap in for_each_efi_memory_desc() Most of the users of for_each_efi_memory_desc() are equally happy iterating over the EFI memory map in efi.memmap instead of 'memmap', since the former is usually a pointer to the latter. For those users that want to specify an EFI memory map other than efi.memmap, that can be done using for_each_efi_memory_desc_in_map(). One such example is in the libstub code where the firmware is queried directly for the memory map, it gets iterated over, and then freed. This change goes part of the way toward deleting the global 'memmap' variable, which is not universally available on all architectures (notably IA64) and is rather poorly named. Signed-off-by: Matt Fleming Reviewed-by: Ard Biesheuvel Cc: Borislav Petkov Cc: Leif Lindholm Cc: Mark Salter Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-7-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/efi.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 1545098b0565..17ef4471e603 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -958,11 +958,20 @@ static inline void efi_fake_memmap(void) { } #endif /* Iterate through an efi_memory_map */ -#define for_each_efi_memory_desc(m, md) \ +#define for_each_efi_memory_desc_in_map(m, md) \ for ((md) = (m)->map; \ (md) <= (efi_memory_desc_t *)((m)->map_end - (m)->desc_size); \ (md) = (void *)(md) + (m)->desc_size) +/** + * for_each_efi_memory_desc - iterate over descriptors in efi.memmap + * @md: the efi_memory_desc_t * iterator + * + * Once the loop finishes @md must not be accessed. + */ +#define for_each_efi_memory_desc(md) \ + for_each_efi_memory_desc_in_map(efi.memmap, md) + /* * Format an EFI memory descriptor's type and attributes to a user-provided * character buffer, as per snprintf(), and return the buffer. -- cgit v1.2.3 From 884f4f66ffd6ffe632f3a8be4e6d10a858afdc37 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 25 Apr 2016 21:06:39 +0100 Subject: efi: Remove global 'memmap' EFI memory map Abolish the poorly named EFI memory map, 'memmap'. It is shadowed by a bunch of local definitions in various files and having two ways to access the EFI memory map ('efi.memmap' vs. 'memmap') is rather confusing. Furthermore, IA64 doesn't even provide this global object, which has caused issues when trying to write generic EFI memmap code. Replace all occurrences with efi.memmap, and convert the remaining iterator code to use for_each_efi_mem_desc(). Signed-off-by: Matt Fleming Reviewed-by: Ard Biesheuvel Cc: Borislav Petkov Cc: Luck, Tony Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-8-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/efi.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 17ef4471e603..c2c0da49876e 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -883,7 +883,7 @@ extern struct efi { efi_get_next_high_mono_count_t *get_next_high_mono_count; efi_reset_system_t *reset_system; efi_set_virtual_address_map_t *set_virtual_address_map; - struct efi_memory_map *memmap; + struct efi_memory_map memmap; unsigned long flags; } efi; @@ -945,7 +945,6 @@ extern void efi_initialize_iomem_resources(struct resource *code_resource, extern void efi_get_time(struct timespec *now); extern void efi_reserve_boot_services(void); extern int efi_get_fdt_params(struct efi_fdt_params *params); -extern struct efi_memory_map memmap; extern struct kobject *efi_kobj; extern int efi_reboot_quirk_mode; @@ -970,7 +969,7 @@ static inline void efi_fake_memmap(void) { } * Once the loop finishes @md must not be accessed. */ #define for_each_efi_memory_desc(md) \ - for_each_efi_memory_desc_in_map(efi.memmap, md) + for_each_efi_memory_desc_in_map(&efi.memmap, md) /* * Format an EFI memory descriptor's type and attributes to a user-provided -- cgit v1.2.3 From a604af075a3226adaff84b7026876f0c6dfe9f52 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 25 Apr 2016 21:06:44 +0100 Subject: efi: Add support for the EFI_MEMORY_ATTRIBUTES_TABLE config table This declares the GUID and struct typedef for the new memory attributes table which contains the permissions that can be used to apply stricter permissions to UEFI Runtime Services memory regions. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: Catalin Marinas Cc: Leif Lindholm Cc: Mark Rutland Cc: Peter Jones Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-13-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/efi.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index c2c0da49876e..81af5feba1f7 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -623,6 +623,10 @@ void efi_native_runtime_setup(void); EFI_GUID(0x3152bca5, 0xeade, 0x433d, \ 0x86, 0x2e, 0xc0, 0x1c, 0xdc, 0x29, 0x1f, 0x44) +#define EFI_MEMORY_ATTRIBUTES_TABLE_GUID \ + EFI_GUID(0xdcfa911d, 0x26eb, 0x469f, \ + 0xa2, 0x20, 0x38, 0xb7, 0xdc, 0x46, 0x12, 0x20) + typedef struct { efi_guid_t guid; u64 table; @@ -847,6 +851,14 @@ typedef struct { #define EFI_INVALID_TABLE_ADDR (~0UL) +typedef struct { + u32 version; + u32 num_entries; + u32 desc_size; + u32 reserved; + efi_memory_desc_t entry[0]; +} efi_memory_attributes_table_t; + /* * All runtime access to EFI goes through this structure: */ @@ -868,6 +880,7 @@ extern struct efi { unsigned long config_table; /* config tables */ unsigned long esrt; /* ESRT table */ unsigned long properties_table; /* properties table */ + unsigned long mem_attr_table; /* memory attributes table */ efi_get_time_t *get_time; efi_set_time_t *set_time; efi_get_wakeup_time_t *get_wakeup_time; -- cgit v1.2.3 From 10f0d2f57705350bbbe5f28e9292ae3905823c3c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 25 Apr 2016 21:06:45 +0100 Subject: efi: Implement generic support for the Memory Attributes table This implements shared support for discovering the presence of the Memory Attributes table, and for parsing and validating its contents. The table is validated against the construction rules in the UEFI spec. Since this is a new table, it makes sense to complain if we encounter a table that does not follow those rules. The parsing and validation routine takes a callback that can be specified per architecture, that gets passed each unique validated region, with the virtual address retrieved from the ordinary memory map. Signed-off-by: Ard Biesheuvel [ Trim pr_*() strings to 80 cols and use EFI consistently. ] Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: Catalin Marinas Cc: Leif Lindholm Cc: Mark Rutland Cc: Peter Jones Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-14-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/efi.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 81af5feba1f7..e29a31d0fc35 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -969,6 +969,19 @@ extern void __init efi_fake_memmap(void); static inline void efi_fake_memmap(void) { } #endif +/* + * efi_memattr_perm_setter - arch specific callback function passed into + * efi_memattr_apply_permissions() that updates the + * mapping permissions described by the second + * argument in the page tables referred to by the + * first argument. + */ +typedef int (*efi_memattr_perm_setter)(struct mm_struct *, efi_memory_desc_t *); + +extern int efi_memattr_init(void); +extern int efi_memattr_apply_permissions(struct mm_struct *mm, + efi_memattr_perm_setter fn); + /* Iterate through an efi_memory_map */ #define for_each_efi_memory_desc_in_map(m, md) \ for ((md) = (m)->map; \ -- cgit v1.2.3 From 2c23b73c2d0249c499c4784b6db08dcfc6b7b3b0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 25 Apr 2016 21:06:48 +0100 Subject: x86/efi: Prepare GOP handling code for reuse as generic code In preparation of moving this code to drivers/firmware/efi and reusing it on ARM and arm64, apply any changes that will be required to make this code build for other architectures. This should make it easier to track down problems that this move may cause to its operation on x86. Note that the generic version uses slightly different ways of casting the protocol methods and some other variables to the correct types, since such method calls are not loosely typed on ARM and arm64 as they are on x86. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: David Herrmann Cc: Mark Rutland Cc: Peter Jones Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-17-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/efi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index e29a31d0fc35..c2949909339b 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -1352,5 +1353,9 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, efi_status_t efi_parse_options(char *cmdline); +efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, + struct screen_info *si, efi_guid_t *proto, + unsigned long size); + bool efi_runtime_disabled(void); #endif /* _LINUX_EFI_H */ -- cgit v1.2.3 From fc37206427ce38eafbeff48099d873235e878450 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 25 Apr 2016 21:06:49 +0100 Subject: efi/libstub: Move Graphics Output Protocol handling to generic code The Graphics Output Protocol code executes in the stub, so create a generic version based on the x86 version in libstub so that we can move other archs to it in subsequent patches. The new source file gop.c is added to the libstub build for all architectures, but only wired up for x86. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: David Herrmann Cc: Mark Rutland Cc: Peter Jones Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-18-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/efi.h | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index c2949909339b..9203bbb28887 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -283,7 +283,8 @@ typedef struct { efi_status_t (*handle_protocol)(efi_handle_t, efi_guid_t *, void **); void *__reserved; void *register_protocol_notify; - void *locate_handle; + efi_status_t (*locate_handle)(int, efi_guid_t *, void *, + unsigned long *, efi_handle_t *); void *locate_device_path; void *install_configuration_table; void *load_image; @@ -628,6 +629,10 @@ void efi_native_runtime_setup(void); EFI_GUID(0xdcfa911d, 0x26eb, 0x469f, \ 0xa2, 0x20, 0x38, 0xb7, 0xdc, 0x46, 0x12, 0x20) +#define EFI_CONSOLE_OUT_DEVICE_GUID \ + EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, \ + 0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) + typedef struct { efi_guid_t guid; u64 table; @@ -1214,6 +1219,80 @@ struct efi_simple_text_output_protocol { void *test_string; }; +#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0 +#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1 +#define PIXEL_BIT_MASK 2 +#define PIXEL_BLT_ONLY 3 +#define PIXEL_FORMAT_MAX 4 + +struct efi_pixel_bitmask { + u32 red_mask; + u32 green_mask; + u32 blue_mask; + u32 reserved_mask; +}; + +struct efi_graphics_output_mode_info { + u32 version; + u32 horizontal_resolution; + u32 vertical_resolution; + int pixel_format; + struct efi_pixel_bitmask pixel_information; + u32 pixels_per_scan_line; +} __packed; + +struct efi_graphics_output_protocol_mode_32 { + u32 max_mode; + u32 mode; + u32 info; + u32 size_of_info; + u64 frame_buffer_base; + u32 frame_buffer_size; +} __packed; + +struct efi_graphics_output_protocol_mode_64 { + u32 max_mode; + u32 mode; + u64 info; + u64 size_of_info; + u64 frame_buffer_base; + u64 frame_buffer_size; +} __packed; + +struct efi_graphics_output_protocol_mode { + u32 max_mode; + u32 mode; + unsigned long info; + unsigned long size_of_info; + u64 frame_buffer_base; + unsigned long frame_buffer_size; +} __packed; + +struct efi_graphics_output_protocol_32 { + u32 query_mode; + u32 set_mode; + u32 blt; + u32 mode; +}; + +struct efi_graphics_output_protocol_64 { + u64 query_mode; + u64 set_mode; + u64 blt; + u64 mode; +}; + +struct efi_graphics_output_protocol { + unsigned long query_mode; + unsigned long set_mode; + unsigned long blt; + struct efi_graphics_output_protocol_mode *mode; +}; + +typedef efi_status_t (*efi_graphics_output_protocol_query_mode)( + struct efi_graphics_output_protocol *, u32, unsigned long *, + struct efi_graphics_output_mode_info **); + extern struct list_head efivar_sysfs_list; static inline void -- cgit v1.2.3 From 801820bee9bccb7c156af2b95c7208f428a06ae7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 25 Apr 2016 21:06:53 +0100 Subject: efi/arm/libstub: Make screen_info accessible to the UEFI stub In order to hand over the framebuffer described by the GOP protocol and discovered by the UEFI stub, make struct screen_info accessible by the stub. This involves allocating a loader data buffer and passing it to the kernel proper via a UEFI Configuration Table, since the UEFI stub executes in the context of the decompressor, and cannot access the kernel's copy of struct screen_info directly. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Borislav Petkov Cc: David Herrmann Cc: Mark Rutland Cc: Peter Jones Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-22-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/efi.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 9203bbb28887..e53458842245 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -286,7 +286,7 @@ typedef struct { efi_status_t (*locate_handle)(int, efi_guid_t *, void *, unsigned long *, efi_handle_t *); void *locate_device_path; - void *install_configuration_table; + efi_status_t (*install_configuration_table)(efi_guid_t *, void *); void *load_image; void *start_image; void *exit; @@ -633,6 +633,15 @@ void efi_native_runtime_setup(void); EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, \ 0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) +/* + * This GUID is used to pass to the kernel proper the struct screen_info + * structure that was populated by the stub based on the GOP protocol instance + * associated with ConOut + */ +#define LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID \ + EFI_GUID(0xe03fc20a, 0x85dc, 0x406e, \ + 0xb9, 0xe, 0x4a, 0xb5, 0x02, 0x37, 0x1d, 0x95) + typedef struct { efi_guid_t guid; u64 table; -- cgit v1.2.3 From 06f7d4a1618dbb086e738c93cd1ef416ab01027d Mon Sep 17 00:00:00 2001 From: "Compostella, Jeremy" Date: Mon, 25 Apr 2016 21:06:57 +0100 Subject: efibc: Add EFI Bootloader Control module This module installs a reboot callback, such that if reboot() is invoked with a string argument NNN, "NNN" is copied to the "LoaderEntryOneShot" EFI variable, to be read by the bootloader. If the string matches one of the boot labels defined in its configuration, the bootloader will boot once to that label. The "LoaderEntryRebootReason" EFI variable is set with the reboot reason: "reboot", "shutdown". The bootloader reads this reboot reason and takes particular action according to its policy. There are reboot implementations that do "reboot ", such as Android's reboot command and Upstart's reboot replacement, which pass the reason as an argument to the reboot syscall. There is no platform-agnostic way how those could be modified to pass the reason to the bootloader, regardless of platform or bootloader. Signed-off-by: Jeremy Compostella Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Peter Zijlstra Cc: Stefan Stanacar Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-26-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/efi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index e53458842245..4db7052b2699 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -642,6 +642,10 @@ void efi_native_runtime_setup(void); EFI_GUID(0xe03fc20a, 0x85dc, 0x406e, \ 0xb9, 0xe, 0x4a, 0xb5, 0x02, 0x37, 0x1d, 0x95) +#define LINUX_EFI_LOADER_ENTRY_GUID \ + EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf, \ + 0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f) + typedef struct { efi_guid_t guid; u64 table; -- cgit v1.2.3 From 806b0351c9ff9890c1ef0ba2c46237baef49ac79 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 25 Apr 2016 21:06:58 +0100 Subject: efi: Move efi_status_to_err() to drivers/firmware/efi/ Move efi_status_to_err() to the architecture independent code as it's generally useful in all bits of EFI code where there is a need to convert an efi_status_t to a kernel error value. Signed-off-by: Matt Fleming Acked-by: Ard Biesheuvel Cc: Borislav Petkov Cc: Kweh Hock Leong Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: joeyli Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-27-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/efi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 4db7052b2699..ca47481885c4 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1080,6 +1080,8 @@ static inline void efi_reboot(enum reboot_mode reboot_mode, const char *__unused) {} #endif +extern int efi_status_to_err(efi_status_t status); + /* * Variable Attributes */ -- cgit v1.2.3 From f0133f3c5b8bb34ec4dec50c27e7a655aeee8935 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 25 Apr 2016 21:06:59 +0100 Subject: efi: Add 'capsule' update support The EFI capsule mechanism allows data blobs to be passed to the EFI firmware. A common use case is performing firmware updates. This patch just introduces the main infrastructure for interacting with the firmware, and a driver that allows users to upload capsules will come in a later patch. Once a capsule has been passed to the firmware, the next reboot must be performed using the ResetSystem() EFI runtime service, which may involve overriding the reboot type specified by reboot=. This ensures the reset value returned by QueryCapsuleCapabilities() is used to reset the system, which is required for the capsule to be processed. efi_capsule_pending() is provided for this purpose. At the moment we only allow a single capsule blob to be sent to the firmware despite the fact that UpdateCapsule() takes a 'CapsuleCount' parameter. This simplifies the API and shouldn't result in any downside since it is still possible to send multiple capsules by repeatedly calling UpdateCapsule(). Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Bryan O'Donoghue Cc: Kweh Hock Leong Cc: Mark Salter Cc: Peter Jones Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: joeyli Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-28-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/efi.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index ca47481885c4..a3b4c1ec38c0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -124,6 +124,13 @@ typedef struct { u32 imagesize; } efi_capsule_header_t; +/* + * EFI capsule flags + */ +#define EFI_CAPSULE_PERSIST_ACROSS_RESET 0x00010000 +#define EFI_CAPSULE_POPULATE_SYSTEM_TABLE 0x00020000 +#define EFI_CAPSULE_INITIATE_RESET 0x00040000 + /* * Allocation types for calls to boottime->allocate_pages. */ @@ -1370,6 +1377,13 @@ int efivars_sysfs_init(void); #define EFIVARS_DATA_SIZE_MAX 1024 #endif /* CONFIG_EFI_VARS */ +extern bool efi_capsule_pending(int *reset_type); + +extern int efi_capsule_supported(efi_guid_t guid, u32 flags, + size_t size, int *reset); + +extern int efi_capsule_update(efi_capsule_header_t *capsule, + struct page **pages); #ifdef CONFIG_EFI_RUNTIME_MAP int efi_runtime_map_init(struct kobject *); -- cgit v1.2.3 From 87615a34d561ef59bd0cffc73256a21220dfdffd Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 25 Apr 2016 21:07:00 +0100 Subject: x86/efi: Force EFI reboot to process pending capsules If an EFI capsule has been sent to the firmware we must match the type of EFI reset against that required by the capsule to ensure it is processed correctly. Force an EFI reboot if a capsule is pending for the next reset. Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Kweh Hock Leong Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: joeyli Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1461614832-17633-29-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/efi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index a3b4c1ec38c0..aa36fb8bea4b 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1085,6 +1085,12 @@ static inline bool efi_enabled(int feature) } static inline void efi_reboot(enum reboot_mode reboot_mode, const char *__unused) {} + +static inline bool +efi_capsule_pending(int *reset_type) +{ + return false; +} #endif extern int efi_status_to_err(efi_status_t status); -- cgit v1.2.3 From f98db6013c557c216da5038d9c52045be55cd039 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Apr 2016 09:39:06 -0700 Subject: sched/core: Add switch_mm_irqs_off() and use it in the scheduler By default, this is the same thing as switch_mm(). x86 will override it as an optimization. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/df401df47bdd6be3e389c6f1e3f5310d70e81b2c.1461688545.git.luto@kernel.org Signed-off-by: Ingo Molnar --- include/linux/mmu_context.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h index 70fffeba7495..a4441784503b 100644 --- a/include/linux/mmu_context.h +++ b/include/linux/mmu_context.h @@ -1,9 +1,16 @@ #ifndef _LINUX_MMU_CONTEXT_H #define _LINUX_MMU_CONTEXT_H +#include + struct mm_struct; void use_mm(struct mm_struct *mm); void unuse_mm(struct mm_struct *mm); +/* Architectures that care about IRQ state in switch_mm can override this. */ +#ifndef switch_mm_irqs_off +# define switch_mm_irqs_off switch_mm +#endif + #endif -- cgit v1.2.3 From 3358d2d9f47af86bdd71edb24b361f72a54ec04e Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Thu, 18 Jun 2015 17:28:40 -0400 Subject: clk: tegra: Add interface to enable hardware control of SATA/XUSB PLLs On Tegra210, hardware control of the SATA and XUSB pad PLLs must be done during the UPHY enable sequence rather than the PLLE enable sequence. Export functions to do this so that hardware control can be enabled from the XUSB padctl driver. Signed-off-by: Andrew Bresticker Signed-off-by: Rhyland Klein Signed-off-by: Thierry Reding --- include/linux/clk/tegra.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h index 57bf7aab4516..7007a5f48080 100644 --- a/include/linux/clk/tegra.h +++ b/include/linux/clk/tegra.h @@ -121,4 +121,9 @@ static inline void tegra_cpu_clock_resume(void) } #endif +extern void tegra210_xusb_pll_hw_control_enable(void); +extern void tegra210_xusb_pll_hw_sequence_start(void); +extern void tegra210_sata_pll_hw_control_enable(void); +extern void tegra210_sata_pll_hw_sequence_start(void); + #endif /* __LINUX_CLK_TEGRA_H_ */ -- cgit v1.2.3 From d708b384c06dddeb35d46a6127c08a7403fa2faf Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 27 Apr 2016 08:52:21 +0530 Subject: PM / OPP: -ENOSYS is applicable only to syscalls Some of the routines have used -ENOSYS for the cases where the functionality isn't implemented in the kernel. But ENOSYS is supposed to be used only for syscalls. Replace that with -ENOTSUPP, which specifically means that the operation isn't supported. While at it, replace exiting -EINVAL errors for similar cases to -ENOTSUPP. Signed-off-by: Viresh Kumar Acked-by: Arnd Bergmann Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 5b6ad31403a5..a2df8f6fcc25 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -110,25 +110,25 @@ static inline struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev) static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, bool available) { - return ERR_PTR(-EINVAL); + return ERR_PTR(-ENOTSUPP); } static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq) { - return ERR_PTR(-EINVAL); + return ERR_PTR(-ENOTSUPP); } static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq) { - return ERR_PTR(-EINVAL); + return ERR_PTR(-ENOTSUPP); } static inline int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) { - return -EINVAL; + return -ENOTSUPP; } static inline void dev_pm_opp_remove(struct device *dev, unsigned long freq) @@ -148,40 +148,40 @@ static inline int dev_pm_opp_disable(struct device *dev, unsigned long freq) static inline struct srcu_notifier_head *dev_pm_opp_get_notifier( struct device *dev) { - return ERR_PTR(-EINVAL); + return ERR_PTR(-ENOTSUPP); } static inline int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, unsigned int count) { - return -EINVAL; + return -ENOTSUPP; } static inline void dev_pm_opp_put_supported_hw(struct device *dev) {} static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name) { - return -EINVAL; + return -ENOTSUPP; } static inline void dev_pm_opp_put_prop_name(struct device *dev) {} static inline int dev_pm_opp_set_regulator(struct device *dev, const char *name) { - return -EINVAL; + return -ENOTSUPP; } static inline void dev_pm_opp_put_regulator(struct device *dev) {} static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) { - return -EINVAL; + return -ENOTSUPP; } static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) { - return -ENOSYS; + return -ENOTSUPP; } #endif /* CONFIG_PM_OPP */ @@ -195,7 +195,7 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask #else static inline int dev_pm_opp_of_add_table(struct device *dev) { - return -EINVAL; + return -ENOTSUPP; } static inline void dev_pm_opp_of_remove_table(struct device *dev) @@ -204,7 +204,7 @@ static inline void dev_pm_opp_of_remove_table(struct device *dev) static inline int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask) { - return -ENOSYS; + return -ENOTSUPP; } static inline void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask) @@ -213,7 +213,7 @@ static inline void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask) static inline int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) { - return -ENOSYS; + return -ENOTSUPP; } #endif -- cgit v1.2.3 From dde370b23c1787a9c723ac049d56a3014937f889 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 27 Apr 2016 08:52:22 +0530 Subject: PM / OPP: Mark cpumask as const in dev_pm_opp_set_sharing_cpus() dev_pm_opp_set_sharing_cpus() isn't supposed to update the cpumask passed as its parameter, and so it should always have been marked 'const'. Do it now. Signed-off-by: Viresh Kumar Acked-by: Arnd Bergmann Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index a2df8f6fcc25..0c08ed3836b1 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -65,7 +65,7 @@ void dev_pm_opp_put_prop_name(struct device *dev); int dev_pm_opp_set_regulator(struct device *dev, const char *name); void dev_pm_opp_put_regulator(struct device *dev); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); -int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask); +int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const cpumask_var_t cpumask); #else static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { @@ -179,7 +179,7 @@ static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_f return -ENOTSUPP; } -static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) +static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const cpumask_var_t cpumask) { return -ENOTSUPP; } -- cgit v1.2.3 From 6f707daa3833761110a03478cba5cc4b708ec77d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 27 Apr 2016 08:52:23 +0530 Subject: PM / OPP: Add dev_pm_opp_get_sharing_cpus() OPP core allows a platform to mark OPP table as shared, when the platform isn't using operating-points-v2 bindings. And, so there should be a non DT way of finding out if the OPP table is shared or not. This patch adds dev_pm_opp_get_sharing_cpus(), which first tries to get OPP sharing information from the opp-table (in case it is already marked as shared), otherwise it uses the existing DT way of finding sharing information. Signed-off-by: Viresh Kumar Acked-by: Arnd Bergmann Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 0c08ed3836b1..15f554443b59 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -66,6 +66,7 @@ int dev_pm_opp_set_regulator(struct device *dev, const char *name); void dev_pm_opp_put_regulator(struct device *dev); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const cpumask_var_t cpumask); +int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask); #else static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { @@ -184,6 +185,11 @@ static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const cpum return -ENOTSUPP; } +static inline int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) +{ + return -EINVAL; +} + #endif /* CONFIG_PM_OPP */ #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) -- cgit v1.2.3 From eb96924acddc709db58221c210ca05cd9effb1df Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 27 Apr 2016 08:52:26 +0530 Subject: cpufreq: dt: Kill platform-data There are no more users of platform-data for cpufreq-dt driver, get rid of it. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Acked-by: Arnd Bergmann Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq-dt.h | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 include/linux/cpufreq-dt.h (limited to 'include/linux') diff --git a/include/linux/cpufreq-dt.h b/include/linux/cpufreq-dt.h deleted file mode 100644 index a87335a1660c..000000000000 --- a/include/linux/cpufreq-dt.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (C) 2014 Marvell - * Thomas Petazzoni - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __CPUFREQ_DT_H__ -#define __CPUFREQ_DT_H__ - -#include - -struct cpufreq_dt_platform_data { - /* - * True when each CPU has its own clock to control its - * frequency, false when all CPUs are controlled by a single - * clock. - */ - bool independent_clocks; -}; - -#endif /* __CPUFREQ_DT_H__ */ -- cgit v1.2.3 From feb26ac31a2a5cb88d86680d9a94916a6343e9e6 Mon Sep 17 00:00:00 2001 From: Chris Bainbridge Date: Mon, 25 Apr 2016 13:48:38 +0100 Subject: usb: core: hub: hub_port_init lock controller instead of bus The XHCI controller presents two USB buses to the system - one for USB2 and one for USB3. The hub init code (hub_port_init) is reentrant but only locks one bus per thread, leading to a race condition failure when two threads attempt to simultaneously initialise a USB2 and USB3 device: [ 8.034843] xhci_hcd 0000:00:14.0: Timeout while waiting for setup device command [ 13.183701] usb 3-3: device descriptor read/all, error -110 On a test system this failure occurred on 6% of all boots. The call traces at the point of failure are: Call Trace: [] schedule+0x37/0x90 [] usb_kill_urb+0x8d/0xd0 [] ? wake_up_atomic_t+0x30/0x30 [] usb_start_wait_urb+0xbe/0x150 [] usb_control_msg+0xbc/0xf0 [] hub_port_init+0x51e/0xb70 [] hub_event+0x817/0x1570 [] process_one_work+0x1ff/0x620 [] ? process_one_work+0x15f/0x620 [] worker_thread+0x64/0x4b0 [] ? rescuer_thread+0x390/0x390 [] kthread+0x105/0x120 [] ? kthread_create_on_node+0x200/0x200 [] ret_from_fork+0x3f/0x70 [] ? kthread_create_on_node+0x200/0x200 Call Trace: [] xhci_setup_device+0x53d/0xa40 [] xhci_address_device+0xe/0x10 [] hub_port_init+0x1bf/0xb70 [] ? trace_hardirqs_on+0xd/0x10 [] hub_event+0x817/0x1570 [] process_one_work+0x1ff/0x620 [] ? process_one_work+0x15f/0x620 [] worker_thread+0x64/0x4b0 [] ? rescuer_thread+0x390/0x390 [] kthread+0x105/0x120 [] ? kthread_create_on_node+0x200/0x200 [] ret_from_fork+0x3f/0x70 [] ? kthread_create_on_node+0x200/0x200 Which results from the two call chains: hub_port_init usb_get_device_descriptor usb_get_descriptor usb_control_msg usb_internal_control_msg usb_start_wait_urb usb_submit_urb / wait_for_completion_timeout / usb_kill_urb hub_port_init hub_set_address xhci_address_device xhci_setup_device Mathias Nyman explains the current behaviour violates the XHCI spec: hub_port_reset() will end up moving the corresponding xhci device slot to default state. As hub_port_reset() is called several times in hub_port_init() it sounds reasonable that we could end up with two threads having their xhci device slots in default state at the same time, which according to xhci 4.5.3 specs still is a big no no: "Note: Software shall not transition more than one Device Slot to the Default State at a time" So both threads fail at their next task after this. One fails to read the descriptor, and the other fails addressing the device. Fix this in hub_port_init by locking the USB controller (instead of an individual bus) to prevent simultaneous initialisation of both buses. Fixes: 638139eb95d2 ("usb: hub: allow to process more usb hub events in parallel") Link: https://lkml.org/lkml/2016/2/8/312 Link: https://lkml.org/lkml/2016/2/4/748 Signed-off-by: Chris Bainbridge Cc: stable Acked-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 3 +-- include/linux/usb/hcd.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 7824f4557d50..01b6c61cf9bb 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -374,13 +374,12 @@ struct usb_bus { int devnum_next; /* Next open device number in * round-robin allocation */ + struct mutex devnum_next_mutex; /* devnum_next mutex */ struct usb_devmap devmap; /* device address allocation map */ struct usb_device *root_hub; /* Root hub */ struct usb_bus *hs_companion; /* Companion EHCI bus, if any */ - struct mutex usb_address0_mutex; /* unaddressed device mutex */ - int bandwidth_allocated; /* on this bus: how much of the time * reserved for periodic (intr/iso) * requests is used, on average? diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index b98f831dcda3..66fc13705ab7 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -181,6 +181,7 @@ struct usb_hcd { * bandwidth_mutex should be dropped after a successful control message * to the device, or resetting the bandwidth after a failed attempt. */ + struct mutex *address0_mutex; struct mutex *bandwidth_mutex; struct usb_hcd *shared_hcd; struct usb_hcd *primary_hcd; -- cgit v1.2.3 From 0a2cf20c3fb62ad4717276b5303bf831f7b29d54 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Wed, 27 Apr 2016 23:39:01 -0400 Subject: tcp: remove SKBTX_ACK_TSTAMP since it is redundant The SKBTX_ACK_TSTAMP flag is set in skb_shinfo->tx_flags when the timestamp of the TCP acknowledgement should be reported on error queue. Since accessing skb_shinfo is likely to incur a cache-line miss at the time of receiving the ack, the txstamp_ack bit was added in tcp_skb_cb, which is set iff the SKBTX_ACK_TSTAMP flag is set for an skb. This makes SKBTX_ACK_TSTAMP flag redundant. Remove the SKBTX_ACK_TSTAMP and instead use the txstamp_ack bit everywhere. Note that this frees one bit in shinfo->tx_flags. Signed-off-by: Soheil Hassas Yeganeh Acked-by: Martin KaFai Lau Suggested-by: Willem de Bruijn Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a1ce63979ad8..c84a5a1078c5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -382,14 +382,10 @@ enum { /* generate software time stamp when entering packet scheduling */ SKBTX_SCHED_TSTAMP = 1 << 6, - - /* generate software timestamp on peer data acknowledgment */ - SKBTX_ACK_TSTAMP = 1 << 7, }; #define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ - SKBTX_SCHED_TSTAMP | \ - SKBTX_ACK_TSTAMP) + SKBTX_SCHED_TSTAMP) #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP) /* -- cgit v1.2.3 From 8453c5cafd32c4d6bd13ec4a62d4b639f4edb222 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 28 Apr 2016 08:21:03 -0700 Subject: ARM: OMAP2+: Add more functions to pwm pdata for ir-rx51 Before we start removing omap3 legacy booting support, let's make n900 DT booting behave the same way for ir-rx51 as the legacy booting does. For now, we need to pass pdata to the ir-rx51 driver. This means that the n900 tree can move to using DT based booting without having to carry all the legacy platform data with it when it gets dropped from the mainline tree. Note that the ir-rx51 driver is currently disabled because of the dependency to !ARCH_MULTIPLATFORM. This will get sorted out later with the help of drivers/pwm/pwm-omap-dmtimer.c. But first we need to add chained IRQ support to dmtimer code to avoid introducing new custom frameworks. So let's just pass the necessary dmtimer functions to ir-rx51 so we can get it working in the following patch. Cc: Neil Armstrong Tested-by: Ivaylo Dimitrov Signed-off-by: Tony Lindgren --- include/linux/platform_data/media/ir-rx51.h | 1 + include/linux/platform_data/pwm_omap_dmtimer.h | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/media/ir-rx51.h b/include/linux/platform_data/media/ir-rx51.h index 104aa892f31b..3038120ca46e 100644 --- a/include/linux/platform_data/media/ir-rx51.h +++ b/include/linux/platform_data/media/ir-rx51.h @@ -5,6 +5,7 @@ struct lirc_rx51_platform_data { int pwm_timer; int(*set_max_mpu_wakeup_lat)(struct device *dev, long t); + struct pwm_omap_dmtimer_pdata *dmtimer; }; #endif diff --git a/include/linux/platform_data/pwm_omap_dmtimer.h b/include/linux/platform_data/pwm_omap_dmtimer.h index 59384217208f..e7d521e48855 100644 --- a/include/linux/platform_data/pwm_omap_dmtimer.h +++ b/include/linux/platform_data/pwm_omap_dmtimer.h @@ -35,6 +35,16 @@ #ifndef __PWM_OMAP_DMTIMER_PDATA_H #define __PWM_OMAP_DMTIMER_PDATA_H +/* clock sources */ +#define PWM_OMAP_DMTIMER_SRC_SYS_CLK 0x00 +#define PWM_OMAP_DMTIMER_SRC_32_KHZ 0x01 +#define PWM_OMAP_DMTIMER_SRC_EXT_CLK 0x02 + +/* timer interrupt enable bits */ +#define PWM_OMAP_DMTIMER_INT_CAPTURE (1 << 2) +#define PWM_OMAP_DMTIMER_INT_OVERFLOW (1 << 1) +#define PWM_OMAP_DMTIMER_INT_MATCH (1 << 0) + /* trigger types */ #define PWM_OMAP_DMTIMER_TRIGGER_NONE 0x00 #define PWM_OMAP_DMTIMER_TRIGGER_OVERFLOW 0x01 @@ -45,15 +55,23 @@ typedef struct omap_dm_timer pwm_omap_dmtimer; struct pwm_omap_dmtimer_pdata { pwm_omap_dmtimer *(*request_by_node)(struct device_node *np); + pwm_omap_dmtimer *(*request_specific)(int timer_id); + pwm_omap_dmtimer *(*request)(void); + int (*free)(pwm_omap_dmtimer *timer); void (*enable)(pwm_omap_dmtimer *timer); void (*disable)(pwm_omap_dmtimer *timer); + int (*get_irq)(pwm_omap_dmtimer *timer); + int (*set_int_enable)(pwm_omap_dmtimer *timer, unsigned int value); + int (*set_int_disable)(pwm_omap_dmtimer *timer, u32 mask); + struct clk *(*get_fclk)(pwm_omap_dmtimer *timer); int (*start)(pwm_omap_dmtimer *timer); int (*stop)(pwm_omap_dmtimer *timer); + int (*set_source)(pwm_omap_dmtimer *timer, int source); int (*set_load)(pwm_omap_dmtimer *timer, int autoreload, unsigned int value); @@ -63,7 +81,10 @@ struct pwm_omap_dmtimer_pdata { int toggle, int trigger); int (*set_prescaler)(pwm_omap_dmtimer *timer, int prescaler); + unsigned int (*read_counter)(pwm_omap_dmtimer *timer); int (*write_counter)(pwm_omap_dmtimer *timer, unsigned int value); + unsigned int (*read_status)(pwm_omap_dmtimer *timer); + int (*write_status)(pwm_omap_dmtimer *timer, unsigned int value); }; #endif /* __PWM_OMAP_DMTIMER_PDATA_H */ -- cgit v1.2.3 From 40abf9be8f52d440e442206182916e3dcc68f722 Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Mon, 11 Apr 2016 15:02:28 -0700 Subject: libnvdimm: increase max envelope size for ioctl nd_ioctl() must first read in the fixed sized portion of an ioctl so that it can then determine the size of the variable part. Prepare for ND_CMD_CALL calls which have larger fixed portion envelope. Signed-off-by: Jerry Hoemann Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 833867b9ddc2..af31d1c6fdd7 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -27,7 +27,7 @@ enum { /* need to set a limit somewhere, but yes, this is likely overkill */ ND_IOCTL_MAX_BUFLEN = SZ_4M, ND_CMD_MAX_ELEM = 5, - ND_CMD_MAX_ENVELOPE = 16, + ND_CMD_MAX_ENVELOPE = 256, ND_MAX_MAPPINGS = 32, /* region flag indicating to direct-map persistent memory by default */ -- cgit v1.2.3 From e3654eca70d63704c94a60a2aafc0b3c7b46a00b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 28 Apr 2016 16:17:07 -0700 Subject: nfit, libnvdimm: clarify "commands" vs "_DSMs" Clarify the distinction between "commands", the ioctls userspace calls to request the kernel take some action on a given dimm device, and "_DSMs", the actual function numbers used in the firmware interface to the DIMM. _DSMs are ACPI specific whereas commands are Linux kernel generic. This is in preparation for breaking the 1:1 implicit relationship between the kernel ioctl number space and the firmware specific function numbers. Cc: Jerry Hoemann Cc: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index af31d1c6fdd7..0c3c30cbbea5 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -68,7 +68,7 @@ struct nd_mapping { struct nvdimm_bus_descriptor { const struct attribute_group **attr_groups; - unsigned long dsm_mask; + unsigned long cmd_mask; char *provider_name; ndctl_fn ndctl; int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); @@ -130,10 +130,11 @@ struct nd_region *to_nd_region(struct device *dev); struct nd_blk_region *to_nd_blk_region(struct device *dev); struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); const char *nvdimm_name(struct nvdimm *nvdimm); +unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm); void *nvdimm_provider_data(struct nvdimm *nvdimm); struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, const struct attribute_group **groups, unsigned long flags, - unsigned long *dsm_mask); + unsigned long cmd_mask); const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd); const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd); u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, -- cgit v1.2.3 From 92b4423e3a0bc5d43ecde4bcad871f8b5ba04efd Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 29 Apr 2016 10:39:34 +0200 Subject: netfilter: fix IS_ERR_VALUE usage This is a forward-port of the original patch from Andrzej Hajda, he said: "IS_ERR_VALUE should be used only with unsigned long type. Otherwise it can work incorrectly. To achieve this function xt_percpu_counter_alloc is modified to return unsigned long, and its result is assigned to temporary variable to perform error checking, before assigning to .pcnt field. The patch follows conclusion from discussion on LKML [1][2]. [1]: http://permalink.gmane.org/gmane.linux.kernel/2120927 [2]: http://permalink.gmane.org/gmane.linux.kernel/2150581" Original patch from Andrzej is here: http://patchwork.ozlabs.org/patch/582970/ This patch has clashed with input validation fixes for x_tables. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 4dd9306c9d56..dc4f58a3cdcc 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -380,16 +380,16 @@ static inline unsigned long ifname_compare_aligned(const char *_a, * allows us to return 0 for single core systems without forcing * callers to deal with SMP vs. NONSMP issues. */ -static inline u64 xt_percpu_counter_alloc(void) +static inline unsigned long xt_percpu_counter_alloc(void) { if (nr_cpu_ids > 1) { void __percpu *res = __alloc_percpu(sizeof(struct xt_counters), sizeof(struct xt_counters)); if (res == NULL) - return (u64) -ENOMEM; + return -ENOMEM; - return (u64) (__force unsigned long) res; + return (__force unsigned long) res; } return 0; -- cgit v1.2.3 From e81591815de05572ed28cbdca631d4d97f0bd059 Mon Sep 17 00:00:00 2001 From: Jiang Qiu Date: Thu, 28 Apr 2016 17:32:01 +0800 Subject: gpio: dwapb: remove name from dwapb_port_property This patch removed the name property from dwapb_port_property. The name property is redundant, since we can get this info from dwapb_gpio dev node. Reviewed-by: Andy Shevchenko Signed-off-by: Jiang Qiu Signed-off-by: Linus Walleij --- include/linux/platform_data/gpio-dwapb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/gpio-dwapb.h b/include/linux/platform_data/gpio-dwapb.h index 28702c849af1..955b5790d24a 100644 --- a/include/linux/platform_data/gpio-dwapb.h +++ b/include/linux/platform_data/gpio-dwapb.h @@ -16,7 +16,6 @@ struct dwapb_port_property { struct device_node *node; - const char *name; unsigned int idx; unsigned int ngpio; unsigned int gpio_base; -- cgit v1.2.3 From 4ba8cfa79f44a9489b1d562430cb70fb53200adb Mon Sep 17 00:00:00 2001 From: Jiang Qiu Date: Thu, 28 Apr 2016 17:32:02 +0800 Subject: gpio: dwapb: convert device node to fwnode This patch converts device node to fwnode for dwapb driver, so as to provide a unified fwnode for DT and ACPI bindings. Tested-by: Alan Tull Acked-by: Andy Shevchenko Signed-off-by: Jiang Qiu Signed-off-by: Linus Walleij --- include/linux/platform_data/gpio-dwapb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/gpio-dwapb.h b/include/linux/platform_data/gpio-dwapb.h index 955b5790d24a..2dc7f4a8ab09 100644 --- a/include/linux/platform_data/gpio-dwapb.h +++ b/include/linux/platform_data/gpio-dwapb.h @@ -15,7 +15,7 @@ #define GPIO_DW_APB_H struct dwapb_port_property { - struct device_node *node; + struct fwnode_handle *fwnode; unsigned int idx; unsigned int ngpio; unsigned int gpio_base; -- cgit v1.2.3 From 1140f7c8994a3a2a0d7c4972509d98b792617d39 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 5 Apr 2016 17:17:34 +0200 Subject: phy: core: Allow children node to be overridden In order to more flexibly support device tree bindings, allow drivers to override the container of the child nodes. By default the device node of the PHY provider is assumed to be the parent for children, but bindings may decide to add additional levels for better organization. Acked-by: Kishon Vijay Abraham I Signed-off-by: Thierry Reding --- include/linux/phy/phy.h | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 8cf05e341cff..a810f2a18842 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -77,6 +77,7 @@ struct phy { */ struct phy_provider { struct device *dev; + struct device_node *children; struct module *owner; struct list_head list; struct phy * (*of_xlate)(struct device *dev, @@ -93,10 +94,16 @@ struct phy_lookup { #define to_phy(a) (container_of((a), struct phy, dev)) #define of_phy_provider_register(dev, xlate) \ - __of_phy_provider_register((dev), THIS_MODULE, (xlate)) + __of_phy_provider_register((dev), NULL, THIS_MODULE, (xlate)) #define devm_of_phy_provider_register(dev, xlate) \ - __devm_of_phy_provider_register((dev), THIS_MODULE, (xlate)) + __devm_of_phy_provider_register((dev), NULL, THIS_MODULE, (xlate)) + +#define of_phy_provider_register_full(dev, children, xlate) \ + __of_phy_provider_register(dev, children, THIS_MODULE, xlate) + +#define devm_of_phy_provider_register_full(dev, children, xlate) \ + __devm_of_phy_provider_register(dev, children, THIS_MODULE, xlate) static inline void phy_set_drvdata(struct phy *phy, void *data) { @@ -147,11 +154,13 @@ struct phy *devm_phy_create(struct device *dev, struct device_node *node, void phy_destroy(struct phy *phy); void devm_phy_destroy(struct device *dev, struct phy *phy); struct phy_provider *__of_phy_provider_register(struct device *dev, - struct module *owner, struct phy * (*of_xlate)(struct device *dev, - struct of_phandle_args *args)); + struct device_node *children, struct module *owner, + struct phy * (*of_xlate)(struct device *dev, + struct of_phandle_args *args)); struct phy_provider *__devm_of_phy_provider_register(struct device *dev, - struct module *owner, struct phy * (*of_xlate)(struct device *dev, - struct of_phandle_args *args)); + struct device_node *children, struct module *owner, + struct phy * (*of_xlate)(struct device *dev, + struct of_phandle_args *args)); void of_phy_provider_unregister(struct phy_provider *phy_provider); void devm_of_phy_provider_unregister(struct device *dev, struct phy_provider *phy_provider); @@ -312,15 +321,17 @@ static inline void devm_phy_destroy(struct device *dev, struct phy *phy) } static inline struct phy_provider *__of_phy_provider_register( - struct device *dev, struct module *owner, struct phy * (*of_xlate)( - struct device *dev, struct of_phandle_args *args)) + struct device *dev, struct device_node *children, struct module *owner, + struct phy * (*of_xlate)(struct device *dev, + struct of_phandle_args *args)) { return ERR_PTR(-ENOSYS); } static inline struct phy_provider *__devm_of_phy_provider_register(struct device - *dev, struct module *owner, struct phy * (*of_xlate)(struct device *dev, - struct of_phandle_args *args)) + *dev, struct device_node *children, struct module *owner, + struct phy * (*of_xlate)(struct device *dev, + struct of_phandle_args *args)) { return ERR_PTR(-ENOSYS); } -- cgit v1.2.3 From 53d2a715c24034ee4017f3c15c82bb4a53a07da5 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 11 Nov 2015 18:24:21 +0100 Subject: phy: Add Tegra XUSB pad controller support Add a new driver for the XUSB pad controller found on NVIDIA Tegra SoCs. This hardware block used to be exposed as a pin controller, but it turns out that this isn't a good fit. The new driver and DT binding much more accurately describe the hardware and are more flexible in supporting new SoC generations. Acked-by: Kishon Vijay Abraham I Signed-off-by: Thierry Reding --- include/linux/phy/tegra/xusb.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 include/linux/phy/tegra/xusb.h (limited to 'include/linux') diff --git a/include/linux/phy/tegra/xusb.h b/include/linux/phy/tegra/xusb.h new file mode 100644 index 000000000000..8e1a57a78d9f --- /dev/null +++ b/include/linux/phy/tegra/xusb.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef PHY_TEGRA_XUSB_H +#define PHY_TEGRA_XUSB_H + +struct tegra_xusb_padctl; +struct device; + +struct tegra_xusb_padctl *tegra_xusb_padctl_get(struct device *dev); +void tegra_xusb_padctl_put(struct tegra_xusb_padctl *padctl); + +int tegra_xusb_padctl_usb3_save_context(struct tegra_xusb_padctl *padctl, + unsigned int port); +int tegra_xusb_padctl_hsic_set_idle(struct tegra_xusb_padctl *padctl, + unsigned int port, bool idle); +int tegra_xusb_padctl_usb3_set_lfps_detect(struct tegra_xusb_padctl *padctl, + unsigned int port, bool enable); + +#endif /* PHY_TEGRA_XUSB_H */ -- cgit v1.2.3 From f4b05d27ec6b032ca504591e2a157b058b6f172f Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 28 Apr 2016 17:59:28 +0200 Subject: net: constify is_skb_forwardable's arguments is_skb_forwardable is not supposed to change anything so constify its arguments Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 934ca866562d..52914a854386 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3264,7 +3264,8 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); -bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb); +bool is_skb_forwardable(const struct net_device *dev, + const struct sk_buff *skb); extern int netdev_budget; -- cgit v1.2.3 From fa66ddb870ca022342fe6d1312ef76d2f7233a1d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 28 Apr 2016 12:04:13 -0400 Subject: tracing: Move trace_buffer_unlock_commit{_regs}() to local header The functions trace_buffer_unlock_commit() and the _regs() version are only used within the kernel/trace directory. Move them to the local header and remove the export as well. Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index bb383af35cc7..48cc5e19c5f5 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -158,15 +158,6 @@ struct ring_buffer_event * trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer, int type, unsigned long len, unsigned long flags, int pc); -void trace_buffer_unlock_commit(struct trace_array *tr, - struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc); -void trace_buffer_unlock_commit_regs(struct trace_array *tr, - struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc, - struct pt_regs *regs); void trace_current_buffer_discard_commit(struct ring_buffer *buffer, struct ring_buffer_event *event); -- cgit v1.2.3 From a9fe48dcde88fd48e210e4280f19cb9300ec9112 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 29 Apr 2016 16:12:39 -0400 Subject: tracing: Remove unused function trace_current_buffer_discard_commit() The function trace_current_buffer_discard_commit() has no callers, remove it. Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 48cc5e19c5f5..356c39b3abbb 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -158,8 +158,6 @@ struct ring_buffer_event * trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer, int type, unsigned long len, unsigned long flags, int pc); -void trace_current_buffer_discard_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event); void tracing_record_cmdline(struct task_struct *tsk); -- cgit v1.2.3 From d745098cedb3f5c6a554796d4a3a505abd4ebaa6 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Fri, 29 Apr 2016 01:36:33 +0300 Subject: net/mlx5: Introduce modify flow rule destination This API is used for modifying the flow rule destination. This is needed for modifying the pointed flow table by the traffic type classifier rules to point on the aRFS tables. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 8dec5508d93d..28a5b662ab6a 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -113,4 +113,7 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft, struct mlx5_flow_destination *dest); void mlx5_del_flow_rule(struct mlx5_flow_rule *fr); +int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, + struct mlx5_flow_destination *dest); + #endif -- cgit v1.2.3 From d63cd28608bb563d52e62990fa01c016e8dbdb75 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Fri, 29 Apr 2016 01:36:35 +0300 Subject: net/mlx5: Add user chosen levels when allocating flow tables Currently, consumers of the flow steering infrastructure can't choose their own flow table levels and are limited to one flow table per level. This just waste levels. Instead, we introduce here the possibility to use multiple flow tables in a level. The user is free to connect these flow tables, while following the rule (FTEs in FT of level x could only point to FTs of level y where y > x). In addition this patch switch the order of the create/destroy flow tables of the NIC(vlan and main). Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/fs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 28a5b662ab6a..165ff4f9cc6a 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -82,12 +82,14 @@ struct mlx5_flow_table * mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, - int max_num_groups); + int max_num_groups, + u32 level); struct mlx5_flow_table * mlx5_create_flow_table(struct mlx5_flow_namespace *ns, int prio, - int num_flow_table_entries); + int num_flow_table_entries, + u32 level); int mlx5_destroy_flow_table(struct mlx5_flow_table *ft); /* inbox should be set with the following values: -- cgit v1.2.3 From 5a7b27eb9cf3986f487469b57a3a41286d2e7100 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Fri, 29 Apr 2016 01:36:39 +0300 Subject: net/mlx5: Initializing CPU reverse mapping Allocating CPU rmap and add entry for each IRQ. CPU rmap is used in aRFS to get the RX queue number of the RX completion interrupts. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 96a428dcac9f..d5529449ef47 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -560,6 +560,9 @@ struct mlx5_core_dev { struct mlx5_profile *profile; atomic_t num_qps; u32 issi; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif }; struct mlx5_db { -- cgit v1.2.3 From f09d90864eb7cc00cadbbfd083b4eff84c167981 Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Thu, 28 Apr 2016 16:34:08 +0200 Subject: livepatch: make object/func-walking helpers more robust Current object-walking helper checks the presence of obj->funcs to determine the end of objs array in klp_object structure. This is somewhat fragile because one can easily forget about funcs definition during livepatch creation. In such a case the livepatch module is successfully loaded and all objects after the incorrect one are omitted. This is very confusing. Let's make the helper more robust and check also for the other external member, name. Thus the helper correctly stops on an empty item of the array. We need to have a check for obj->funcs in klp_init_object() to make it work. The same applies to a func-walking helper. As a benefit we'll check for new_func member definition during the livepatch initialization. There is no such check anywhere in the code now. [jkosina@suse.cz: fix shortlog] Signed-off-by: Miroslav Benes Acked-by: Josh Poimboeuf Acked-by: Jessica Yu Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index bd830d590465..90843ccb7852 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -124,10 +124,12 @@ struct klp_patch { }; #define klp_for_each_object(patch, obj) \ - for (obj = patch->objs; obj->funcs; obj++) + for (obj = patch->objs; obj->funcs || obj->name; obj++) #define klp_for_each_func(obj, func) \ - for (func = obj->funcs; func->old_name; func++) + for (func = obj->funcs; \ + func->old_name || func->new_func || func->old_sympos; \ + func++) int klp_register_patch(struct klp_patch *); int klp_unregister_patch(struct klp_patch *); -- cgit v1.2.3 From 904d1857ad09b43f514897dd42daffe200d1ca50 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 29 Apr 2016 18:11:54 -0400 Subject: tracing: Remove unused function trace_current_buffer_lock_reserve() trace_current_buffer_lock_reserve() has no more users. Remove it. Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 356c39b3abbb..3111a1efdad6 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -154,10 +154,6 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer, struct trace_event_file *trace_file, int type, unsigned long len, unsigned long flags, int pc); -struct ring_buffer_event * -trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer, - int type, unsigned long len, - unsigned long flags, int pc); void tracing_record_cmdline(struct task_struct *tsk); -- cgit v1.2.3 From 3ed605bc8a0a688d8750a1e2eff39c854418c5cf Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Tue, 26 Apr 2016 12:32:27 -0300 Subject: kernel.h: add u64_to_user_ptr() This function had copies in 3 different files. Unify them in kernel.h. Cc: Joe Perches Cc: Andrew Morton Cc: David Airlie Cc: Daniel Vetter Cc: Rob Clark Signed-off-by: Gustavo Padovan Acked-by: Daniel Vetter [drm/i915/] Acked-by: Rob Clark [drm/msm/] Acked-by: Lucas Stach [drm/etinav/] Acked-by: Maarten Lankhorst Signed-off-by: Greg Kroah-Hartman --- include/linux/kernel.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2f7775e229b0..f3e45cb0b6a2 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -53,6 +53,13 @@ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) +#define u64_to_user_ptr(x) ( \ +{ \ + typecheck(u64, x); \ + (void __user *)(uintptr_t)x; \ +} \ +) + /* * This looks more complex than it should be. But we need to * get the type for the ~ right in round_down (it needs to be -- cgit v1.2.3 From 460bfc41fd52959311ed0328163f785e023857af Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Thu, 28 Apr 2016 10:46:57 -0300 Subject: dma-buf/sync_file: de-stage sync_file headers Move sync_file headers file to include/ dir. Signed-off-by: Gustavo Padovan Reviewed-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- include/linux/sync_file.h | 57 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 include/linux/sync_file.h (limited to 'include/linux') diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h new file mode 100644 index 000000000000..c6ffe8b0725c --- /dev/null +++ b/include/linux/sync_file.h @@ -0,0 +1,57 @@ +/* + * include/linux/sync_file.h + * + * Copyright (C) 2012 Google, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _LINUX_SYNC_FILE_H +#define _LINUX_SYNC_FILE_H + +#include +#include +#include +#include +#include +#include + +struct sync_file_cb { + struct fence_cb cb; + struct fence *fence; + struct sync_file *sync_file; +}; + +/** + * struct sync_file - sync file to export to the userspace + * @file: file representing this fence + * @kref: reference count on fence. + * @name: name of sync_file. Useful for debugging + * @sync_file_list: membership in global file list + * @num_fences: number of sync_pts in the fence + * @wq: wait queue for fence signaling + * @status: 0: signaled, >0:active, <0: error + * @cbs: sync_pts callback information + */ +struct sync_file { + struct file *file; + struct kref kref; + char name[32]; +#ifdef CONFIG_DEBUG_FS + struct list_head sync_file_list; +#endif + int num_fences; + + wait_queue_head_t wq; + atomic_t status; + + struct sync_file_cb cbs[]; +}; + +struct sync_file *sync_file_create(struct fence *fence); + +#endif /* _LINUX_SYNC_H */ -- cgit v1.2.3 From d3feb406733544dbf0e239ef945a09decdceac88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 14 Apr 2016 11:37:43 +0200 Subject: phy: bcm-ns-usb2: new driver for USB 2.0 PHY on Northstar MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Northstar is a family of SoCs used in home routers. They have USB 2.0 and 3.0 controllers with PHYs that need to be properly initialized. This driver provides PHY init support in a generic way and can be bound with an EHCI controller driver. There are (just a few) registers being defined in bcma header. It's because DMU/CRU registers will be also needed in other drivers. We will need them e.g. in PCIe controller/PHY driver and at some point probably in clock driver for BCM53573 chipset. By using include/linux/bcma/ we avoid code duplication. Signed-off-by: Rafał Miłecki Signed-off-by: Kishon Vijay Abraham I --- include/linux/bcma/bcma.h | 1 + include/linux/bcma/bcma_driver_arm_c9.h | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 include/linux/bcma/bcma_driver_arm_c9.h (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 0367c63f5960..e6b41f42602b 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -4,6 +4,7 @@ #include #include +#include #include #include #include diff --git a/include/linux/bcma/bcma_driver_arm_c9.h b/include/linux/bcma/bcma_driver_arm_c9.h new file mode 100644 index 000000000000..93bd73d670d5 --- /dev/null +++ b/include/linux/bcma/bcma_driver_arm_c9.h @@ -0,0 +1,15 @@ +#ifndef LINUX_BCMA_DRIVER_ARM_C9_H_ +#define LINUX_BCMA_DRIVER_ARM_C9_H_ + +/* DMU (Device Management Unit) */ +#define BCMA_DMU_CRU_USB2_CONTROL 0x0164 +#define BCMA_DMU_CRU_USB2_CONTROL_USB_PLL_NDIV_MASK 0x00000FFC +#define BCMA_DMU_CRU_USB2_CONTROL_USB_PLL_NDIV_SHIFT 2 +#define BCMA_DMU_CRU_USB2_CONTROL_USB_PLL_PDIV_MASK 0x00007000 +#define BCMA_DMU_CRU_USB2_CONTROL_USB_PLL_PDIV_SHIFT 12 +#define BCMA_DMU_CRU_CLKSET_KEY 0x0180 +#define BCMA_DMU_CRU_STRAPS_CTRL 0x02A0 +#define BCMA_DMU_CRU_STRAPS_CTRL_USB3 0x00000010 +#define BCMA_DMU_CRU_STRAPS_CTRL_4BYTE 0x00008000 + +#endif /* LINUX_BCMA_DRIVER_ARM_C9_H_ */ -- cgit v1.2.3 From 71f5c63c07e5be7abdce40891778ffbf3cec04f0 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 23 Mar 2016 12:09:18 +0100 Subject: phy: exynos-mipi-video: Add support for Exynos 5420 and 5433 SoCs This patch adds support for MIPI DPHYs found in Exynos5420-compatible (5420, 5422 and 5800) and Exynos5433 SoCs. Those SoCs differs from earlier by different offset of MIPI DPHY registers in PMU controllers (Exynos 5420-compatible case) or by moving MIPI DPHY reset registers to separate system register controllers (Exynos 5433 case). In both case also additional 5th PHY (MIPI CSIS 2) has been added. Acked-by: Sylwester Nawrocki Signed-off-by: Marek Szyprowski Signed-off-by: Kishon Vijay Abraham I --- include/linux/mfd/syscon/exynos5-pmu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/exynos5-pmu.h b/include/linux/mfd/syscon/exynos5-pmu.h index 9352adc95de6..76f30f940c70 100644 --- a/include/linux/mfd/syscon/exynos5-pmu.h +++ b/include/linux/mfd/syscon/exynos5-pmu.h @@ -38,6 +38,9 @@ /* Exynos5433 specific register definitions */ #define EXYNOS5433_USBHOST30_PHY_CONTROL (0x728) +#define EXYNOS5433_MIPI_PHY0_CONTROL (0x710) +#define EXYNOS5433_MIPI_PHY1_CONTROL (0x714) +#define EXYNOS5433_MIPI_PHY2_CONTROL (0x718) #define EXYNOS5_PHY_ENABLE BIT(0) -- cgit v1.2.3 From 18900ca65a8553edc608b6c9d518eb31e6c09ba1 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Apr 2016 17:06:48 -0700 Subject: tty: Replace TTY_IO_ERROR bit tests with tty_io_error() Abstract TTY_IO_ERROR status test treewide with tty_io_error(). NB: tty->flags uses atomic bit ops; replace non-atomic bit test with test_bit(). Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 3b09f235db66..68d829bf93b8 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -360,6 +360,11 @@ static inline void tty_set_flow_change(struct tty_struct *tty, int val) smp_mb(); } +static inline bool tty_io_error(struct tty_struct *tty) +{ + return test_bit(TTY_IO_ERROR, &tty->flags); +} + #ifdef CONFIG_TTY extern void console_init(void); extern void tty_kref_put(struct tty_struct *tty); -- cgit v1.2.3 From 97ef38b8210d7459d4cb51668cdf3983772ac6b7 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Apr 2016 17:11:36 -0700 Subject: tty: Replace TTY_THROTTLED bit tests with tty_throttled() Abstract TTY_THROTTLED bit tests with tty_throttled(). Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 68d829bf93b8..89f9c91b40f5 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -365,6 +365,11 @@ static inline bool tty_io_error(struct tty_struct *tty) return test_bit(TTY_IO_ERROR, &tty->flags); } +static inline bool tty_throttled(struct tty_struct *tty) +{ + return test_bit(TTY_THROTTLED, &tty->flags); +} + #ifdef CONFIG_TTY extern void console_init(void); extern void tty_kref_put(struct tty_struct *tty); -- cgit v1.2.3 From e4d38f334ad24f80229a8ebab26950de8e8f34d7 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Apr 2016 17:53:20 -0700 Subject: tty: Define ASYNC_ replacement bits Prepare for relocating kernel private state bits out of tty_port::flags field; tty_port::flags field is not atomic and can become corrupted by concurrent updates. It also suffers from the complication of sharing in a userspace-visible field which must be masked. Define new tty_port::iflags field and new, substitute bit definitions for the former ASYNC_* flags. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 89f9c91b40f5..4e0dbda05180 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -228,7 +228,8 @@ struct tty_port { int count; /* Usage count */ wait_queue_head_t open_wait; /* Open waiters */ wait_queue_head_t delta_msr_wait; /* Modem status change */ - unsigned long flags; /* TTY flags ASY_*/ + unsigned long flags; /* User TTY flags ASYNC_ */ + unsigned long iflags; /* Internal flags TTY_PORT_ */ unsigned char console:1, /* port is a console */ low_latency:1; /* optional: tune for latency */ struct mutex mutex; /* Locking */ @@ -242,6 +243,19 @@ struct tty_port { struct kref kref; /* Ref counter */ }; +/* tty_port::iflags bits -- use atomic bit ops */ +#define TTY_PORT_INITIALIZED 0 /* device is initialized */ +#define TTY_PORT_SUSPENDED 1 /* device is suspended */ +#define TTY_PORT_ACTIVE 2 /* device is open */ + +/* + * uart drivers: use the uart_port::status field and the UPSTAT_* defines + * for s/w-based flow control steering and carrier detection status + */ +#define TTY_PORT_CTS_FLOW 3 /* h/w flow control enabled */ +#define TTY_PORT_CHECK_CD 4 /* carrier detect enabled */ + + /* * Where all of the state associated with a tty is kept while the tty * is open. Since the termios state should be kept even if the tty -- cgit v1.2.3 From 5604a98e2f95d6221852960a3363588f40d78e22 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Apr 2016 17:53:21 -0700 Subject: tty: Replace ASYNC_CTS_FLOW bit and update atomically Replace ASYNC_CTS_FLOW bit in the tty_port::flags field with TTY_PORT_CTS_FLOW bit in the tty_port::iflags field. Add tty_port_set_cts_flow() helper to abstract the atomic bit ops. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 4e0dbda05180..989d755b0ae4 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -255,7 +255,6 @@ struct tty_port { #define TTY_PORT_CTS_FLOW 3 /* h/w flow control enabled */ #define TTY_PORT_CHECK_CD 4 /* carrier detect enabled */ - /* * Where all of the state associated with a tty is kept while the tty * is open. Since the termios state should be kept even if the tty @@ -561,9 +560,18 @@ static inline struct tty_port *tty_port_get(struct tty_port *port) /* If the cts flow control is enabled, return true. */ static inline bool tty_port_cts_enabled(struct tty_port *port) { - return port->flags & ASYNC_CTS_FLOW; + return test_bit(TTY_PORT_CTS_FLOW, &port->iflags); } +static inline void tty_port_set_cts_flow(struct tty_port *port, bool val) +{ + if (val) + set_bit(TTY_PORT_CTS_FLOW, &port->iflags); + else + clear_bit(TTY_PORT_CTS_FLOW, &port->iflags); +} + + extern struct tty_struct *tty_port_tty_get(struct tty_port *port); extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); extern int tty_port_carrier_raised(struct tty_port *port); -- cgit v1.2.3 From 807c8d81f4ec441241cafa3034c58df721fee869 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Apr 2016 17:53:22 -0700 Subject: tty: Replace ASYNC_NORMAL_ACTIVE bit and update atomically Replace ASYNC_NORMAL_ACTIVE bit in the tty_port::flags field with TTY_PORT_ACTIVE bit in the tty_port::iflags field. Introduce helpers tty_port_set_active() and tty_port_active() to abstract atomic bit ops. Extract state changes from port lock sections, as this usage is broken and confused; the state transitions are protected by the tty lock (which mutually excludes parallel open/close/hangup), and no user tests the active state while holding the port lock. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 989d755b0ae4..dbeeb8666ae4 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -571,6 +571,18 @@ static inline void tty_port_set_cts_flow(struct tty_port *port, bool val) clear_bit(TTY_PORT_CTS_FLOW, &port->iflags); } +static inline bool tty_port_active(struct tty_port *port) +{ + return test_bit(TTY_PORT_ACTIVE, &port->iflags); +} + +static inline void tty_port_set_active(struct tty_port *port, bool val) +{ + if (val) + set_bit(TTY_PORT_ACTIVE, &port->iflags); + else + clear_bit(TTY_PORT_ACTIVE, &port->iflags); +} extern struct tty_struct *tty_port_tty_get(struct tty_port *port); extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); -- cgit v1.2.3 From 2d68655d15bc99981394f7caa769a14b03cac131 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Apr 2016 17:53:23 -0700 Subject: tty: Replace ASYNC_CHECK_CD and update atomically Replace ASYNC_CHECK_CD bit in the tty_port::flags field with TTY_PORT_CHECK_CD bit in the tty_port::iflags field. Introduce helpers tty_port_set_check_carrier() and tty_port_check_carrier() to abstract the atomic bit ops. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index dbeeb8666ae4..4254dfb12fb1 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -584,6 +584,19 @@ static inline void tty_port_set_active(struct tty_port *port, bool val) clear_bit(TTY_PORT_ACTIVE, &port->iflags); } +static inline bool tty_port_check_carrier(struct tty_port *port) +{ + return test_bit(TTY_PORT_CHECK_CD, &port->iflags); +} + +static inline void tty_port_set_check_carrier(struct tty_port *port, bool val) +{ + if (val) + set_bit(TTY_PORT_CHECK_CD, &port->iflags); + else + clear_bit(TTY_PORT_CHECK_CD, &port->iflags); +} + extern struct tty_struct *tty_port_tty_get(struct tty_port *port); extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); extern int tty_port_carrier_raised(struct tty_port *port); -- cgit v1.2.3 From 80f02d5424301bf4df195d09b1a664f394435851 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Apr 2016 17:53:24 -0700 Subject: tty: Replace ASYNC_SUSPENDED bit and update atomically Replace ASYNC_SUSPENDED bit in the tty_port::flags field with TTY_PORT_SUSPENDED bit in the tty_port::iflags field. Introduce helpers tty_port_set_suspended() and tty_port_suspended() to abstract atomic bit ops. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 4254dfb12fb1..7ac5add66c00 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -597,6 +597,19 @@ static inline void tty_port_set_check_carrier(struct tty_port *port, bool val) clear_bit(TTY_PORT_CHECK_CD, &port->iflags); } +static inline bool tty_port_suspended(struct tty_port *port) +{ + return test_bit(TTY_PORT_SUSPENDED, &port->iflags); +} + +static inline void tty_port_set_suspended(struct tty_port *port, bool val) +{ + if (val) + set_bit(TTY_PORT_SUSPENDED, &port->iflags); + else + clear_bit(TTY_PORT_SUSPENDED, &port->iflags); +} + extern struct tty_struct *tty_port_tty_get(struct tty_port *port); extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); extern int tty_port_carrier_raised(struct tty_port *port); -- cgit v1.2.3 From d41861ca19c9e96f12a4f1ebbc8255d00909a232 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Apr 2016 17:53:25 -0700 Subject: tty: Replace ASYNC_INITIALIZED bit and update atomically Replace ASYNC_INITIALIZED bit in the tty_port::flags field with TTY_PORT_INITIALIZED bit in the tty_port::iflags field. Introduce helpers tty_port_set_initialized() and tty_port_initialized() to abstract atomic bit ops. Note: the transforms for test_and_set_bit() and test_and_clear_bit() are unnecessary as the state transitions are already mutually exclusive; the tty lock prevents concurrent open/close/hangup. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 7ac5add66c00..bf1bcdb01df0 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -610,6 +610,19 @@ static inline void tty_port_set_suspended(struct tty_port *port, bool val) clear_bit(TTY_PORT_SUSPENDED, &port->iflags); } +static inline bool tty_port_initialized(struct tty_port *port) +{ + return test_bit(TTY_PORT_INITIALIZED, &port->iflags); +} + +static inline void tty_port_set_initialized(struct tty_port *port, bool val) +{ + if (val) + set_bit(TTY_PORT_INITIALIZED, &port->iflags); + else + clear_bit(TTY_PORT_INITIALIZED, &port->iflags); +} + extern struct tty_struct *tty_port_tty_get(struct tty_port *port); extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); extern int tty_port_carrier_raised(struct tty_port *port); -- cgit v1.2.3 From 9ed19428a51d53477e2b79be3303fa08f8575749 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 9 Apr 2016 18:56:34 -0700 Subject: serial: core: Prevent unsafe uart port access, part 2 For serial core operations not already excluded by holding port->mutex, use reference counting to protect deferencing the state->uart_port. Introduce helper functions, uart_port_ref() and uart_port_deref(), to wrap uart_port access, and helper macros, uart_port_lock() and uart_port_unlock(), to wrap combination uart_port access with uart port lock sections. Port removal in uart_remove_one_port() waits for reference count to drop to zero before detaching the uart port from struct uart_state. For functions only reading the tx circular buffer indexes (where the uart port lock is claimed to prevent concurrent users), a NULL uart port is simply ignored and the operation completes normally. For functions change the tx circular buffer indexes (where the uart port lock is claimed to prevent concurrent users), the operation is aborted if the uart port is NULL (ie., has been detached). Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index cbfcf38e220d..fd4ad4dce11a 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -281,6 +281,8 @@ struct uart_state { enum uart_pm_state pm_state; struct circ_buf xmit; + atomic_t refcount; + wait_queue_head_t remove_wait; struct uart_port *uart_port; }; -- cgit v1.2.3 From 8ede5cce4f0baff77ef63aa3cb3afc65d0317e0b Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 31 Mar 2016 10:08:16 +0200 Subject: tty: vt, make color_table const This means all ->con_set_palette have to have the second parameter const too now. Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/linux/console.h | 2 +- include/linux/selection.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index ea731af2451e..137ac1a1c16f 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -47,7 +47,7 @@ struct consw { int (*con_font_copy)(struct vc_data *, int); int (*con_resize)(struct vc_data *, unsigned int, unsigned int, unsigned int); - int (*con_set_palette)(struct vc_data *, unsigned char *); + int (*con_set_palette)(struct vc_data *, const unsigned char *); int (*con_scrolldelta)(struct vc_data *, int); int (*con_set_origin)(struct vc_data *); void (*con_save_screen)(struct vc_data *); diff --git a/include/linux/selection.h b/include/linux/selection.h index 85193aa8c1e3..7e6c4450b8a5 100644 --- a/include/linux/selection.h +++ b/include/linux/selection.h @@ -24,7 +24,7 @@ extern void mouse_report(struct tty_struct * tty, int butt, int mrx, int mry); extern int console_blanked; -extern unsigned char color_table[]; +extern const unsigned char color_table[]; extern int default_red[]; extern int default_grn[]; extern int default_blu[]; -- cgit v1.2.3 From 91e74ca5e7ac4ec6c61b84d6618eb5e401f852f0 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 31 Mar 2016 10:08:17 +0200 Subject: tty: vt, use proper type for default colors Every user of default_red, default_grn, and default_blu treats them as unsigned char. So make it really unsigned char. And indent the initializers and module_param properly. This saves ~ 100 bytes of data. Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/linux/selection.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/selection.h b/include/linux/selection.h index 7e6c4450b8a5..8e4624efdb6f 100644 --- a/include/linux/selection.h +++ b/include/linux/selection.h @@ -25,9 +25,9 @@ extern void mouse_report(struct tty_struct * tty, int butt, int mrx, int mry); extern int console_blanked; extern const unsigned char color_table[]; -extern int default_red[]; -extern int default_grn[]; -extern int default_blu[]; +extern unsigned char default_red[]; +extern unsigned char default_grn[]; +extern unsigned char default_blu[]; extern unsigned short *screen_pos(struct vc_data *vc, int w_offset, int viewed); extern u16 screen_glyph(struct vc_data *vc, int offset); -- cgit v1.2.3 From 144ef5c2df9b473dad7eab375adcf5b11d0b1e47 Mon Sep 17 00:00:00 2001 From: Wan Ahmad Zainie Date: Wed, 6 Apr 2016 12:06:51 +0800 Subject: serial: 8250: export get_mctrl function Exposes get_mctrl() function so that it can be overriden with platform specific implementation. Signed-off-by: Wan Ahmad Zainie Reviewed-by: Andy Shevchenko Reviewed-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_8250.h | 2 ++ include/linux/serial_core.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 434879759725..48ec7651989b 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -36,6 +36,7 @@ struct plat_serial8250_port { void (*set_termios)(struct uart_port *, struct ktermios *new, struct ktermios *old); + unsigned int (*get_mctrl)(struct uart_port *); int (*handle_irq)(struct uart_port *); void (*pm)(struct uart_port *, unsigned int state, unsigned old); @@ -148,6 +149,7 @@ extern int early_serial8250_setup(struct earlycon_device *device, const char *options); extern void serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, struct ktermios *old); +extern unsigned int serial8250_do_get_mctrl(struct uart_port *port); extern int serial8250_do_startup(struct uart_port *port); extern void serial8250_do_shutdown(struct uart_port *port); extern void serial8250_do_pm(struct uart_port *port, unsigned int state, diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index fd4ad4dce11a..a3d7c0d4a03e 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -123,6 +123,7 @@ struct uart_port { void (*set_termios)(struct uart_port *, struct ktermios *new, struct ktermios *old); + unsigned int (*get_mctrl)(struct uart_port *); void (*set_mctrl)(struct uart_port *, unsigned int); int (*startup)(struct uart_port *port); void (*shutdown)(struct uart_port *port); -- cgit v1.2.3 From e4234a1fc343ca35f852bc527fae56fade879d4a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 31 Mar 2016 11:45:06 +0100 Subject: kernfs: Move faulting copy_user operations outside of the mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A fault in a user provided buffer may lead anywhere, and lockdep warns that we have a potential deadlock between the mm->mmap_sem and the kernfs file mutex: [ 82.811702] ====================================================== [ 82.811705] [ INFO: possible circular locking dependency detected ] [ 82.811709] 4.5.0-rc4-gfxbench+ #1 Not tainted [ 82.811711] ------------------------------------------------------- [ 82.811714] kms_setmode/5859 is trying to acquire lock: [ 82.811717] (&dev->struct_mutex){+.+.+.}, at: [] drm_gem_mmap+0x1a1/0x270 [ 82.811731] but task is already holding lock: [ 82.811734] (&mm->mmap_sem){++++++}, at: [] vm_mmap_pgoff+0x44/0xa0 [ 82.811745] which lock already depends on the new lock. [ 82.811749] the existing dependency chain (in reverse order) is: [ 82.811752] -> #3 (&mm->mmap_sem){++++++}: [ 82.811761] [] lock_acquire+0xc3/0x1d0 [ 82.811766] [] __might_fault+0x75/0xa0 [ 82.811771] [] kernfs_fop_write+0x8a/0x180 [ 82.811787] [] __vfs_write+0x23/0xe0 [ 82.811792] [] vfs_write+0xa4/0x190 [ 82.811797] [] SyS_write+0x44/0xb0 [ 82.811801] [] entry_SYSCALL_64_fastpath+0x16/0x73 [ 82.811807] -> #2 (s_active#6){++++.+}: [ 82.811814] [] lock_acquire+0xc3/0x1d0 [ 82.811819] [] __kernfs_remove+0x210/0x2f0 [ 82.811823] [] kernfs_remove_by_name_ns+0x40/0xa0 [ 82.811828] [] sysfs_remove_file_ns+0x10/0x20 [ 82.811832] [] device_del+0x124/0x250 [ 82.811837] [] device_unregister+0x19/0x60 [ 82.811841] [] cpu_cache_sysfs_exit+0x51/0xb0 [ 82.811846] [] cacheinfo_cpu_callback+0x38/0x70 [ 82.811851] [] notifier_call_chain+0x39/0xa0 [ 82.811856] [] __raw_notifier_call_chain+0x9/0x10 [ 82.811860] [] cpu_notify+0x1e/0x40 [ 82.811865] [] cpu_notify_nofail+0x9/0x20 [ 82.811869] [] _cpu_down+0x233/0x340 [ 82.811874] [] disable_nonboot_cpus+0xc9/0x350 [ 82.811878] [] suspend_devices_and_enter+0x5a1/0xb50 [ 82.811883] [] pm_suspend+0x543/0x8d0 [ 82.811888] [] state_store+0x77/0xe0 [ 82.811892] [] kobj_attr_store+0xf/0x20 [ 82.811897] [] sysfs_kf_write+0x40/0x50 [ 82.811902] [] kernfs_fop_write+0x13c/0x180 [ 82.811906] [] __vfs_write+0x23/0xe0 [ 82.811910] [] vfs_write+0xa4/0x190 [ 82.811914] [] SyS_write+0x44/0xb0 [ 82.811918] [] entry_SYSCALL_64_fastpath+0x16/0x73 [ 82.811923] -> #1 (cpu_hotplug.lock){+.+.+.}: [ 82.811929] [] lock_acquire+0xc3/0x1d0 [ 82.811933] [] mutex_lock_nested+0x62/0x3b0 [ 82.811940] [] get_online_cpus+0x61/0x80 [ 82.811944] [] stop_machine+0x1b/0xe0 [ 82.811949] [] gen8_ggtt_insert_entries__BKL+0x2d/0x30 [i915] [ 82.812009] [] ggtt_bind_vma+0x46/0x70 [i915] [ 82.812045] [] i915_vma_bind+0x140/0x290 [i915] [ 82.812081] [] i915_gem_object_do_pin+0x899/0xb00 [i915] [ 82.812117] [] i915_gem_object_pin+0x35/0x40 [i915] [ 82.812154] [] intel_init_pipe_control+0xbe/0x210 [i915] [ 82.812192] [] intel_logical_rings_init+0xe2/0xde0 [i915] [ 82.812232] [] i915_gem_init+0xf3/0x130 [i915] [ 82.812278] [] i915_driver_load+0xf2d/0x1770 [i915] [ 82.812318] [] drm_dev_register+0xa4/0xb0 [ 82.812323] [] drm_get_pci_dev+0xce/0x1e0 [ 82.812328] [] i915_pci_probe+0x2f/0x50 [i915] [ 82.812360] [] pci_device_probe+0x87/0xf0 [ 82.812366] [] driver_probe_device+0x229/0x450 [ 82.812371] [] __driver_attach+0x83/0x90 [ 82.812375] [] bus_for_each_dev+0x61/0xa0 [ 82.812380] [] driver_attach+0x19/0x20 [ 82.812384] [] bus_add_driver+0x1ef/0x290 [ 82.812388] [] driver_register+0x5b/0xe0 [ 82.812393] [] __pci_register_driver+0x5b/0x60 [ 82.812398] [] drm_pci_init+0xd6/0x100 [ 82.812402] [] 0xffffffffa027c094 [ 82.812406] [] do_one_initcall+0xae/0x1d0 [ 82.812412] [] do_init_module+0x5b/0x1cb [ 82.812417] [] load_module+0x1c20/0x2480 [ 82.812422] [] SyS_finit_module+0x7e/0xa0 [ 82.812428] [] entry_SYSCALL_64_fastpath+0x16/0x73 [ 82.812433] -> #0 (&dev->struct_mutex){+.+.+.}: [ 82.812439] [] __lock_acquire+0x1fc9/0x20f0 [ 82.812443] [] lock_acquire+0xc3/0x1d0 [ 82.812456] [] drm_gem_mmap+0x1c7/0x270 [ 82.812460] [] mmap_region+0x334/0x580 [ 82.812466] [] do_mmap+0x364/0x410 [ 82.812470] [] vm_mmap_pgoff+0x6d/0xa0 [ 82.812474] [] SyS_mmap_pgoff+0x184/0x220 [ 82.812479] [] SyS_mmap+0x1d/0x20 [ 82.812484] [] entry_SYSCALL_64_fastpath+0x16/0x73 [ 82.812489] other info that might help us debug this: [ 82.812493] Chain exists of: &dev->struct_mutex --> s_active#6 --> &mm->mmap_sem [ 82.812502] Possible unsafe locking scenario: [ 82.812506] CPU0 CPU1 [ 82.812508] ---- ---- [ 82.812510] lock(&mm->mmap_sem); [ 82.812514] lock(s_active#6); [ 82.812519] lock(&mm->mmap_sem); [ 82.812522] lock(&dev->struct_mutex); [ 82.812526] *** DEADLOCK *** [ 82.812531] 1 lock held by kms_setmode/5859: [ 82.812533] #0: (&mm->mmap_sem){++++++}, at: [] vm_mmap_pgoff+0x44/0xa0 [ 82.812541] stack backtrace: [ 82.812547] CPU: 0 PID: 5859 Comm: kms_setmode Not tainted 4.5.0-rc4-gfxbench+ #1 [ 82.812550] Hardware name: /NUC5CPYB, BIOS PYBSWCEL.86A.0040.2015.0814.1353 08/14/2015 [ 82.812553] 0000000000000000 ffff880079407bf0 ffffffff813f8505 ffffffff825fb270 [ 82.812560] ffffffff825c4190 ffff880079407c30 ffffffff810c84ac ffff880079407c90 [ 82.812566] ffff8800797ed328 ffff8800797ecb00 0000000000000001 ffff8800797ed350 [ 82.812573] Call Trace: [ 82.812578] [] dump_stack+0x67/0x92 [ 82.812582] [] print_circular_bug+0x1fc/0x310 [ 82.812586] [] __lock_acquire+0x1fc9/0x20f0 [ 82.812590] [] lock_acquire+0xc3/0x1d0 [ 82.812594] [] ? drm_gem_mmap+0x1a1/0x270 [ 82.812599] [] drm_gem_mmap+0x1c7/0x270 [ 82.812603] [] ? drm_gem_mmap+0x1a1/0x270 [ 82.812608] [] mmap_region+0x334/0x580 [ 82.812612] [] do_mmap+0x364/0x410 [ 82.812616] [] vm_mmap_pgoff+0x6d/0xa0 [ 82.812629] [] SyS_mmap_pgoff+0x184/0x220 [ 82.812633] [] SyS_mmap+0x1d/0x20 [ 82.812637] [] entry_SYSCALL_64_fastpath+0x16/0x73 Highly unlikely though this scenario is, we can avoid the issue entirely by moving the copy operation from out under the kernfs_get_active() tracking by assigning the preallocated buffer its own mutex. The temporary buffer allocation doesn't require mutex locking as it is entirely local. The locked section was extended by the addition of the preallocated buf to speed up md user operations in commit 2b75869bba676c248d8d25ae6d2bd9221dfffdb6 Author: NeilBrown Date: Mon Oct 13 16:41:28 2014 +1100 sysfs/kernfs: allow attributes to request write buffer be pre-allocated. Reported-by: Ville Syrjälä Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94350 Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Cc: Ville Syrjälä Cc: Joonas Lahtinen Cc: NeilBrown Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index c06c44242f39..d306e282bb1d 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -177,6 +177,7 @@ struct kernfs_open_file { /* private fields, do not use outside kernfs proper */ struct mutex mutex; + struct mutex prealloc_mutex; int event; struct list_head list; char *prealloc_buf; -- cgit v1.2.3 From a6341f000024cdf1ec14dc26743a409a17378db5 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sat, 2 Apr 2016 17:59:46 -0700 Subject: Drivers: hv: vmbus: Introduce functions for estimating room in the ring buffer Introduce separate functions for estimating how much can be read from and written to the ring buffer. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index aa0fadce9308..66226ceade37 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -151,6 +151,33 @@ hv_get_ringbuffer_availbytes(struct hv_ring_buffer_info *rbi, *read = dsize - *write; } +static inline u32 hv_get_bytes_to_read(struct hv_ring_buffer_info *rbi) +{ + u32 read_loc, write_loc, dsize, read; + + dsize = rbi->ring_datasize; + read_loc = rbi->ring_buffer->read_index; + write_loc = READ_ONCE(rbi->ring_buffer->write_index); + + read = write_loc >= read_loc ? (write_loc - read_loc) : + (dsize - read_loc) + write_loc; + + return read; +} + +static inline u32 hv_get_bytes_to_write(struct hv_ring_buffer_info *rbi) +{ + u32 read_loc, write_loc, dsize, write; + + dsize = rbi->ring_datasize; + read_loc = READ_ONCE(rbi->ring_buffer->read_index); + write_loc = rbi->ring_buffer->write_index; + + write = write_loc >= read_loc ? dsize - (write_loc - read_loc) : + read_loc - write_loc; + return write; +} + /* * VMBUS version is 32 bit entity broken up into * two 16 bit quantities: major_number. minor_number. -- cgit v1.2.3 From 5cc472477f928fb8584eb8e08245c9cf9002d74a Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sat, 2 Apr 2016 17:59:49 -0700 Subject: Drivers: hv: vmbus: Export the vmbus_set_event() API In preparation for moving some ring buffer functionality out of the vmbus driver, export the API for signaling the host. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 66226ceade37..40fd608475f7 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1365,4 +1365,5 @@ extern __u32 vmbus_proto_version; int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, const uuid_le *shv_host_servie_id); +void vmbus_set_event(struct vmbus_channel *channel); #endif /* _HYPERV_H */ -- cgit v1.2.3 From 687f32e6d9bd1d63c5e557e877809eb446f1a6e8 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sat, 2 Apr 2016 17:59:50 -0700 Subject: Drivers: hv: vmbus: Move some ring buffer functions to hyperv.h In preparation for implementing APIs for in-place consumption of VMBUS packets, movve some ring buffer functionality into hyperv.h Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 40fd608475f7..eb7c0b215ba4 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1366,4 +1366,58 @@ extern __u32 vmbus_proto_version; int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, const uuid_le *shv_host_servie_id); void vmbus_set_event(struct vmbus_channel *channel); + +/* Get the start of the ring buffer. */ +static inline void * +hv_get_ring_buffer(struct hv_ring_buffer_info *ring_info) +{ + return (void *)ring_info->ring_buffer->buffer; +} + +/* + * To optimize the flow management on the send-side, + * when the sender is blocked because of lack of + * sufficient space in the ring buffer, potential the + * consumer of the ring buffer can signal the producer. + * This is controlled by the following parameters: + * + * 1. pending_send_sz: This is the size in bytes that the + * producer is trying to send. + * 2. The feature bit feat_pending_send_sz set to indicate if + * the consumer of the ring will signal when the ring + * state transitions from being full to a state where + * there is room for the producer to send the pending packet. + */ + +static inline bool hv_need_to_signal_on_read(struct hv_ring_buffer_info *rbi) +{ + u32 cur_write_sz; + u32 pending_sz; + + /* + * Issue a full memory barrier before making the signaling decision. + * Here is the reason for having this barrier: + * If the reading of the pend_sz (in this function) + * were to be reordered and read before we commit the new read + * index (in the calling function) we could + * have a problem. If the host were to set the pending_sz after we + * have sampled pending_sz and go to sleep before we commit the + * read index, we could miss sending the interrupt. Issue a full + * memory barrier to address this. + */ + virt_mb(); + + pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz); + /* If the other end is not blocked on write don't bother. */ + if (pending_sz == 0) + return false; + + cur_write_sz = hv_get_bytes_to_write(rbi); + + if (cur_write_sz >= pending_sz) + return true; + + return false; +} + #endif /* _HYPERV_H */ -- cgit v1.2.3 From ab028db41ca9174caab7f9e3fc0a2e7f4a418410 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sat, 2 Apr 2016 17:59:51 -0700 Subject: Drivers: hv: vmbus: Implement APIs to support "in place" consumption of vmbus packets Implement APIs for in-place consumption of vmbus packets. Currently, each packet is copied and processed one at a time and as part of processing each packet we potentially may signal the host (if it is waiting for room to produce a packet). These APIs help batched in-place processing of vmbus packets. We also optimize host signaling by having a separate API to signal the end of in-place consumption. With netvsc using these APIs, on an iperf run on average I see about 20X reduction in checks to signal the host. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index eb7c0b215ba4..590fee6a2e6f 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -126,6 +126,8 @@ struct hv_ring_buffer_info { u32 ring_datasize; /* < ring_size */ u32 ring_data_startoffset; + u32 priv_write_index; + u32 priv_read_index; }; /* @@ -1420,4 +1422,88 @@ static inline bool hv_need_to_signal_on_read(struct hv_ring_buffer_info *rbi) return false; } +/* + * An API to support in-place processing of incoming VMBUS packets. + */ +#define VMBUS_PKT_TRAILER 8 + +static inline struct vmpacket_descriptor * +get_next_pkt_raw(struct vmbus_channel *channel) +{ + struct hv_ring_buffer_info *ring_info = &channel->inbound; + u32 read_loc = ring_info->priv_read_index; + void *ring_buffer = hv_get_ring_buffer(ring_info); + struct vmpacket_descriptor *cur_desc; + u32 packetlen; + u32 dsize = ring_info->ring_datasize; + u32 delta = read_loc - ring_info->ring_buffer->read_index; + u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta); + + if (bytes_avail_toread < sizeof(struct vmpacket_descriptor)) + return NULL; + + if ((read_loc + sizeof(*cur_desc)) > dsize) + return NULL; + + cur_desc = ring_buffer + read_loc; + packetlen = cur_desc->len8 << 3; + + /* + * If the packet under consideration is wrapping around, + * return failure. + */ + if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > (dsize - 1)) + return NULL; + + return cur_desc; +} + +/* + * A helper function to step through packets "in-place" + * This API is to be called after each successful call + * get_next_pkt_raw(). + */ +static inline void put_pkt_raw(struct vmbus_channel *channel, + struct vmpacket_descriptor *desc) +{ + struct hv_ring_buffer_info *ring_info = &channel->inbound; + u32 read_loc = ring_info->priv_read_index; + u32 packetlen = desc->len8 << 3; + u32 dsize = ring_info->ring_datasize; + + if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > dsize) + BUG(); + /* + * Include the packet trailer. + */ + ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER; +} + +/* + * This call commits the read index and potentially signals the host. + * Here is the pattern for using the "in-place" consumption APIs: + * + * while (get_next_pkt_raw() { + * process the packet "in-place"; + * put_pkt_raw(); + * } + * if (packets processed in place) + * commit_rd_index(); + */ +static inline void commit_rd_index(struct vmbus_channel *channel) +{ + struct hv_ring_buffer_info *ring_info = &channel->inbound; + /* + * Make sure all reads are done before we update the read index since + * the writer may start writing to the read area once the read index + * is updated. + */ + virt_rmb(); + ring_info->ring_buffer->read_index = ring_info->priv_read_index; + + if (hv_need_to_signal_on_read(ring_info)) + vmbus_set_event(channel); +} + + #endif /* _HYPERV_H */ -- cgit v1.2.3 From 97fb77dc87582300fa3c141b63699f853576cab1 Mon Sep 17 00:00:00 2001 From: Jake Oshins Date: Tue, 5 Apr 2016 10:22:51 -0700 Subject: drivers:hv: Make a function to free mmio regions through vmbus This patch introduces a function that reverses everything done by vmbus_allocate_mmio(). Existing code just called release_mem_region(). Future patches in this series require a more complex sequence of actions, so this function is introduced to wrap those actions. Signed-off-by: Jake Oshins Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 590fee6a2e6f..b10954a66939 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1120,7 +1120,7 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, resource_size_t min, resource_size_t max, resource_size_t size, resource_size_t align, bool fb_overlap_ok); - +void vmbus_free_mmio(resource_size_t start, resource_size_t size); int vmbus_cpu_number_to_vp_number(int cpu_number); u64 hv_do_hypercall(u64 control, void *input, void *output); -- cgit v1.2.3 From 05d1a717ec0430c916a749b94eb90ab74bbfa356 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Mon, 29 Feb 2016 19:52:05 -0500 Subject: ima: add support for creating files using the mknodat syscall Commit 3034a14 "ima: pass 'opened' flag to identify newly created files" stopped identifying empty files as new files. However new empty files can be created using the mknodat syscall. On systems with IMA-appraisal enabled, these empty files are not labeled with security.ima extended attributes properly, preventing them from subsequently being opened in order to write the file data contents. This patch defines a new hook named ima_post_path_mknod() to mark these empty files, created using mknodat, as new in order to allow the file data contents to be written. In addition, files with security.ima xattrs containing a file signature are considered "immutable" and can not be modified. The file contents need to be written, before signing the file. This patch relaxes this requirement for new files, allowing the file signature to be written before the file contents. Changelog: - defer identifying files with signatures stored as security.ima (based on Dmitry Rozhkov's comments) - removing tests (eg. dentry, dentry->d_inode, inode->i_size == 0) (based on Al's review) Signed-off-by: Mimi Zohar Cc: Al Viro < Tested-by: Dmitry Rozhkov --- include/linux/ima.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ima.h b/include/linux/ima.h index e6516cbbe9bf..0eb7c2e7f0d6 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -21,6 +21,7 @@ extern int ima_file_mmap(struct file *file, unsigned long prot); extern int ima_read_file(struct file *file, enum kernel_read_file_id id); extern int ima_post_read_file(struct file *file, void *buf, loff_t size, enum kernel_read_file_id id); +extern void ima_post_path_mknod(struct dentry *dentry); #else static inline int ima_bprm_check(struct linux_binprm *bprm) @@ -54,6 +55,11 @@ static inline int ima_post_read_file(struct file *file, void *buf, loff_t size, return 0; } +static inline void ima_post_path_mknod(struct dentry *dentry) +{ + return; +} + #endif /* CONFIG_IMA */ #ifdef CONFIG_IMA_APPRAISE -- cgit v1.2.3 From 0f40fbbcc34e093255a2b2d70b6b0fb48c3f39aa Mon Sep 17 00:00:00 2001 From: Brian Bloniarz Date: Sun, 6 Mar 2016 13:16:30 -0800 Subject: Fix OpenSSH pty regression on close OpenSSH expects the (non-blocking) read() of pty master to return EAGAIN only if it has received all of the slave-side output after it has received SIGCHLD. This used to work on pre-3.12 kernels. This fix effectively forces non-blocking read() and poll() to block for parallel i/o to complete for all ttys. It also unwinds these changes: 1) f8747d4a466ab2cafe56112c51b3379f9fdb7a12 tty: Fix pty master read() after slave closes 2) 52bce7f8d4fc633c9a9d0646eef58ba6ae9a3b73 pty, n_tty: Simplify input processing on final close 3) 1a48632ffed61352a7810ce089dc5a8bcd505a60 pty: Fix input race when closing Inspired by analysis and patch from Marc Aurele La France Reported-by: Volth Reported-by: Marc Aurele La France BugLink: https://bugzilla.mindrot.org/show_bug.cgi?id=52 BugLink: https://bugzilla.mindrot.org/show_bug.cgi?id=2492 Signed-off-by: Brian Bloniarz Reviewed-by: Peter Hurley Cc: stable Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index bf1bcdb01df0..d82bb9281d12 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -351,7 +351,6 @@ struct tty_file_private { #define TTY_OTHER_CLOSED 2 /* Other side (if any) has closed */ #define TTY_EXCLUSIVE 3 /* Exclusive open mode */ #define TTY_DO_WRITE_WAKEUP 5 /* Call write_wakeup after queuing new */ -#define TTY_OTHER_DONE 6 /* Closed pty has completed input processing */ #define TTY_LDISC_OPEN 11 /* Line discipline is open */ #define TTY_PTY_LOCK 16 /* pty private */ #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */ @@ -480,6 +479,7 @@ extern void tty_buffer_init(struct tty_port *port); extern void tty_buffer_set_lock_subclass(struct tty_port *port); extern bool tty_buffer_restart_work(struct tty_port *port); extern bool tty_buffer_cancel_work(struct tty_port *port); +extern void tty_buffer_flush_work(struct tty_port *port); extern speed_t tty_termios_baud_rate(struct ktermios *termios); extern speed_t tty_termios_input_baud_rate(struct ktermios *termios); extern void tty_termios_encode_baud_rate(struct ktermios *termios, -- cgit v1.2.3 From 795ddd18d38f9762fbfefceab9aa16caef0cf431 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Sun, 24 Apr 2016 20:28:05 +0100 Subject: nvmem: core: remove regmap dependency nvmem uses regmap_raw_read/write apis to read/write data from providers, regmap raw apis stopped working with recent kernels which removed raw accessors on mmio bus. This resulted in broken nvmem for providers which are based on regmap mmio bus. This issue can be fixed temporarly by moving to other regmap apis, but we might hit same issue in future. Moving to interfaces based on read/write callbacks from providers would be more robust. This patch removes regmap dependency from nvmem and introduces read/write callbacks from the providers. Without this patch nvmem providers like qfprom based on regmap mmio bus would not work. Reported-by: Rajendra Nayak Signed-off-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-provider.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index a4fcc90b0f20..cd93416d762e 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -14,6 +14,10 @@ struct nvmem_device; struct nvmem_cell_info; +typedef int (*nvmem_reg_read_t)(void *priv, unsigned int offset, + void *val, size_t bytes); +typedef int (*nvmem_reg_write_t)(void *priv, unsigned int offset, + void *val, size_t bytes); struct nvmem_config { struct device *dev; @@ -24,6 +28,12 @@ struct nvmem_config { int ncells; bool read_only; bool root_only; + nvmem_reg_read_t reg_read; + nvmem_reg_write_t reg_write; + int size; + int word_size; + int stride; + void *priv; /* To be only used by old driver/misc/eeprom drivers */ bool compat; struct device *base_dev; -- cgit v1.2.3 From 1f62ff34a90471d1b735bac2c79e894afc7c59bc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 24 Mar 2016 22:19:40 +0100 Subject: driver-core: use 'dev' argument in dev_dbg_ratelimited stub dev_dbg_ratelimited() is a macro that ignores its first argument when DEBUG is not set, which can lead to unused variable warnings: ethernet/mellanox/mlxsw/pci.c: In function 'mlxsw_pci_cqe_sdq_handle': ethernet/mellanox/mlxsw/pci.c:646:18: warning: unused variable 'pdev' [-Wunused-variable] ethernet/mellanox/mlxsw/pci.c: In function 'mlxsw_pci_cqe_rdq_handle': ethernet/mellanox/mlxsw/pci.c:671:18: warning: unused variable 'pdev' [-Wunused-variable] The macro already ensures that all its other arguments are silently ignored by the compiler without triggering a warning, through the use of the no_printk() macro, but the dev argument is not passed into that. This changes the definition to use the same trick as no_printk() with an if(0) that leads the compiler to not evaluate the side-effects but still see that 'dev' might not be unused. Signed-off-by: Arnd Bergmann Suggested-by: Andrew Lunn Fixes: 6f586e663e3b ("driver-core: Shut up dev_dbg_reatelimited() without DEBUG") Reviewed-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 002c59728dbe..07f74c246cac 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1293,8 +1293,11 @@ do { \ dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ } while (0) #else -#define dev_dbg_ratelimited(dev, fmt, ...) \ - no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#define dev_dbg_ratelimited(dev, fmt, ...) \ +do { \ + if (0) \ + dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ +} while (0) #endif #ifdef VERBOSE_DEBUG -- cgit v1.2.3 From 1af5bb491fbb41c8dab9d728a92758dd6a28afd4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Apr 2016 08:51:56 -0700 Subject: filemap: remove the pos argument to generic_file_direct_write Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 70e61b58baaf..e9eaa2074061 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2703,7 +2703,7 @@ extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); -extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff_t); +extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *); extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos); -- cgit v1.2.3 From c8b8e32d700fe943a935e435ae251364d016c497 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Apr 2016 08:51:58 -0700 Subject: direct-io: eliminate the offset argument to ->direct_IO Including blkdev_direct_IO and dax_do_io. It has to be ki_pos to actually work, so eliminate the superflous argument. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/dax.h | 2 +- include/linux/fs.h | 9 ++++----- include/linux/nfs_fs.h | 5 ++--- 3 files changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 636dd59ab505..982a6c4a62f3 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -5,7 +5,7 @@ #include #include -ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, +ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, get_block_t, dio_iodone_t, int flags); int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); diff --git a/include/linux/fs.h b/include/linux/fs.h index e9eaa2074061..e6b2de159736 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -394,7 +394,7 @@ struct address_space_operations { void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, gfp_t); void (*freepage)(struct page *); - ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset); + ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); /* * migrate the contents of a page to the specified target. If * migrate_mode is MIGRATE_ASYNC, it must not block. @@ -2766,18 +2766,17 @@ void dio_end_io(struct bio *bio, int error); ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct block_device *bdev, struct iov_iter *iter, - loff_t offset, get_block_t get_block, + get_block_t get_block, dio_iodone_t end_io, dio_submit_t submit_io, int flags); static inline ssize_t blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, - struct iov_iter *iter, loff_t offset, + struct iov_iter *iter, get_block_t get_block) { return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, - offset, get_block, NULL, NULL, - DIO_LOCKING | DIO_SKIP_HOLES); + get_block, NULL, NULL, DIO_LOCKING | DIO_SKIP_HOLES); } #endif diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 67300f8e5f2f..cede8f6a7e2d 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -445,10 +445,9 @@ static inline struct rpc_cred *nfs_file_cred(struct file *file) /* * linux/fs/nfs/direct.c */ -extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *, loff_t); +extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *); extern ssize_t nfs_file_direct_read(struct kiocb *iocb, - struct iov_iter *iter, - loff_t pos); + struct iov_iter *iter); extern ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter); -- cgit v1.2.3 From dde0c2e79848298cc25621ad080d47f94dbd7cce Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Apr 2016 08:52:00 -0700 Subject: fs: add IOCB_SYNC and IOCB_DSYNC This will allow us to do per-I/O sync file writes, as required by a lot of fileservers or storage targets. XXX: Will need a few additional audits for O_DSYNC Signed-off-by: Al Viro --- include/linux/fs.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e6b2de159736..310ca1ed9293 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -323,6 +323,8 @@ struct writeback_control; #define IOCB_APPEND (1 << 1) #define IOCB_DIRECT (1 << 2) #define IOCB_HIPRI (1 << 3) +#define IOCB_DSYNC (1 << 4) +#define IOCB_SYNC (1 << 5) struct kiocb { struct file *ki_filp; @@ -2485,12 +2487,12 @@ extern int filemap_fdatawrite_range(struct address_space *mapping, extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); -static inline int generic_write_sync(struct file *file, loff_t pos, loff_t count) +static inline int generic_write_sync(struct kiocb *iocb, loff_t pos, loff_t count) { - if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) + if (!(iocb->ki_flags & IOCB_DSYNC)) return 0; - return vfs_fsync_range(file, pos, pos + count - 1, - (file->f_flags & __O_SYNC) ? 0 : 1); + return vfs_fsync_range(iocb->ki_filp, pos, pos + count - 1, + (iocb->ki_flags & IOCB_SYNC) ? 0 : 1); } extern void emergency_sync(void); extern void emergency_remount(void); @@ -2942,6 +2944,10 @@ static inline int iocb_flags(struct file *file) res |= IOCB_APPEND; if (io_is_direct(file)) res |= IOCB_DIRECT; + if ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host)) + res |= IOCB_DSYNC; + if (file->f_flags & __O_SYNC) + res |= IOCB_SYNC; return res; } -- cgit v1.2.3 From e259221763a40403d5bb232209998e8c45804ab8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Apr 2016 08:52:01 -0700 Subject: fs: simplify the generic_write_sync prototype The kiocb already has the new position, so use that. The only interesting case is AIO, where we currently don't bother updating ki_pos. We're about to free the kiocb after we're done, so we might as well update it to make everyone's life simpler. While we're at it also return the bytes written argument passed in if we were successful so that the boilerplate error switch code in the callers can go away. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 310ca1ed9293..f6a8ed864651 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2487,13 +2487,25 @@ extern int filemap_fdatawrite_range(struct address_space *mapping, extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); -static inline int generic_write_sync(struct kiocb *iocb, loff_t pos, loff_t count) -{ - if (!(iocb->ki_flags & IOCB_DSYNC)) - return 0; - return vfs_fsync_range(iocb->ki_filp, pos, pos + count - 1, - (iocb->ki_flags & IOCB_SYNC) ? 0 : 1); + +/* + * Sync the bytes written if this was a synchronous write. Expect ki_pos + * to already be updated for the write, and will return either the amount + * of bytes passed in, or an error if syncing the file failed. + */ +static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count) +{ + if (iocb->ki_flags & IOCB_DSYNC) { + int ret = vfs_fsync_range(iocb->ki_filp, + iocb->ki_pos - count, iocb->ki_pos - 1, + (iocb->ki_flags & IOCB_SYNC) ? 0 : 1); + if (ret) + return ret; + } + + return count; } + extern void emergency_sync(void); extern void emergency_remount(void); #ifdef CONFIG_BLOCK -- cgit v1.2.3 From 8ac0fba2da41620f4931a1007c71b0d4723eb02a Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Sun, 1 May 2016 17:50:29 -0400 Subject: isa: Decouple X86_32 dependency from the ISA Kconfig option The introduction of the ISA_BUS option blocks the compilation of ISA drivers on non-x86 platforms. The ISA_BUS configuration option should not be necessary if the X86_32 dependency can be decoupled from the ISA configuration option. This patch both removes the ISA_BUS configuration option entirely and removes the X86_32 dependency from the ISA configuration option. Acked-by: Ingo Molnar Signed-off-by: William Breathitt Gray Signed-off-by: Greg Kroah-Hartman --- include/linux/isa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/isa.h b/include/linux/isa.h index 2a02862775eb..b0270e3814c8 100644 --- a/include/linux/isa.h +++ b/include/linux/isa.h @@ -22,7 +22,7 @@ struct isa_driver { #define to_isa_driver(x) container_of((x), struct isa_driver, driver) -#ifdef CONFIG_ISA_BUS +#ifdef CONFIG_ISA int isa_register_driver(struct isa_driver *, unsigned int); void isa_unregister_driver(struct isa_driver *); #else -- cgit v1.2.3 From 03dc76ca1ee5d02401d5a22ed7ddf15b5e9dfe76 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Thu, 28 Apr 2016 20:20:52 -0400 Subject: qed: add infrastructure for device self tests. This patch adds the functionality and APIs needed for selftests. It adds the ability to configure the link-mode which is required for the implementation of loopback tests. It adds the APIs for clock test, register test, interrupt test and memory test. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 47 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index e5de42b62976..d72c832a9397 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -110,6 +110,7 @@ struct qed_link_params { #define QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS BIT(1) #define QED_LINK_OVERRIDE_SPEED_FORCED_SPEED BIT(2) #define QED_LINK_OVERRIDE_PAUSE_CONFIG BIT(3) +#define QED_LINK_OVERRIDE_LOOPBACK_MODE BIT(4) u32 override_flags; bool autoneg; u32 adv_speeds; @@ -118,6 +119,12 @@ struct qed_link_params { #define QED_LINK_PAUSE_RX_ENABLE BIT(1) #define QED_LINK_PAUSE_TX_ENABLE BIT(2) u32 pause_config; +#define QED_LINK_LOOPBACK_NONE BIT(0) +#define QED_LINK_LOOPBACK_INT_PHY BIT(1) +#define QED_LINK_LOOPBACK_EXT_PHY BIT(2) +#define QED_LINK_LOOPBACK_EXT BIT(3) +#define QED_LINK_LOOPBACK_MAC BIT(4) + u32 loopback_mode; }; struct qed_link_output { @@ -158,7 +165,47 @@ struct qed_common_cb_ops { struct qed_link_output *link); }; +struct qed_selftest_ops { +/** + * @brief selftest_interrupt - Perform interrupt test + * + * @param cdev + * + * @return 0 on success, error otherwise. + */ + int (*selftest_interrupt)(struct qed_dev *cdev); + +/** + * @brief selftest_memory - Perform memory test + * + * @param cdev + * + * @return 0 on success, error otherwise. + */ + int (*selftest_memory)(struct qed_dev *cdev); + +/** + * @brief selftest_register - Perform register test + * + * @param cdev + * + * @return 0 on success, error otherwise. + */ + int (*selftest_register)(struct qed_dev *cdev); + +/** + * @brief selftest_clock - Perform clock test + * + * @param cdev + * + * @return 0 on success, error otherwise. + */ + int (*selftest_clock)(struct qed_dev *cdev); +}; + struct qed_common_ops { + struct qed_selftest_ops *selftest; + struct qed_dev* (*probe)(struct pci_dev *dev, enum qed_protocol protocol, u32 dp_module, u8 dp_level); -- cgit v1.2.3 From 452e5eef6d311e52f657b34d999758107ec3dd4a Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 1 Apr 2016 17:44:33 +0200 Subject: mmc: tmio: Add UHS-I mode support Based on work by Shinobu Uehara and Ben Dooks. This adds the voltage switch operation needed for all UHS-I modes, but not the tuning needed for SDR-104 which will come later. Signed-off-by: Ben Hutchings Signed-off-by: Wolfram Sang Signed-off-by: Ulf Hansson --- include/linux/mmc/tmio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/tmio.h b/include/linux/mmc/tmio.h index 5f5cd80e9765..b2f28e995033 100644 --- a/include/linux/mmc/tmio.h +++ b/include/linux/mmc/tmio.h @@ -63,6 +63,8 @@ #define TMIO_STAT_CMD_BUSY 0x40000000 #define TMIO_STAT_ILL_ACCESS 0x80000000 +#define TMIO_STATUS2_DAT0 BIT(7) + #define CLK_CTL_DIV_MASK 0xff #define CLK_CTL_SCLKEN BIT(8) -- cgit v1.2.3 From 93b6911ac1ffc1fc9aba92c9e19063d47e7cf236 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 1 Apr 2016 17:44:36 +0200 Subject: mmc: host: add note that set_ios needs to handle 0Hz properly While here, refactor the comments so that they are before the declaration they are referring to. Signed-off-by: Wolfram Sang Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 8dd4d290ab0d..85800b48241f 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -93,28 +93,39 @@ struct mmc_host_ops { void (*pre_req)(struct mmc_host *host, struct mmc_request *req, bool is_first_req); void (*request)(struct mmc_host *host, struct mmc_request *req); + + /* + * Avoid calling the next three functions too often or in a "fast + * path", since underlaying controller might implement them in an + * expensive and/or slow way. Also note that these functions might + * sleep, so don't call them in the atomic contexts! + */ + + /* + * Notes to the set_ios callback: + * ios->clock might be 0. For some controllers, setting 0Hz + * as any other frequency works. However, some controllers + * explicitly need to disable the clock. Otherwise e.g. voltage + * switching might fail because the SDCLK is not really quiet. + */ + void (*set_ios)(struct mmc_host *host, struct mmc_ios *ios); + /* - * Avoid calling these three functions too often or in a "fast path", - * since underlaying controller might implement them in an expensive - * and/or slow way. - * - * Also note that these functions might sleep, so don't call them - * in the atomic contexts! - * * Return values for the get_ro callback should be: * 0 for a read/write card * 1 for a read-only card * -ENOSYS when not supported (equal to NULL callback) * or a negative errno value when something bad happened - * + */ + int (*get_ro)(struct mmc_host *host); + + /* * Return values for the get_cd callback should be: * 0 for a absent card * 1 for a present card * -ENOSYS when not supported (equal to NULL callback) * or a negative errno value when something bad happened */ - void (*set_ios)(struct mmc_host *host, struct mmc_ios *ios); - int (*get_ro)(struct mmc_host *host); int (*get_cd)(struct mmc_host *host); void (*enable_sdio_irq)(struct mmc_host *host, int enable); -- cgit v1.2.3 From 49b17858c19b94d46e3d872f85eccba45fff21f4 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Wed, 9 Mar 2016 10:33:55 +0800 Subject: mmc: dw_mmc: fix warning reported by kernel-doc Try to fix the warning reported by: scripts/kernel-doc -man -v include/linux/mmc/dw_mmc.h > /dev/null warning: No description found for parameter 'irq_lock' warning: No description found for parameter 'stop_abort' warning: No description found for parameter 'prev_blksz' warning: No description found for parameter 'timing' warning: No description found for parameter 'ring_size' warning: No description found for parameter 'dms' warning: No description found for parameter 'phy_regs' warning: No description found for parameter 'fifoth_val' warning: No description found for parameter 'vqmmc_enabled' warning: No description found for parameter 'cmd11_timer' warning: Excess struct/union/enum/typedef member 'card_tasklet' description in 'dw_mci' Signed-off-by: Shawn Lin Signed-off-by: Jaehoon Chung Signed-off-by: Ulf Hansson --- include/linux/mmc/dw_mmc.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 7b41c6db1bb6..c8b6a4d295b5 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -55,6 +55,7 @@ struct dw_mci_dma_slave { /** * struct dw_mci - MMC controller state shared between all slots * @lock: Spinlock protecting the queue and associated data. + * @irq_lock: Spinlock protecting the INTMASK setting. * @regs: Pointer to MMIO registers. * @fifo_reg: Pointer to MMIO registers for data FIFO * @sg: Scatterlist entry currently being processed by PIO code, if any. @@ -65,6 +66,9 @@ struct dw_mci_dma_slave { * @cmd: The command currently being sent to the card, or NULL. * @data: The data currently being transferred, or NULL if no data * transfer is in progress. + * @stop_abort: The command currently prepared for stoping transfer. + * @prev_blksz: The former transfer blksz record. + * @timing: Record of current ios timing. * @use_dma: Whether DMA channel is initialized or not. * @using_dma: Whether DMA is in use for the current transfer. * @dma_64bit_address: Whether DMA supports 64-bit address mode or not. @@ -72,7 +76,10 @@ struct dw_mci_dma_slave { * @sg_cpu: Virtual address of DMA buffer. * @dma_ops: Pointer to platform-specific DMA callbacks. * @cmd_status: Snapshot of SR taken upon completion of the current + * @ring_size: Buffer size for idma descriptors. * command. Only valid when EVENT_CMD_COMPLETE is pending. + * @dms: structure of slave-dma private data. + * @phy_regs: physical address of controller's register map * @data_status: Snapshot of SR taken upon completion of the current * data transfer. Only valid when EVENT_DATA_COMPLETE or * EVENT_DATA_ERROR is pending. @@ -80,7 +87,6 @@ struct dw_mci_dma_slave { * to be sent. * @dir_status: Direction of current transfer. * @tasklet: Tasklet running the request state machine. - * @card_tasklet: Tasklet handling card detect. * @pending_events: Bitmask of events flagged by the interrupt handler * to be processed by the tasklet. * @completed_events: Bitmask of events which the state machine has @@ -91,6 +97,7 @@ struct dw_mci_dma_slave { * rate and timeout calculations. * @current_speed: Configured rate of the controller. * @num_slots: Number of slots available. + * @fifoth_val: The value of FIFOTH register. * @verid: Denote Version ID. * @dev: Device associated with the MMC controller. * @pdata: Platform data associated with the MMC controller. @@ -107,9 +114,11 @@ struct dw_mci_dma_slave { * @push_data: Pointer to FIFO push function. * @pull_data: Pointer to FIFO pull function. * @quirks: Set of quirks that apply to specific versions of the IP. + * @vqmmc_enabled: Status of vqmmc, should be true or false. * @irq_flags: The flags to be passed to request_irq. * @irq: The irq value to be passed to request_irq. * @sdio_id0: Number of slot0 in the SDIO interrupt registers. + * @cmd11_timer: Timer for SD3.0 voltage switch over scheme. * @dto_timer: Timer for broken data transfer over scheme. * * Locking -- cgit v1.2.3 From 6929eeec2a7c5fe29d8d5cd0873089fc733943b0 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Wed, 3 Feb 2016 11:26:04 +0800 Subject: mmc: dw_mmc: remove unused EVENT_XFER_ERROR EVENT_XFER_ERROR isn't been used now, so it can be removed. Signed-off-by: Shawn Lin Signed-off-by: Jaehoon Chung Signed-off-by: Ulf Hansson --- include/linux/mmc/dw_mmc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index c8b6a4d295b5..f7ed271a1d54 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -36,7 +36,6 @@ enum { EVENT_XFER_COMPLETE, EVENT_DATA_COMPLETE, EVENT_DATA_ERROR, - EVENT_XFER_ERROR }; struct mmc_data; -- cgit v1.2.3 From ac86045ee9cd89774030ff1c21c7ff35f1c1eeaa Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 26 Apr 2016 17:55:26 +0200 Subject: mmc: tmio: merge distributed include files There is no reason to have a public and private header file. Merge them into a private one, so looking up symbols is less confusing. Signed-off-by: Wolfram Sang Acked-by: Arnd Bergmann Signed-off-by: Ulf Hansson --- include/linux/mmc/tmio.h | 73 ------------------------------------------------ 1 file changed, 73 deletions(-) delete mode 100644 include/linux/mmc/tmio.h (limited to 'include/linux') diff --git a/include/linux/mmc/tmio.h b/include/linux/mmc/tmio.h deleted file mode 100644 index b2f28e995033..000000000000 --- a/include/linux/mmc/tmio.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * include/linux/mmc/tmio.h - * - * Copyright (C) 2016 Sang Engineering, Wolfram Sang - * Copyright (C) 2015-16 Renesas Electronics Corporation - * Copyright (C) 2007 Ian Molton - * Copyright (C) 2004 Ian Molton - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Driver for the MMC / SD / SDIO cell found in: - * - * TC6393XB TC6391XB TC6387XB T7L66XB ASIC3 - */ -#ifndef LINUX_MMC_TMIO_H -#define LINUX_MMC_TMIO_H - -#define CTL_SD_CMD 0x00 -#define CTL_ARG_REG 0x04 -#define CTL_STOP_INTERNAL_ACTION 0x08 -#define CTL_XFER_BLK_COUNT 0xa -#define CTL_RESPONSE 0x0c -#define CTL_STATUS 0x1c -#define CTL_STATUS2 0x1e -#define CTL_IRQ_MASK 0x20 -#define CTL_SD_CARD_CLK_CTL 0x24 -#define CTL_SD_XFER_LEN 0x26 -#define CTL_SD_MEM_CARD_OPT 0x28 -#define CTL_SD_ERROR_DETAIL_STATUS 0x2c -#define CTL_SD_DATA_PORT 0x30 -#define CTL_TRANSACTION_CTL 0x34 -#define CTL_SDIO_STATUS 0x36 -#define CTL_SDIO_IRQ_MASK 0x38 -#define CTL_DMA_ENABLE 0xd8 -#define CTL_RESET_SD 0xe0 -#define CTL_VERSION 0xe2 -#define CTL_SDIO_REGS 0x100 -#define CTL_CLK_AND_WAIT_CTL 0x138 -#define CTL_RESET_SDIO 0x1e0 - -/* Definitions for values the CTRL_STATUS register can take. */ -#define TMIO_STAT_CMDRESPEND 0x00000001 -#define TMIO_STAT_DATAEND 0x00000004 -#define TMIO_STAT_CARD_REMOVE 0x00000008 -#define TMIO_STAT_CARD_INSERT 0x00000010 -#define TMIO_STAT_SIGSTATE 0x00000020 -#define TMIO_STAT_WRPROTECT 0x00000080 -#define TMIO_STAT_CARD_REMOVE_A 0x00000100 -#define TMIO_STAT_CARD_INSERT_A 0x00000200 -#define TMIO_STAT_SIGSTATE_A 0x00000400 -#define TMIO_STAT_CMD_IDX_ERR 0x00010000 -#define TMIO_STAT_CRCFAIL 0x00020000 -#define TMIO_STAT_STOPBIT_ERR 0x00040000 -#define TMIO_STAT_DATATIMEOUT 0x00080000 -#define TMIO_STAT_RXOVERFLOW 0x00100000 -#define TMIO_STAT_TXUNDERRUN 0x00200000 -#define TMIO_STAT_CMDTIMEOUT 0x00400000 -#define TMIO_STAT_RXRDY 0x01000000 -#define TMIO_STAT_TXRQ 0x02000000 -#define TMIO_STAT_ILL_FUNC 0x20000000 -#define TMIO_STAT_CMD_BUSY 0x40000000 -#define TMIO_STAT_ILL_ACCESS 0x80000000 - -#define TMIO_STATUS2_DAT0 BIT(7) - -#define CLK_CTL_DIV_MASK 0xff -#define CLK_CTL_SCLKEN BIT(8) - -#define TMIO_BBS 512 /* Boot block size */ - -#endif /* LINUX_MMC_TMIO_H */ -- cgit v1.2.3 From 010629436d83dc6a5c489847b0a1b8d5449a962f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 28 Apr 2016 08:18:11 +0200 Subject: mmc: sh_mobile_sdhi: remove obsolete include file A few SH boards include the file but don't make use of it (no named interrupts). The SDHI code removed support for this feature as well. So, drop the references and ultimately remove the unneeded file. Signed-off-by: Wolfram Sang Acked-by: Rich Felker Acked-by: Yoshinori Sato Signed-off-by: Ulf Hansson --- include/linux/mmc/sh_mobile_sdhi.h | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 include/linux/mmc/sh_mobile_sdhi.h (limited to 'include/linux') diff --git a/include/linux/mmc/sh_mobile_sdhi.h b/include/linux/mmc/sh_mobile_sdhi.h deleted file mode 100644 index 95d6f0314a7d..000000000000 --- a/include/linux/mmc/sh_mobile_sdhi.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef LINUX_MMC_SH_MOBILE_SDHI_H -#define LINUX_MMC_SH_MOBILE_SDHI_H - -#include - -#define SH_MOBILE_SDHI_IRQ_CARD_DETECT "card_detect" -#define SH_MOBILE_SDHI_IRQ_SDCARD "sdcard" -#define SH_MOBILE_SDHI_IRQ_SDIO "sdio" - -#endif /* LINUX_MMC_SH_MOBILE_SDHI_H */ -- cgit v1.2.3 From 2e65060e803e046fc9b5ed0107494a452424845e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 27 Apr 2016 14:15:38 +0300 Subject: dmaengine: dw: revisit data_width property There several changes are done here: - Convert the property to be in bytes Besides that this is a common practice for such property, the use of a value in bytes much more convenient than handling the encoded one. - Rename data_width to data-width in the device tree bindings The change leaves the support for the old format as well just in case someone will use a newer kernel with an old device tree blob. - While here, replace dwc_fast_ffs() by __ffs() Signed-off-by: Andy Shevchenko Acked-by: Viresh Kumar Signed-off-by: Vinod Koul --- include/linux/platform_data/dma-dw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index b881b978e486..ad768111c350 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -43,7 +43,7 @@ struct dw_dma_slave { * @block_size: Maximum block size supported by the controller * @nr_masters: Number of AHB masters supported by the controller * @data_width: Maximum data width supported by hardware per AHB master - * (0 - 8bits, 1 - 16bits, ..., 5 - 256bits) + * (in bytes, power of 2) */ struct dw_dma_platform_data { unsigned int nr_channels; -- cgit v1.2.3 From 161c3d04aeca8a5bfffe3902786bdf0ccd8575c0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 27 Apr 2016 14:15:39 +0300 Subject: dmaengine: dw: keep entire platform data in struct dw_dma Keep the entire platform data in the struct dw_dma. It makes the driver a bit cleaner. Acked-by: Viresh Kumar Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul --- include/linux/platform_data/dma-dw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index ad768111c350..d15d8ba8cc24 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -55,7 +55,7 @@ struct dw_dma_platform_data { #define CHAN_PRIORITY_ASCENDING 0 /* chan0 highest */ #define CHAN_PRIORITY_DESCENDING 1 /* chan7 highest */ unsigned char chan_priority; - unsigned short block_size; + unsigned int block_size; unsigned char nr_masters; unsigned char data_width[DW_DMA_MAX_NR_MASTERS]; }; -- cgit v1.2.3 From 3a14c66d43d018baed96ceb74f9ab548878c09b8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 27 Apr 2016 14:15:40 +0300 Subject: dmaengine: dw: pass platform data via struct dw_dma_chip We pass struct dw_dma_chip to dw_dma_probe() anyway, thus we may use it to pass a platform data as well. While here, constify the source of the platform data. Acked-by: Viresh Kumar Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul --- include/linux/dma/dw.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma/dw.h b/include/linux/dma/dw.h index 71456442ebe3..f2e538aaddad 100644 --- a/include/linux/dma/dw.h +++ b/include/linux/dma/dw.h @@ -27,6 +27,7 @@ struct dw_dma; * @regs: memory mapped I/O space * @clk: hclk clock * @dw: struct dw_dma that is filed by dw_dma_probe() + * @pdata: pointer to platform data */ struct dw_dma_chip { struct device *dev; @@ -34,10 +35,12 @@ struct dw_dma_chip { void __iomem *regs; struct clk *clk; struct dw_dma *dw; + + const struct dw_dma_platform_data *pdata; }; /* Export to the platform drivers */ -int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata); +int dw_dma_probe(struct dw_dma_chip *chip); int dw_dma_remove(struct dw_dma_chip *chip); /* DMA API extensions */ -- cgit v1.2.3 From 01292cea0df86ed4a1eb6450d6eda375ef925716 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Mon, 25 Apr 2016 08:14:23 +0100 Subject: genirq: Make irq_destroy_ipi take a cpumask of IPIs to destroy Previously irq_destroy_ipi() would destroy IPIs to all CPUs that were configured by irq_reserve_ipi(). This change makes it possible to destroy just a subset of the IPIs. This may be useful to remove IPIs to CPUs that have been hot removed so that the IRQ numbers allocated within the IPI domain can be re-used. The original behaviour is restored by passing the complete mask that the IPI was created with. There are currently no users of this function that would break from the API change. Signed-off-by: Matt Redfearn Cc: linux-mips@linux-mips.org Cc: jason@lakedaemon.net Cc: marc.zyngier@arm.com Cc: ralf@linux-mips.org Cc: Qais Yousef Cc: lisa.parratt@imgtec.com Cc: jiang.liu@linux.intel.com Link: http://lkml.kernel.org/r/1461568464-31701-1-git-send-email-matt.redfearn@imgtec.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 2aed04396210..e1b81d35e7a3 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -348,7 +348,7 @@ int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr, /* IPI functions */ unsigned int irq_reserve_ipi(struct irq_domain *domain, const struct cpumask *dest); -void irq_destroy_ipi(unsigned int irq); +void irq_destroy_ipi(unsigned int irq, const struct cpumask *dest); /* V2 interfaces to support hierarchy IRQ domains. */ extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, -- cgit v1.2.3 From 7cec18a3906b52e855c9386650c0226bbe594a4c Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Mon, 25 Apr 2016 08:14:24 +0100 Subject: genirq: Add error code reporting to irq_{reserve,destroy}_ipi Make these functions return appropriate error codes when something goes wrong. Previously irq_destroy_ipi returned void making it impossible to notify the caller if the request could not be fulfilled. Patch 1 in the series added another condition in which this could fail in addition to the existing ones. irq_reserve_ipi returned an unsigned int meaning it could only return 0 on failure and give the caller no indication as to why the request failed. As time goes on there are likely to be further conditions added in which these functions can fail. These APIs and the IPI IRQ domain are new in 4.6 and the number of existing call sites are low, changing the API now has little impact on the code, while making it easier for these functions to grow over time. Signed-off-by: Matt Redfearn Cc: linux-mips@linux-mips.org Cc: jason@lakedaemon.net Cc: marc.zyngier@arm.com Cc: ralf@linux-mips.org Cc: Qais Yousef Cc: lisa.parratt@imgtec.com Cc: jiang.liu@linux.intel.com Link: http://lkml.kernel.org/r/1461568464-31701-2-git-send-email-matt.redfearn@imgtec.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index e1b81d35e7a3..736abd74c135 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -346,9 +346,8 @@ int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr, irq_hw_number_t *out_hwirq, unsigned int *out_type); /* IPI functions */ -unsigned int irq_reserve_ipi(struct irq_domain *domain, - const struct cpumask *dest); -void irq_destroy_ipi(unsigned int irq, const struct cpumask *dest); +int irq_reserve_ipi(struct irq_domain *domain, const struct cpumask *dest); +int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest); /* V2 interfaces to support hierarchy IRQ domains. */ extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, -- cgit v1.2.3 From 651e8b54abdeeaa36f5f54ffa05c18707a3cc1d0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 11 Apr 2016 09:57:51 +0100 Subject: irqdomain: Allow domain matching on irq_fwspec When iterating over the irq domain list, we try to match a domain either by calling a match() function or by comparing a number of fields passed as parameters. Both approaches are a bit restrictive: - match() is DT specific and only takes a device node - the fallback case only deals with the fwnode_handle It would be useful if we had a per-domain function that would actually perform the matching check on the whole of the irq_fwspec structure. This would allow for a domain to triage matching attempts that need to extend beyond the fwnode. Let's introduce irq_find_matching_fwspec(), which takes a full blown irq_fwspec structure, and call into a select() function implemented by the irqdomain. irq_find_matching_fwnode() is made a wrapper around irq_find_matching_fwspec in order to preserve compatibility. Signed-off-by: Marc Zyngier Cc: Mark Rutland Cc: devicetree@vger.kernel.org Cc: Jason Cooper Cc: Will Deacon Cc: Rob Herring Link: http://lkml.kernel.org/r/1460365075-7316-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 736abd74c135..f1f36e04d885 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -96,6 +96,8 @@ enum irq_domain_bus_token { struct irq_domain_ops { int (*match)(struct irq_domain *d, struct device_node *node, enum irq_domain_bus_token bus_token); + int (*select)(struct irq_domain *d, struct irq_fwspec *fwspec, + enum irq_domain_bus_token bus_token); int (*map)(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw); void (*unmap)(struct irq_domain *d, unsigned int virq); int (*xlate)(struct irq_domain *d, struct device_node *node, @@ -211,7 +213,7 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, irq_hw_number_t first_hwirq, const struct irq_domain_ops *ops, void *host_data); -extern struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, +extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); extern void irq_set_default_host(struct irq_domain *host); extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, @@ -227,6 +229,17 @@ static inline bool is_fwnode_irqchip(struct fwnode_handle *fwnode) return fwnode && fwnode->type == FWNODE_IRQCHIP; } +static inline +struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, + enum irq_domain_bus_token bus_token) +{ + struct irq_fwspec fwspec = { + .fwnode = fwnode, + }; + + return irq_find_matching_fwspec(&fwspec, bus_token); +} + static inline struct irq_domain *irq_find_matching_host(struct device_node *node, enum irq_domain_bus_token bus_token) { -- cgit v1.2.3 From 222df54fd8b7641dcc81476f157806bb3144ee1d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 11 Apr 2016 09:57:52 +0100 Subject: genirq: Allow the affinity of a percpu interrupt to be set/retrieved In order to prepare the genirq layer for the concept of partitionned percpu interrupts, let's allow an affinity to be associated with such an interrupt. We introduce: - irq_set_percpu_devid_partition: flag an interrupt as a percpu-devid interrupt, and associate it with an affinity - irq_get_percpu_devid_partition: allow the affinity of that interrupt to be retrieved. This will allow a driver to discover which CPUs the per-cpu interrupt can actually fire on. Signed-off-by: Marc Zyngier Cc: Mark Rutland Cc: devicetree@vger.kernel.org Cc: Jason Cooper Cc: Will Deacon Cc: Rob Herring Link: http://lkml.kernel.org/r/1460365075-7316-3-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 4 ++++ include/linux/irqdesc.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index c4de62348ff2..4d758a7c604a 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -530,6 +530,10 @@ static inline void irq_set_chip_and_handler(unsigned int irq, struct irq_chip *c } extern int irq_set_percpu_devid(unsigned int irq); +extern int irq_set_percpu_devid_partition(unsigned int irq, + const struct cpumask *affinity); +extern int irq_get_percpu_devid_partition(unsigned int irq, + struct cpumask *affinity); extern void __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index dcca77c4b9d2..b51beebf9804 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -66,6 +66,7 @@ struct irq_desc { int threads_handled_last; raw_spinlock_t lock; struct cpumask *percpu_enabled; + const struct cpumask *percpu_affinity; #ifdef CONFIG_SMP const struct cpumask *affinity_hint; struct irq_affinity_notify *affinity_notify; -- cgit v1.2.3 From 9e2c986cb460bf97154f18e85aa833739a1e8dc7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 11 Apr 2016 09:57:53 +0100 Subject: irqchip: Add per-cpu interrupt partitioning library We've unfortunately started seeing a situation where percpu interrupts are partitioned in the system: one arbitrary set of CPUs has an interrupt connected to a type of device, while another disjoint set of CPUs has the same interrupt connected to another type of device. This makes it impossible to have a device driver requesting this interrupt using the current percpu-interrupt abstraction, as the same interrupt number is now potentially claimed by at least two drivers, and we forbid interrupt sharing on per-cpu interrupt. A solution to this is to turn things upside down. Let's assume that our system describes all the possible partitions for a given interrupt, and give each of them a unique identifier. It is then possible to create a namespace where the affinity identifier itself is a form of interrupt number. At this point, it becomes easy to implement a set of partitions as a cascaded irqchip, each affinity identifier being the HW irq. This allows us to keep a number of nice properties: - Each partition results in a separate percpu-interrupt (with a restrictied affinity), which keeps drivers happy. - Because the underlying interrupt is still per-cpu, the overhead of the indirection can be kept pretty minimal. - The core code can ignore most of that crap. For that purpose, we implement a small library that deals with some of the boilerplate code, relying on platform-specific drivers to provide a description of the affinity sets and a set of callbacks. Signed-off-by: Marc Zyngier Cc: Mark Rutland Cc: devicetree@vger.kernel.org Cc: Jason Cooper Cc: Will Deacon Cc: Rob Herring Link: http://lkml.kernel.org/r/1460365075-7316-4-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqchip/irq-partition-percpu.h | 59 ++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 include/linux/irqchip/irq-partition-percpu.h (limited to 'include/linux') diff --git a/include/linux/irqchip/irq-partition-percpu.h b/include/linux/irqchip/irq-partition-percpu.h new file mode 100644 index 000000000000..87433a5d1285 --- /dev/null +++ b/include/linux/irqchip/irq-partition-percpu.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2016 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +struct partition_affinity { + cpumask_t mask; + void *partition_id; +}; + +struct partition_desc; + +#ifdef CONFIG_PARTITION_PERCPU +int partition_translate_id(struct partition_desc *desc, void *partition_id); +struct partition_desc *partition_create_desc(struct fwnode_handle *fwnode, + struct partition_affinity *parts, + int nr_parts, + int chained_irq, + const struct irq_domain_ops *ops); +struct irq_domain *partition_get_domain(struct partition_desc *dsc); +#else +static inline int partition_translate_id(struct partition_desc *desc, + void *partition_id) +{ + return -EINVAL; +} + +static inline +struct partition_desc *partition_create_desc(struct fwnode_handle *fwnode, + struct partition_affinity *parts, + int nr_parts, + int chained_irq, + const struct irq_domain_ops *ops) +{ + return NULL; +} + +static inline +struct irq_domain *partition_get_domain(struct partition_desc *dsc) +{ + return NULL; +} +#endif -- cgit v1.2.3 From 011d6f5c3e5f38a767c8f4c7e2de73dc91959cb0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 2 May 2016 12:59:19 +0200 Subject: of: include errno.h in of_graph.h When CONFIG_OF is disabled, we have to include linux/errno.h before including of_graph.h, or get build errors like in the newly added sun4i drm driver: In file included from ../drivers/gpu/drm/sun4i/sun4i_drv.c:14:0: include/linux/of_graph.h: In function 'of_graph_parse_endpoint': include/linux/of_graph.h:58:10: error: 'ENOSYS' undeclared (first use in this function) A better solution is to ensure that the header can be included by itself, so let's include linux/errno.h here to fix the error we just got, and any similar future error. Signed-off-by: Arnd Bergmann Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Signed-off-by: Rob Herring --- include/linux/of_graph.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h index f8bcd0e21a26..bb3a5a2cd570 100644 --- a/include/linux/of_graph.h +++ b/include/linux/of_graph.h @@ -15,6 +15,7 @@ #define __LINUX_OF_GRAPH_H #include +#include /** * struct of_endpoint - the OF graph endpoint data structure -- cgit v1.2.3 From ba0263340a2aeaf5f08e4f2b0f4c29e300828b06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Thu, 28 Apr 2016 17:18:34 +0200 Subject: fbdev: fb_defio: Export fb_deferred_io_mmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export fb_deferred_io_mmap so drivers can change vma->vm_page_prot. When the framebuffer memory is allocated using dma_alloc_writecombine() instead of vmalloc(), I get cache syncing problems on ARM. This solves it: static int drm_fbdev_cma_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma) { fb_deferred_io_mmap(info, vma); vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); return 0; } Could this have been done in the core? Drivers that don't set (struct fb_ops *)->fb_mmap, gets a call to fb_pgprotect() at the end of the default fb_mmap implementation (drivers/video/fbdev/core/fbmem.c). This is an architecture specific function that on many platforms uses pgprot_writecombine(), but not on all. And looking at some of the fb_mmap implementations, some of them sets vm_page_prot to nocache for instance, so I think the safest bet is to do this in the driver and not in the fbdev core. And we can't call fb_pgprotect() from fb_deferred_io_mmap() either because we don't have access to the file pointer that powerpc needs. Signed-off-by: Noralf Trønnes Acked-by: Tomi Valkeinen Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1461856717-6476-5-git-send-email-noralf@tronnes.org --- include/linux/fb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index dfe88351341f..a964d076b4dc 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -673,6 +673,7 @@ static inline void __fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, } /* drivers/video/fb_defio.c */ +int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma); extern void fb_deferred_io_init(struct fb_info *info); extern void fb_deferred_io_open(struct fb_info *info, struct inode *inode, -- cgit v1.2.3 From a5b714ad395803a6aa91793b9e52a81b176b8ba9 Mon Sep 17 00:00:00 2001 From: Wang Sheng-Hui Date: Wed, 27 Apr 2016 20:10:16 +0800 Subject: NVMe: correct comment for offset enum of controller registers in nvme.h Section 3.1 gives the comment for the offset of controller registers in the specification 1.2a. Some are mis-copied in the header file nvme.h. Correct them. Signed-off-by: Wang Sheng-Hui Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/nvme.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index a55986f6fe38..7d51b2904cb7 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -21,13 +21,13 @@ enum { NVME_REG_CAP = 0x0000, /* Controller Capabilities */ NVME_REG_VS = 0x0008, /* Version */ NVME_REG_INTMS = 0x000c, /* Interrupt Mask Set */ - NVME_REG_INTMC = 0x0010, /* Interrupt Mask Set */ + NVME_REG_INTMC = 0x0010, /* Interrupt Mask Clear */ NVME_REG_CC = 0x0014, /* Controller Configuration */ NVME_REG_CSTS = 0x001c, /* Controller Status */ NVME_REG_NSSR = 0x0020, /* NVM Subsystem Reset */ NVME_REG_AQA = 0x0024, /* Admin Queue Attributes */ NVME_REG_ASQ = 0x0028, /* Admin SQ Base Address */ - NVME_REG_ACQ = 0x0030, /* Admin SQ Base Address */ + NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */ NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */ NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */ }; -- cgit v1.2.3 From 38f252553300ee1d3346a5273e95fe1dd60ca50a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 16 Apr 2016 14:55:28 -0400 Subject: block: add __blkdev_issue_discard This is a version of blkdev_issue_discard which doesn't wait for the I/O to complete, but instead allows the caller to submit the final bio and/or chain it to others. Signed-off-by: Christoph Hellwig Signed-off-by: Ming Lin Signed-off-by: Sagi Grimberg Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ba72687c5654..b79131acf6c0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1131,6 +1131,8 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); +extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask, int type, struct bio **biop); extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct page *page); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, -- cgit v1.2.3 From 339e6e31d2bfb40354cbfe672b357b88a88223f2 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Sun, 1 May 2016 18:42:47 -0400 Subject: isa: Implement the module_isa_driver macro The module_isa_driver macro is a helper macro for ISA drivers which do not do anything special in module init/exit. This eliminates a lot of boilerplate code. Each module may only use this macro once, and calling it replaces module_init and module_exit. Signed-off-by: William Breathitt Gray Signed-off-by: Greg Kroah-Hartman --- include/linux/isa.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/isa.h b/include/linux/isa.h index b0270e3814c8..e394917d18c2 100644 --- a/include/linux/isa.h +++ b/include/linux/isa.h @@ -36,4 +36,25 @@ static inline void isa_unregister_driver(struct isa_driver *d) } #endif +/** + * module_isa_driver() - Helper macro for registering a ISA driver + * @__isa_driver: isa_driver struct + * @__num_isa_dev: number of devices to register + * + * Helper macro for ISA drivers which do not do anything special in module + * init/exit. This eliminates a lot of boilerplate code. Each module may only + * use this macro once, and calling it replaces module_init and module_exit. + */ +#define module_isa_driver(__isa_driver, __num_isa_dev) \ +static int __init __isa_driver##_init(void) \ +{ \ + return isa_register_driver(&(__isa_driver), __num_isa_dev); \ +} \ +module_init(__isa_driver##_init); \ +static void __exit __isa_driver##_exit(void) \ +{ \ + isa_unregister_driver(&(__isa_driver)); \ +} \ +module_exit(__isa_driver##_exit); + #endif /* __LINUX_ISA_H */ -- cgit v1.2.3 From d9a9c6172d1cec51851e9015b6c4379635c31f1a Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Sun, 1 May 2016 18:43:10 -0400 Subject: isa: Implement the max_num_isa_dev macro max_num_isa_dev is a macro to determine the maximum possible number of ISA devices which may be registered in the I/O port address space given the address extent of the ISA devices. The highest base address possible for an ISA device is 0x3FF; this results in 1024 possible base addresses. Dividing the number of possible base addresses by the address extent taken by each device results in the maximum number of devices on a system. Signed-off-by: William Breathitt Gray Signed-off-by: Greg Kroah-Hartman --- include/linux/isa.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/isa.h b/include/linux/isa.h index e394917d18c2..5ab85281230b 100644 --- a/include/linux/isa.h +++ b/include/linux/isa.h @@ -57,4 +57,15 @@ static void __exit __isa_driver##_exit(void) \ } \ module_exit(__isa_driver##_exit); +/** + * max_num_isa_dev() - Maximum possible number registered of an ISA device + * @__ida_dev_ext: ISA device address extent + * + * The highest base address possible for an ISA device is 0x3FF; this results in + * 1024 possible base addresses. Dividing the number of possible base addresses + * by the address extent taken by each device results in the maximum number of + * devices on a system. + */ +#define max_num_isa_dev(__isa_dev_ext) (1024 / __isa_dev_ext) + #endif /* __LINUX_ISA_H */ -- cgit v1.2.3 From 4d31c6109a24892df461b6a98842935e80159a5e Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 2 May 2016 14:09:10 -0500 Subject: PCI: imx6: Implement reset sequence for i.MX6+ I.MX6+ has a dedicated bit for resetting PCIe core, which should be used instead of a regular reset sequence since using the latter will hang the SoC. This commit is based on c34068d48273e24d392d9a49a38be807954420ed from http://git.freescale.com/git/cgit.cgi/imx/linux-2.6-imx.git Tested-by: Gary Bisson Signed-off-by: Andrey Smirnov Signed-off-by: Bjorn Helgaas Reviewed-by: Fabio Estevam --- include/linux/mfd/syscon/imx6q-iomuxc-gpr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h index 238c8db953eb..5b08e3c5325f 100644 --- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h +++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h @@ -95,6 +95,7 @@ #define IMX6Q_GPR0_DMAREQ_MUX_SEL0_IOMUX BIT(0) #define IMX6Q_GPR1_PCIE_REQ_MASK (0x3 << 30) +#define IMX6Q_GPR1_PCIE_SW_RST BIT(29) #define IMX6Q_GPR1_PCIE_EXIT_L1 BIT(28) #define IMX6Q_GPR1_PCIE_RDY_L23 BIT(27) #define IMX6Q_GPR1_PCIE_ENTER_L1 BIT(26) -- cgit v1.2.3 From 85c7f81041d57cfe9dc97f4680d5586b54534a39 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 14 Apr 2016 19:52:13 -0400 Subject: beginning of transition to parallel lookups - marking in-lookup dentries marked as such when (would be) parallel lookup is about to pass them to actual ->lookup(); unmarked when * __d_add() is about to make it hashed, positive or not. * __d_move() (from d_splice_alias(), directly or via __d_unalias()) puts a preexisting dentry in its place * in caller of ->lookup() if it has escaped all of the above. Bug (WARN_ON, actually) if it reaches the final dput() or d_instantiate() while still marked such. As the result, we are guaranteed that for as long as the flag is set, dentry will * remain negative unhashed with positive refcount * never have its ->d_alias looked at * never have its ->d_lru looked at * never have its ->d_parent and ->d_name changed Right now we have at most one such for any given parent directory. With parallel lookups that restriction will weaken to * only exist when parent is locked shared * at most one with given (parent,name) pair (comparison of names is according to ->d_compare()) * only exist when there's no hashed dentry with the same (parent,name) Transition will take the next several commits; unfortunately, we'll only be able to switch to rwsem at the end of this series. The reason for not making it a single patch is to simplify review. New primitives: d_in_lookup() (a predicate checking if dentry is in the in-lookup state) and d_lookup_done() (tells the system that we are done with lookup and if it's still marked as in-lookup, it should cease to be such). Signed-off-by: Al Viro --- include/linux/dcache.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 4bb4de8d95ea..9a7aa890b642 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -232,6 +232,8 @@ struct dentry_operations { #define DCACHE_ENCRYPTED_WITH_KEY 0x04000000 /* dir is encrypted with a valid key */ #define DCACHE_OP_REAL 0x08000000 +#define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */ + extern seqlock_t rename_lock; /* @@ -367,6 +369,22 @@ static inline void dont_mount(struct dentry *dentry) spin_unlock(&dentry->d_lock); } +extern void __d_lookup_done(struct dentry *); + +static inline int d_in_lookup(struct dentry *dentry) +{ + return dentry->d_flags & DCACHE_PAR_LOOKUP; +} + +static inline void d_lookup_done(struct dentry *dentry) +{ + if (unlikely(d_in_lookup(dentry))) { + spin_lock(&dentry->d_lock); + __d_lookup_done(dentry); + spin_unlock(&dentry->d_lock); + } +} + extern void dput(struct dentry *); static inline bool d_managed(const struct dentry *dentry) -- cgit v1.2.3 From 84e710da2a1dfacfc87f604869a4d22df91ce6cd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Apr 2016 00:58:55 -0400 Subject: parallel lookups machinery, part 2 We'll need to verify that there's neither a hashed nor in-lookup dentry with desired parent/name before adding to in-lookup set. One possible solution would be to hold the parent's ->d_lock through both checks, but while the in-lookup set is relatively small at any time, dcache is not. And holding the parent's ->d_lock through something like __d_lookup_rcu() would suck too badly. So we leave the parent's ->d_lock alone, which means that we watch out for the following scenario: * we verify that there's no hashed match * existing in-lookup match gets hashed by another process * we verify that there's no in-lookup matches and decide that everything's fine. Solution: per-directory kinda-sorta seqlock, bumped around the times we hash something that used to be in-lookup or move (and hash) something in place of in-lookup. Then the above would turn into * read the counter * do dcache lookup * if no matches found, check for in-lookup matches * if there had been none of those either, check if the counter has changed; repeat if it has. The "kinda-sorta" part is due to the fact that we don't have much spare space in inode. There is a spare word (shared with i_bdev/i_cdev/i_pipe), so the counter part is not a problem, but spinlock is a different story. We could use the parent's ->d_lock, and it would be less painful in terms of contention, for __d_add() it would be rather inconvenient to grab; we could do that (using lock_parent()), but... Fortunately, we can get serialization on the counter itself, and it might be a good idea in general; we can use cmpxchg() in a loop to get from even to odd and smp_store_release() from odd to even. This commit adds the counter and updating logics; the readers will be added in the next commit. Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6d0fa9174a24..00cecc5a2f75 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -684,6 +684,7 @@ struct inode { struct block_device *i_bdev; struct cdev *i_cdev; char *i_link; + unsigned i_dir_seq; }; __u32 i_generation; -- cgit v1.2.3 From 94bdd655caba2080ae81d83d756d325abdffcb9f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Apr 2016 02:42:04 -0400 Subject: parallel lookups machinery, part 3 We will need to be able to check if there is an in-lookup dentry with matching parent/name. Right now it's impossible, but as soon as start locking directories shared such beasts will appear. Add a secondary hash for locating those. Hash chains go through the same space where d_alias will be once it's not in-lookup anymore. Search is done under the same bitlock we use for modifications - with the primary hash we can rely on d_rehash() into the wrong chain being the worst that could happen, but here the pointers are buggered once it's removed from the chain. On the other hand, the chains are not going to be long and normally we'll end up adding to the chain anyway. That allows us to avoid bothering with ->d_lock when doing the comparisons - everything is stable until removed from chain. New helper: d_alloc_parallel(). Right now it allocates, verifies that no hashed and in-lookup matches exist and adds to in-lookup hash. Returns ERR_PTR() for error, hashed match (in the unlikely case it's been found) or new dentry. In-lookup matches trigger BUG() for now; that will change in the next commit when we introduce waiting for ongoing lookup to finish. Note that in-lookup matches won't be possible until we actually go for shared locking. lookup_slow() switched to use of d_alloc_parallel(). Again, these commits are separated only for making it easier to review. All this machinery will start doing something useful only when we go for shared locking; it's just that the combination is too large for my taste. Signed-off-by: Al Viro --- include/linux/dcache.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 9a7aa890b642..3eea562f5f27 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -131,6 +131,7 @@ struct dentry { */ union { struct hlist_node d_alias; /* inode alias list */ + struct hlist_bl_node d_in_lookup_hash; /* only for in-lookup ones */ struct rcu_head d_rcu; } d_u; }; @@ -250,6 +251,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); +extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern struct dentry * d_exact_alias(struct dentry *, struct inode *); -- cgit v1.2.3 From d9171b9345261e0d941d92fdda5672b5db67f968 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Apr 2016 03:33:13 -0400 Subject: parallel lookups machinery, part 4 (and last) If we *do* run into an in-lookup match, we need to wait for it to cease being in-lookup. Fortunately, we do have unused space in in-lookup dentries - d_lru is never looked at until it stops being in-lookup. So we can stash a pointer to wait_queue_head from stack frame of the caller of ->lookup(). Some precautions are needed while waiting, but it's not that hard - we do hold a reference to dentry we are waiting for, so it can't go away. If it's found to be in-lookup the wait_queue_head is still alive and will remain so at least while ->d_lock is held. Moreover, the condition we are waiting for becomes true at the same point where everything on that wq gets woken up, so we can just add ourselves to the queue once. d_alloc_parallel() gets a pointer to wait_queue_head_t from its caller; lookup_slow() adjusted, d_add_ci() taught to use d_alloc_parallel() if the dentry passed to it happens to be in-lookup one (i.e. if it's been called from the parallel lookup). That's pretty much it - all that remains is to switch ->i_mutex to rwsem and have lookup_slow() take it shared. Signed-off-by: Al Viro --- include/linux/dcache.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 3eea562f5f27..6b1d8bbb3496 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -123,7 +123,10 @@ struct dentry { unsigned long d_time; /* used by d_revalidate */ void *d_fsdata; /* fs-specific data */ - struct list_head d_lru; /* LRU list */ + union { + struct list_head d_lru; /* LRU list */ + wait_queue_head_t *d_wait; /* in-lookup ones only */ + }; struct list_head d_child; /* child of parent list */ struct list_head d_subdirs; /* our children */ /* @@ -251,7 +254,8 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); -extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *); +extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, + wait_queue_head_t *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern struct dentry * d_exact_alias(struct dentry *, struct inode *); -- cgit v1.2.3 From 9902af79c01a8e39bb99b922fa3eef6d4ea23d69 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Apr 2016 15:08:36 -0400 Subject: parallel lookups: actual switch to rwsem ta-da! The main issue is the lack of down_write_killable(), so the places like readdir.c switched to plain inode_lock(); once killable variants of rwsem primitives appear, that'll be dealt with. lockdep side also might need more work Signed-off-by: Al Viro --- include/linux/fs.h | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 00cecc5a2f75..3018f31f7aa0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -647,7 +647,7 @@ struct inode { /* Misc */ unsigned long i_state; - struct mutex i_mutex; + struct rw_semaphore i_rwsem; unsigned long dirtied_when; /* jiffies of first dirtying */ unsigned long dirtied_time_when; @@ -734,27 +734,42 @@ enum inode_i_mutex_lock_class static inline void inode_lock(struct inode *inode) { - mutex_lock(&inode->i_mutex); + down_write(&inode->i_rwsem); } static inline void inode_unlock(struct inode *inode) { - mutex_unlock(&inode->i_mutex); + up_write(&inode->i_rwsem); +} + +static inline void inode_lock_shared(struct inode *inode) +{ + down_read(&inode->i_rwsem); +} + +static inline void inode_unlock_shared(struct inode *inode) +{ + up_read(&inode->i_rwsem); } static inline int inode_trylock(struct inode *inode) { - return mutex_trylock(&inode->i_mutex); + return down_write_trylock(&inode->i_rwsem); +} + +static inline int inode_trylock_shared(struct inode *inode) +{ + return down_read_trylock(&inode->i_rwsem); } static inline int inode_is_locked(struct inode *inode) { - return mutex_is_locked(&inode->i_mutex); + return rwsem_is_locked(&inode->i_rwsem); } static inline void inode_lock_nested(struct inode *inode, unsigned subclass) { - mutex_lock_nested(&inode->i_mutex, subclass); + down_write_nested(&inode->i_rwsem, subclass); } void lock_two_nondirectories(struct inode *, struct inode*); -- cgit v1.2.3 From 63b6df14134ddd048984c8afadb46e721815bfc6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Apr 2016 17:08:21 -0400 Subject: give readdir(2)/getdents(2)/etc. uniform exclusion with lseek() same as read() on regular files has, and for the same reason. Signed-off-by: Al Viro --- include/linux/file.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/file.h b/include/linux/file.h index f87d30882a24..7444f5feda12 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -44,6 +44,7 @@ extern struct file *fget_raw(unsigned int fd); extern unsigned long __fdget(unsigned int fd); extern unsigned long __fdget_raw(unsigned int fd); extern unsigned long __fdget_pos(unsigned int fd); +extern void __f_unlock_pos(struct file *); static inline struct fd __to_fd(unsigned long v) { @@ -60,6 +61,18 @@ static inline struct fd fdget_raw(unsigned int fd) return __to_fd(__fdget_raw(fd)); } +static inline struct fd fdget_pos(int fd) +{ + return __to_fd(__fdget_pos(fd)); +} + +static inline void fdput_pos(struct fd f) +{ + if (f.flags & FDPUT_POS_UNLOCK) + __f_unlock_pos(f.file); + fdput(f); +} + extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); -- cgit v1.2.3 From 6192269444ebfbfb42e23c7a6a93c76ffe4b5e51 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Apr 2016 23:08:32 -0400 Subject: introduce a parallel variant of ->iterate() New method: ->iterate_shared(). Same arguments as in ->iterate(), called with the directory locked only shared. Once all filesystems switch, the old one will be gone. Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3018f31f7aa0..3dc0258a2b64 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1674,6 +1674,7 @@ struct file_operations { ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iterate) (struct file *, struct dir_context *); + int (*iterate_shared) (struct file *, struct dir_context *); unsigned int (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); -- cgit v1.2.3 From dcb0b5575d24a32f51a3f1003312fb94ed4e214a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 2 May 2016 21:30:04 -0400 Subject: tracing: Remove TRACE_EVENT_FL_USE_CALL_FILTER logic Nothing sets TRACE_EVENT_FL_USE_CALL_FILTER anymore. Remove it. Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 3111a1efdad6..ba6456302e34 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -214,7 +214,6 @@ enum { TRACE_EVENT_FL_NO_SET_FILTER_BIT, TRACE_EVENT_FL_IGNORE_ENABLE_BIT, TRACE_EVENT_FL_WAS_ENABLED_BIT, - TRACE_EVENT_FL_USE_CALL_FILTER_BIT, TRACE_EVENT_FL_TRACEPOINT_BIT, TRACE_EVENT_FL_KPROBE_BIT, TRACE_EVENT_FL_UPROBE_BIT, @@ -229,7 +228,6 @@ enum { * WAS_ENABLED - Set and stays set when an event was ever enabled * (used for module unloading, if a module event is enabled, * it is best to clear the buffers that used it). - * USE_CALL_FILTER - For trace internal events, don't use file filter * TRACEPOINT - Event is a tracepoint * KPROBE - Event is a kprobe * UPROBE - Event is a uprobe @@ -240,7 +238,6 @@ enum { TRACE_EVENT_FL_NO_SET_FILTER = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT), TRACE_EVENT_FL_IGNORE_ENABLE = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT), TRACE_EVENT_FL_WAS_ENABLED = (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT), - TRACE_EVENT_FL_USE_CALL_FILTER = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT), TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT), TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT), TRACE_EVENT_FL_UPROBE = (1 << TRACE_EVENT_FL_UPROBE_BIT), -- cgit v1.2.3 From 8f510aeb223b26c4ffbece9fa92e4befea470f57 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 10 Nov 2015 20:31:07 +0900 Subject: PM / devfreq: Add devfreq_get_devfreq_by_phandle() This patch adds the new devfreq_get_devfreq_by_phandle() OF helper function which can find the instance of devfreq device by using phandle ("devfreq"). Signed-off-by: Chanwoo Choi Signed-off-by: MyungJoo Ham [m.reichl and linux.amoon: Tested it on exynos4412-odroidu3 board] Tested-by: Markus Reichl Tested-by: Anand Moon Acked-by: Krzysztof Kozlowski --- include/linux/devfreq.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 6fa02a20eb63..aa0b8424ebc3 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -208,6 +208,9 @@ extern int devm_devfreq_register_opp_notifier(struct device *dev, extern void devm_devfreq_unregister_opp_notifier(struct device *dev, struct devfreq *devfreq); +extern struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, + int index); + /** * devfreq_update_stats() - update the last_status pointer in struct devfreq * @df: the devfreq instance whose status needs updating @@ -307,6 +310,12 @@ static inline void devm_devfreq_unregister_opp_notifier(struct device *dev, { } +static inline struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, + int index) +{ + return ERR_PTR(-ENODEV); +} + static inline int devfreq_update_stats(struct devfreq *df) { return -EINVAL; -- cgit v1.2.3 From 0fe3a66410a3ba96679be903f1e287d7a0a264a9 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 26 Jan 2016 13:21:26 +0900 Subject: PM / devfreq: Add new DEVFREQ_TRANSITION_NOTIFIER notifier This patch adds the new DEVFREQ_TRANSITION_NOTIFIER notifier to send the notification when the frequency of device is changed. This notifier has two state as following: - DEVFREQ_PRECHANGE : Notify it before chaning the frequency of device - DEVFREQ_POSTCHANGE : Notify it after changed the frequency of device And this patch adds the resourced-managed function to release the resource automatically when error happen. Signed-off-by: Chanwoo Choi [m.reichl and linux.amoon: Tested it on exynos4412-odroidu3 board] Tested-by: Markus Reichl Tested-by: Anand Moon Signed-off-by: MyungJoo Ham Acked-by: Krzysztof Kozlowski --- include/linux/devfreq.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index aa0b8424ebc3..98c699304e12 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -19,6 +19,13 @@ #define DEVFREQ_NAME_LEN 16 +/* DEVFREQ notifier interface */ +#define DEVFREQ_TRANSITION_NOTIFIER (0) + +/* Transition notifiers of DEVFREQ_TRANSITION_NOTIFIER */ +#define DEVFREQ_PRECHANGE (0) +#define DEVFREQ_POSTCHANGE (1) + struct devfreq; /** @@ -143,6 +150,7 @@ struct devfreq_governor { * @trans_table: Statistics of devfreq transitions * @time_in_state: Statistics of devfreq states * @last_stat_updated: The last time stat updated + * @transition_notifier_list: list head of DEVFREQ_TRANSITION_NOTIFIER notifier * * This structure stores the devfreq information for a give device. * @@ -177,6 +185,13 @@ struct devfreq { unsigned int *trans_table; unsigned long *time_in_state; unsigned long last_stat_updated; + + struct srcu_notifier_head transition_notifier_list; +}; + +struct devfreq_freqs { + unsigned long old; + unsigned long new; }; #if defined(CONFIG_PM_DEVFREQ) @@ -207,7 +222,20 @@ extern int devm_devfreq_register_opp_notifier(struct device *dev, struct devfreq *devfreq); extern void devm_devfreq_unregister_opp_notifier(struct device *dev, struct devfreq *devfreq); - +extern int devfreq_register_notifier(struct devfreq *devfreq, + struct notifier_block *nb, + unsigned int list); +extern int devfreq_unregister_notifier(struct devfreq *devfreq, + struct notifier_block *nb, + unsigned int list); +extern int devm_devfreq_register_notifier(struct device *dev, + struct devfreq *devfreq, + struct notifier_block *nb, + unsigned int list); +extern void devm_devfreq_unregister_notifier(struct device *dev, + struct devfreq *devfreq, + struct notifier_block *nb, + unsigned int list); extern struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index); @@ -310,6 +338,35 @@ static inline void devm_devfreq_unregister_opp_notifier(struct device *dev, { } +static inline int devfreq_register_notifier(struct devfreq *devfreq, + struct notifier_block *nb, + unsigned int list) +{ + return 0; +} + +static inline int devfreq_unregister_notifier(struct devfreq *devfreq, + struct notifier_block *nb, + unsigned int list) +{ + return 0; +} + +static inline int devm_devfreq_register_notifier(struct device *dev, + struct devfreq *devfreq, + struct notifier_block *nb, + unsigned int list) +{ + return 0; +} + +static inline void devm_devfreq_unregister_notifier(struct device *dev, + struct devfreq *devfreq, + struct notifier_block *nb, + unsigned int list) +{ +} + static inline struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index) { -- cgit v1.2.3 From 996133119f57334c38b020dbfaaac5b5eb127e29 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 22 Mar 2016 13:44:03 +0900 Subject: PM / devfreq: Add new passive governor This patch adds the new passive governor for DEVFREQ framework. The following governors are already present and used for DVFS (Dynamic Voltage and Frequency Scaling) drivers. The following governors are independently used for one device driver which don't give the influence to other device drviers and also don't receive the effect from other device drivers. - ondemand / performance / powersave / userspace The passive governor depends on operation of parent driver with specific governos extremely and is not able to decide the new frequency by oneself. According to the decided new frequency of parent driver with governor, the passive governor uses it to decide the appropriate frequency for own device driver. The passive governor must need the following information from device tree: - the source clock and OPP tables - the instance of parent device For exameple, there are one more devfreq device drivers which need to change their source clock according to their utilization on runtime. But, they share the same power line (e.g., regulator). So, specific device driver is operated as parent with ondemand governor and then the rest device driver with passive governor is influenced by parent device. Suggested-by: Myungjoo Ham Signed-off-by: Chanwoo Choi [tjakobi: Reported RCU locking issue and cw00.choi fix it] Reported-by: Tobias Jakobi [linux.amoon: Reported possible recursive locking and cw00.choi fix it] Reported-by: Anand Moon Signed-off-by: MyungJoo Ham Acked-by: Krzysztof Kozlowski --- include/linux/devfreq.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 98c699304e12..2de4e2eea180 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -272,6 +272,39 @@ struct devfreq_simple_ondemand_data { }; #endif +#if IS_ENABLED(CONFIG_DEVFREQ_GOV_PASSIVE) +/** + * struct devfreq_passive_data - void *data fed to struct devfreq + * and devfreq_add_device + * @parent: the devfreq instance of parent device. + * @get_target_freq: Optional callback, Returns desired operating frequency + * for the device using passive governor. That is called + * when passive governor should decide the next frequency + * by using the new frequency of parent devfreq device + * using governors except for passive governor. + * If the devfreq device has the specific method to decide + * the next frequency, should use this callback. + * @this: the devfreq instance of own device. + * @nb: the notifier block for DEVFREQ_TRANSITION_NOTIFIER list + * + * The devfreq_passive_data have to set the devfreq instance of parent + * device with governors except for the passive governor. But, don't need to + * initialize the 'this' and 'nb' field because the devfreq core will handle + * them. + */ +struct devfreq_passive_data { + /* Should set the devfreq instance of parent device */ + struct devfreq *parent; + + /* Optional callback to decide the next frequency of passvice device */ + int (*get_target_freq)(struct devfreq *this, unsigned long *freq); + + /* For passive governor's internal use. Don't need to set them */ + struct devfreq *this; + struct notifier_block nb; +}; +#endif + #else /* !CONFIG_PM_DEVFREQ */ static inline struct devfreq *devfreq_add_device(struct device *dev, struct devfreq_dev_profile *profile, -- cgit v1.2.3 From c0ef079ca791ef9e057ac748051425a768c9e192 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 3 May 2016 03:29:09 +0200 Subject: netdevice: shrink size of struct netdev_queue - trans_timeout is incremented when tx queue timed out (tx watchdog). - tx_maxrate is set via sysfs Moving tx_maxrate to read-mostly part shrinks the struct by 64 bytes. While at it, also move trans_timeout (it is out-of-place in the 'write-mostly' part). Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 52914a854386..f2182594160e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -569,6 +569,12 @@ struct netdev_queue { #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) int numa_node; #endif + unsigned long tx_maxrate; + /* + * Number of TX timeouts for this queue + * (/sys/class/net/DEV/Q/trans_timeout) + */ + unsigned long trans_timeout; /* * write-mostly part */ @@ -579,18 +585,11 @@ struct netdev_queue { */ unsigned long trans_start; - /* - * Number of TX timeouts for this queue - * (/sys/class/net/DEV/Q/trans_timeout) - */ - unsigned long trans_timeout; - unsigned long state; #ifdef CONFIG_BQL struct dql dql; #endif - unsigned long tx_maxrate; } ____cacheline_aligned_in_smp; static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) -- cgit v1.2.3 From 9580bf2edb402b3afaf9c5a4efb6953f993ef52e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 30 Apr 2016 10:19:29 -0700 Subject: net: relax expensive skb_unclone() in iptunnel_handle_offloads() Locally generated TCP GSO packets having to go through a GRE/SIT/IPIP tunnel have to go through an expensive skb_unclone() Reallocating skb->head is a lot of work. Test should really check if a 'real clone' of the packet was done. TCP does not care if the original gso_type is changed while the packet travels in the stack. This adds skb_header_unclone() which is a variant of skb_clone() using skb_header_cloned() check instead of skb_cloned(). This variant can probably be used from other points. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c84a5a1078c5..c413c588a24f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1325,6 +1325,16 @@ static inline int skb_header_cloned(const struct sk_buff *skb) return dataref != 1; } +static inline int skb_header_unclone(struct sk_buff *skb, gfp_t pri) +{ + might_sleep_if(gfpflags_allow_blocking(pri)); + + if (skb_header_cloned(skb)) + return pskb_expand_head(skb, 0, 0, pri); + + return 0; +} + /** * skb_header_release - release reference to header * @skb: buffer to operate on -- cgit v1.2.3 From 2a74213838104a41588d86fd5e8d344972891ace Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Thu, 14 Apr 2016 23:20:04 +0300 Subject: signals/sigaltstack: Implement SS_AUTODISARM flag This patch implements the SS_AUTODISARM flag that can be OR-ed with SS_ONSTACK when forming ss_flags. When this flag is set, sigaltstack will be disabled when entering the signal handler; more precisely, after saving sas to uc_stack. When leaving the signal handler, the sigaltstack is restored by uc_stack. When this flag is used, it is safe to switch from sighandler with swapcontext(). Without this flag, the subsequent signal will corrupt the state of the switched-away sighandler. To detect the support of this functionality, one can do: err = sigaltstack(SS_DISABLE | SS_AUTODISARM); if (err && errno == EINVAL) unsupported(); Signed-off-by: Stas Sergeev Cc: Al Viro Cc: Aleksa Sarai Cc: Amanieu d'Antras Cc: Andrea Arcangeli Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Eric W. Biederman Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Heinrich Schuchardt Cc: Jason Low Cc: Josh Triplett Cc: Konstantin Khlebnikov Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Palmer Dabbelt Cc: Paul Moore Cc: Pavel Emelyanov Cc: Peter Zijlstra Cc: Richard Weinberger Cc: Sasha Levin Cc: Shuah Khan Cc: Tejun Heo Cc: Thomas Gleixner Cc: Vladimir Davydov Cc: linux-api@vger.kernel.org Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1460665206-13646-4-git-send-email-stsp@list.ru Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 ++++++++ include/linux/signal.h | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 52c4847b05e2..2950c5cd3005 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1596,6 +1596,7 @@ struct task_struct { unsigned long sas_ss_sp; size_t sas_ss_size; + unsigned sas_ss_flags; struct callback_head *task_works; @@ -2592,6 +2593,13 @@ static inline int sas_ss_flags(unsigned long sp) return on_sig_stack(sp) ? SS_ONSTACK : 0; } +static inline void sas_ss_reset(struct task_struct *p) +{ + p->sas_ss_sp = 0; + p->sas_ss_size = 0; + p->sas_ss_flags = SS_DISABLE; +} + static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) { if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp)) diff --git a/include/linux/signal.h b/include/linux/signal.h index 92557bbce7e7..3fbe81444d31 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -432,8 +432,10 @@ int __save_altstack(stack_t __user *, unsigned long); stack_t __user *__uss = uss; \ struct task_struct *t = current; \ put_user_ex((void __user *)t->sas_ss_sp, &__uss->ss_sp); \ - put_user_ex(sas_ss_flags(sp), &__uss->ss_flags); \ + put_user_ex(t->sas_ss_flags, &__uss->ss_flags); \ put_user_ex(t->sas_ss_size, &__uss->ss_size); \ + if (t->sas_ss_flags & SS_AUTODISARM) \ + sas_ss_reset(t); \ } while (0); #ifdef CONFIG_PROC_FS -- cgit v1.2.3 From 502d6df11ae394301470703fa6e485a0dc133401 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Mon, 11 Apr 2016 16:32:54 +0100 Subject: irqchip/gic-v2: Parse and export virtual GIC information For now, the firmware tables are parsed 2 times: once in the GIC drivers, the other timer when initializing the vGIC. It means code duplication and make more tedious to add the support for another firmware table (like ACPI). Introduce a new structure and set of helpers to get/set the virtual GIC information. Also fill up the structure for GICv2. Signed-off-by: Julien Grall Signed-off-by: Christoffer Dall --- include/linux/irqchip/arm-gic-common.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 include/linux/irqchip/arm-gic-common.h (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h new file mode 100644 index 000000000000..ef34f6f35e91 --- /dev/null +++ b/include/linux/irqchip/arm-gic-common.h @@ -0,0 +1,33 @@ +/* + * include/linux/irqchip/arm-gic-common.h + * + * Copyright (C) 2016 ARM Limited, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __LINUX_IRQCHIP_ARM_GIC_COMMON_H +#define __LINUX_IRQCHIP_ARM_GIC_COMMON_H + +#include +#include + +enum gic_type { + GIC_V2, +}; + +struct gic_kvm_info { + /* GIC type */ + enum gic_type type; + /* Virtual CPU interface */ + struct resource vcpu; + /* Interrupt number */ + unsigned int maint_irq; + /* Virtual control interface */ + struct resource vctrl; +}; + +const struct gic_kvm_info *gic_get_kvm_info(void); + +#endif /* __LINUX_IRQCHIP_ARM_GIC_COMMON_H */ -- cgit v1.2.3 From 1839e576968f34b9a31da9f0033f8de12a1c9de6 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Mon, 11 Apr 2016 16:32:57 +0100 Subject: irqchip/gic-v3: Parse and export virtual GIC information Fill up the recently introduced gic_kvm_info with the hardware information used for virtualization. Signed-off-by: Julien Grall Cc: Thomas Gleixner Cc: Jason Cooper Cc: Marc Zyngier Signed-off-by: Christoffer Dall --- include/linux/irqchip/arm-gic-common.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h index ef34f6f35e91..c647b0547bcd 100644 --- a/include/linux/irqchip/arm-gic-common.h +++ b/include/linux/irqchip/arm-gic-common.h @@ -15,6 +15,7 @@ enum gic_type { GIC_V2, + GIC_V3, }; struct gic_kvm_info { -- cgit v1.2.3 From e39c0df1be5a97e0910b09af1530bdf3de057a06 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 14 Apr 2016 21:17:21 +0200 Subject: pwm: Introduce the pwm_args concept Currently the PWM core mixes the current PWM state with the per-platform reference config (specified through the PWM lookup table, DT definition or directly hardcoded in PWM drivers). Create a struct pwm_args to store this reference configuration, so that PWM users can differentiate between the current and reference configurations. Patch all places where pwm->args should be initialized. We keep the pwm_set_polarity/period() calls until all PWM users are patched to use pwm_args instead of pwm_get_period/polarity(). Signed-off-by: Boris Brezillon [thierry.reding@gmail.com: reword kerneldoc comments] Signed-off-by: Thierry Reding --- include/linux/pwm.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index cfc3ed46cad2..b78d27c42629 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -74,6 +74,24 @@ enum pwm_polarity { PWM_POLARITY_INVERSED, }; +/** + * struct pwm_args - board-dependent PWM arguments + * @period: reference period + * @polarity: reference polarity + * + * This structure describes board-dependent arguments attached to a PWM + * device. These arguments are usually retrieved from the PWM lookup table or + * device tree. + * + * Do not confuse this with the PWM state: PWM arguments represent the initial + * configuration that users want to use on this PWM device rather than the + * current PWM hardware state. + */ +struct pwm_args { + unsigned int period; + enum pwm_polarity polarity; +}; + enum { PWMF_REQUESTED = 1 << 0, PWMF_ENABLED = 1 << 1, @@ -92,6 +110,7 @@ enum { * @period: period of the PWM signal (in nanoseconds) * @duty_cycle: duty cycle of the PWM signal (in nanoseconds) * @polarity: polarity of the PWM signal + * @args: PWM arguments */ struct pwm_device { const char *label; @@ -105,6 +124,8 @@ struct pwm_device { unsigned int period; unsigned int duty_cycle; enum pwm_polarity polarity; + + struct pwm_args args; }; static inline bool pwm_is_enabled(const struct pwm_device *pwm) @@ -144,6 +165,18 @@ static inline enum pwm_polarity pwm_get_polarity(const struct pwm_device *pwm) return pwm ? pwm->polarity : PWM_POLARITY_NORMAL; } +static inline void pwm_get_args(const struct pwm_device *pwm, + struct pwm_args *args) +{ + *args = pwm->args; +} + +static inline void pwm_apply_args(struct pwm_device *pwm) +{ + pwm_set_period(pwm, pwm->args.period); + pwm_set_polarity(pwm, pwm->args.polarity); +} + /** * struct pwm_ops - PWM controller operations * @request: optional hook for requesting a PWM -- cgit v1.2.3 From 10126ac14d36e74b2705802dc915b0b18463a51f Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 2 May 2016 15:10:31 -0500 Subject: PCI: Add Downstream Port Containment portdrv service type Add the Downstream Port Containment (PCIE_PORT_SERVICE_DPC) portdrv service type, available if the device has the DPC extended capability. [bhelgaas: split to separate patch, changelog] Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas --- include/linux/pcieport_if.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h index 4f1089f2cc98..afcd130ab3a9 100644 --- a/include/linux/pcieport_if.h +++ b/include/linux/pcieport_if.h @@ -21,6 +21,8 @@ #define PCIE_PORT_SERVICE_HP (1 << PCIE_PORT_SERVICE_HP_SHIFT) #define PCIE_PORT_SERVICE_VC_SHIFT 3 /* Virtual Channel */ #define PCIE_PORT_SERVICE_VC (1 << PCIE_PORT_SERVICE_VC_SHIFT) +#define PCIE_PORT_SERVICE_DPC_SHIFT 4 /* Downstream Port Containment */ +#define PCIE_PORT_SERVICE_DPC (1 << PCIE_PORT_SERVICE_DPC_SHIFT) struct pcie_device { int irq; /* Service IRQ/MSI/MSI-X Vector */ -- cgit v1.2.3 From e511267bc25e18926826e7cccdf7872bcbb4776a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 26 Apr 2016 11:38:20 +0100 Subject: io-64-nonatomic: Add relaxed accessor variants Whilst commit 9439eb3ab9d1 ("asm-generic: io: implement relaxed accessor macros as conditional wrappers") makes the *_relaxed forms of I/O accessors universally available to drivers, in cases where writeq() is implemented via the io-64-nonatomic helpers, writeq_relaxed() will end up falling back to writel() regardless of whether writel_relaxed() is available (identically for s/write/read/). Add corresponding relaxed forms of the nonatomic helpers to delegate to the equivalent 32-bit accessors as appropriate. We also need to fix io.h to avoid defining default relaxed variants if the basic accessors themselves don't exist. CC: Christoph Hellwig CC: Darren Hart CC: Hitoshi Mitake Acked-by: Arnd Bergmann Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- include/linux/io-64-nonatomic-hi-lo.h | 25 +++++++++++++++++++++++++ include/linux/io-64-nonatomic-lo-hi.h | 25 +++++++++++++++++++++++++ 2 files changed, 50 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io-64-nonatomic-hi-lo.h b/include/linux/io-64-nonatomic-hi-lo.h index 11d7e840d913..defcc4644ce3 100644 --- a/include/linux/io-64-nonatomic-hi-lo.h +++ b/include/linux/io-64-nonatomic-hi-lo.h @@ -21,6 +21,23 @@ static inline void hi_lo_writeq(__u64 val, volatile void __iomem *addr) writel(val, addr); } +static inline __u64 hi_lo_readq_relaxed(const volatile void __iomem *addr) +{ + const volatile u32 __iomem *p = addr; + u32 low, high; + + high = readl_relaxed(p + 1); + low = readl_relaxed(p); + + return low + ((u64)high << 32); +} + +static inline void hi_lo_writeq_relaxed(__u64 val, volatile void __iomem *addr) +{ + writel_relaxed(val >> 32, addr + 4); + writel_relaxed(val, addr); +} + #ifndef readq #define readq hi_lo_readq #endif @@ -29,4 +46,12 @@ static inline void hi_lo_writeq(__u64 val, volatile void __iomem *addr) #define writeq hi_lo_writeq #endif +#ifndef readq_relaxed +#define readq_relaxed hi_lo_readq_relaxed +#endif + +#ifndef writeq_relaxed +#define writeq_relaxed hi_lo_writeq_relaxed +#endif + #endif /* _LINUX_IO_64_NONATOMIC_HI_LO_H_ */ diff --git a/include/linux/io-64-nonatomic-lo-hi.h b/include/linux/io-64-nonatomic-lo-hi.h index 1a4315f97360..084461a4e5ab 100644 --- a/include/linux/io-64-nonatomic-lo-hi.h +++ b/include/linux/io-64-nonatomic-lo-hi.h @@ -21,6 +21,23 @@ static inline void lo_hi_writeq(__u64 val, volatile void __iomem *addr) writel(val >> 32, addr + 4); } +static inline __u64 lo_hi_readq_relaxed(const volatile void __iomem *addr) +{ + const volatile u32 __iomem *p = addr; + u32 low, high; + + low = readl_relaxed(p); + high = readl_relaxed(p + 1); + + return low + ((u64)high << 32); +} + +static inline void lo_hi_writeq_relaxed(__u64 val, volatile void __iomem *addr) +{ + writel_relaxed(val, addr); + writel_relaxed(val >> 32, addr + 4); +} + #ifndef readq #define readq lo_hi_readq #endif @@ -29,4 +46,12 @@ static inline void lo_hi_writeq(__u64 val, volatile void __iomem *addr) #define writeq lo_hi_writeq #endif +#ifndef readq_relaxed +#define readq_relaxed lo_hi_readq_relaxed +#endif + +#ifndef writeq_relaxed +#define writeq_relaxed lo_hi_writeq_relaxed +#endif + #endif /* _LINUX_IO_64_NONATOMIC_LO_HI_H_ */ -- cgit v1.2.3 From 6fb650d43da3e7054984dc548eaa88765a94d49f Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 29 Apr 2016 15:25:17 -0400 Subject: USB: leave LPM alone if possible when binding/unbinding interface drivers When a USB driver is bound to an interface (either through probing or by claiming it) or is unbound from an interface, the USB core always disables Link Power Management during the transition and then re-enables it afterward. The reason is because the driver might want to prevent hub-initiated link power transitions, in which case the HCD would have to recalculate the various LPM parameters. This recalculation takes place when LPM is re-enabled and the new parameters are sent to the device and its parent hub. However, if the driver does not want to prevent hub-initiated link power transitions then none of this work is necessary. The parameters don't need to be recalculated, and LPM doesn't need to be disabled and re-enabled. It turns out that disabling and enabling LPM can be time-consuming, enough so that it interferes with user programs that want to claim and release interfaces rapidly via usbfs. Since the usbfs kernel driver doesn't set the disable_hub_initiated_lpm flag, we can speed things up and get the user programs to work by leaving LPM alone whenever the flag isn't set. And while we're improving the way disable_hub_initiated_lpm gets used, let's also fix its kerneldoc. Signed-off-by: Alan Stern Tested-by: Matthew Giassa CC: Mathias Nyman CC: Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 01b6c61cf9bb..eba1f10e8cfd 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1068,7 +1068,7 @@ struct usbdrv_wrap { * for interfaces bound to this driver. * @soft_unbind: if set to 1, the USB core will not kill URBs and disable * endpoints before calling the driver's disconnect method. - * @disable_hub_initiated_lpm: if set to 0, the USB core will not allow hubs + * @disable_hub_initiated_lpm: if set to 1, the USB core will not allow hubs * to initiate lower power link state transitions when an idle timeout * occurs. Device-initiated USB 3.0 link PM will still be allowed. * -- cgit v1.2.3 From 8e996a2874bbbed30e8dfe881453825fc6b7654e Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 3 May 2016 11:33:37 -0600 Subject: stm class: Support devices that override software assigned masters Some STM devices adjust software assigned master numbers depending on the trace source and its runtime state and whatnot. This patch adds a sysfs attribute to inform the trace-side software that master numbers assigned to software sources will not match those in the STP stream, so that, for example, master/channel allocation policy can be adjusted accordingly. Signed-off-by: Alexander Shishkin Signed-off-by: Greg Kroah-Hartman --- include/linux/stm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stm.h b/include/linux/stm.h index 1a79ed8e43da..8369d8a8cabd 100644 --- a/include/linux/stm.h +++ b/include/linux/stm.h @@ -50,6 +50,8 @@ struct stm_device; * @sw_end: last STP master available to software * @sw_nchannels: number of STP channels per master * @sw_mmiosz: size of one channel's IO space, for mmap, optional + * @hw_override: masters in the STP stream will not match the ones + * assigned by software, but are up to the STM hardware * @packet: callback that sends an STP packet * @mmio_addr: mmap callback, optional * @link: called when a new stm_source gets linked to us, optional @@ -85,6 +87,7 @@ struct stm_data { unsigned int sw_end; unsigned int sw_nchannels; unsigned int sw_mmiosz; + unsigned int hw_override; ssize_t (*packet)(struct stm_data *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, -- cgit v1.2.3 From 237483aa5cf43105d148d3f03b29eed47c3e6cf9 Mon Sep 17 00:00:00 2001 From: Pratik Patel Date: Tue, 3 May 2016 11:33:40 -0600 Subject: coresight: stm: adding driver for CoreSight STM component This driver adds support for the STM CoreSight IP block, allowing any system compoment (HW or SW) to log and aggregate messages via a single entity. The CoreSight STM exposes an application defined number of channels called stimulus port. Configuration is done using entries in sysfs and channels made available to userspace via configfs. Signed-off-by: Pratik Patel Signed-off-by: Mathieu Poirier Reviewed-by: Michael Williams Signed-off-by: Chunyan Zhang Signed-off-by: Greg Kroah-Hartman --- include/linux/coresight-stm.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/linux/coresight-stm.h (limited to 'include/linux') diff --git a/include/linux/coresight-stm.h b/include/linux/coresight-stm.h new file mode 100644 index 000000000000..a978bb85599a --- /dev/null +++ b/include/linux/coresight-stm.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_CORESIGHT_STM_H_ +#define __LINUX_CORESIGHT_STM_H_ + +#include + +#endif -- cgit v1.2.3 From 18d28819809909c3f24bb72183a901c5e332a63d Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 3 May 2016 09:46:22 +0200 Subject: mcb: Correctly initialize the bus's device The mcb bus' device member wasn't correctly initialized and thus wasn't placed correctly into the driver model. Signed-off-by: Johannes Thumshirn Reviewed-by: Andreas Werner Tested-by: Andreas Werner Signed-off-by: Greg Kroah-Hartman --- include/linux/mcb.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mcb.h b/include/linux/mcb.h index ed06e15a36aa..3efafbca166d 100644 --- a/include/linux/mcb.h +++ b/include/linux/mcb.h @@ -21,13 +21,12 @@ struct mcb_device; /** * struct mcb_bus - MEN Chameleon Bus * - * @dev: pointer to carrier device - * @children: the child busses + * @dev: bus device + * @carrier: pointer to carrier device * @bus_nr: mcb bus number * @get_irq: callback to get IRQ number */ struct mcb_bus { - struct list_head children; struct device dev; struct device *carrier; int bus_nr; -- cgit v1.2.3 From 803f1ca60d5c0107adfbce4e2d70488598b03a80 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 3 May 2016 09:46:23 +0200 Subject: mcb: export bus information via sysfs Export information about the bus stored in the FPGA's header to userspace via sysfs, instead of hiding it in pr_debug()s from everyone. Signed-off-by: Johannes Thumshirn Reviewed-by: Andreas Werner Tested-by: Andreas Werner Signed-off-by: Greg Kroah-Hartman --- include/linux/mcb.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mcb.h b/include/linux/mcb.h index 3efafbca166d..ead13d233a97 100644 --- a/include/linux/mcb.h +++ b/include/linux/mcb.h @@ -15,6 +15,8 @@ #include #include +#define CHAMELEON_FILENAME_LEN 12 + struct mcb_driver; struct mcb_device; @@ -25,11 +27,18 @@ struct mcb_device; * @carrier: pointer to carrier device * @bus_nr: mcb bus number * @get_irq: callback to get IRQ number + * @revision: the FPGA's revision number + * @model: the FPGA's model number + * @filename: the FPGA's name */ struct mcb_bus { struct device dev; struct device *carrier; int bus_nr; + u8 revision; + char model; + u8 minor; + char name[CHAMELEON_FILENAME_LEN + 1]; int (*get_irq)(struct mcb_device *dev); }; #define to_mcb_bus(b) container_of((b), struct mcb_bus, dev) -- cgit v1.2.3 From c876eeab6432687846d4cd5fe1e43dbc348de134 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 3 May 2016 10:31:49 -0700 Subject: signals/sigaltstack: If SS_AUTODISARM, bypass on_sig_stack() If a signal stack is set up with SS_AUTODISARM, then the kernel inherently avoids incorrectly resetting the signal stack if signals recurse: the signal stack will be reset on the first signal delivery. This means that we don't need check the stack pointer when delivering signals if SS_AUTODISARM is set. This will make segmented x86 programs more robust: currently there's a hole that could be triggered if ESP/RSP appears to point to the signal stack but actually doesn't due to a nonzero SS base. Signed-off-by: Andy Lutomirski Cc: Al Viro Cc: Aleksa Sarai Cc: Amanieu d'Antras Cc: Andrea Arcangeli Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Eric W. Biederman Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Heinrich Schuchardt Cc: Jason Low Cc: Josh Triplett Cc: Konstantin Khlebnikov Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Palmer Dabbelt Cc: Paul Moore Cc: Pavel Emelyanov Cc: Peter Zijlstra Cc: Richard Weinberger Cc: Sasha Levin Cc: Shuah Khan Cc: Stas Sergeev Cc: Tejun Heo Cc: Thomas Gleixner Cc: Vladimir Davydov Cc: linux-api@vger.kernel.org Link: http://lkml.kernel.org/r/c46bee4654ca9e68c498462fd11746e2bd0d98c8.1462296606.git.luto@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2950c5cd3005..77fd49f20c5f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2576,6 +2576,18 @@ static inline int kill_cad_pid(int sig, int priv) */ static inline int on_sig_stack(unsigned long sp) { + /* + * If the signal stack is SS_AUTODISARM then, by construction, we + * can't be on the signal stack unless user code deliberately set + * SS_AUTODISARM when we were already on it. + * + * This improves reliability: if user state gets corrupted such that + * the stack pointer points very close to the end of the signal stack, + * then this check will enable the signal to be handled anyway. + */ + if (current->sas_ss_flags & SS_AUTODISARM) + return 0; + #ifdef CONFIG_STACK_GROWSUP return sp >= current->sas_ss_sp && sp - current->sas_ss_sp < current->sas_ss_size; -- cgit v1.2.3 From 3d376fb2ea907f0c1bbccf87125456439feb4ed4 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 2 May 2016 22:25:40 +0200 Subject: mmc: tmio/sdhi: introduce flag for RCar 2+ specific features RCar Gen2 and later implementations of TMIO/SDHI have their own set of features and additions. FAST_CLK_CHG is just one of them and I see a few others being added soon. Some may work on older chipsets but this needs to be tested case by case. Instead of adding a bunch of flags for each feature, add a global RCar2+ one for now. We can still break out features if the need arises. Signed-off-by: Wolfram Sang Tested-by: Geert Uytterhoeven Signed-off-by: Ulf Hansson --- include/linux/mfd/tmio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 05d58ee5e6a7..7a26286db895 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -66,8 +66,8 @@ */ #define TMIO_MMC_SDIO_IRQ (1 << 2) -/* Some controllers don't need to wait 10ms for clock changes */ -#define TMIO_MMC_FAST_CLK_CHG (1 << 3) +/* Some features are only available or tested on RCar Gen2 or later */ +#define TMIO_MMC_MIN_RCAR2 (1 << 3) /* * Some controllers require waiting for the SD bus to become -- cgit v1.2.3 From efdc810ba39dae0ccce9cb9c1c84ff9b0157ca43 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Tue, 3 May 2016 17:13:54 +0300 Subject: net/mlx5: Flow steering, Add vport ACL support Update the relevant flow steering device structs and commands to support vport. Update the flow steering core API to receive vport number. Add ingress and egress ACL flow table name spaces. Add ACL flow table support: * ACL (Access Control List) flow table is a table that contains only allow/drop steering rules. * We have two types of ACL flow tables - ingress and egress. * ACLs handle traffic sent from/to E-Switch FDB table, Ingress refers to traffic sent from Vport to E-Switch and Egress refers to traffic sent from E-Switch to vport. * Ingress ACL flow table allow/drop rules is checked against traffic sent from VF. * Egress ACL flow table allow/drop rules is checked against traffic sent to VF. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 12 ++++++++++++ include/linux/mlx5/driver.h | 2 ++ include/linux/mlx5/fs.h | 7 +++++++ 3 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8fecd6d6f814..ee0d5a937f02 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1349,6 +1349,18 @@ enum mlx5_cap_type { #define MLX5_CAP_ESW_FLOWTABLE_FDB_MAX(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_nic_esw_fdb.cap) +#define MLX5_CAP_ESW_EGRESS_ACL(mdev, cap) \ + MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_esw_acl_egress.cap) + +#define MLX5_CAP_ESW_EGRESS_ACL_MAX(mdev, cap) \ + MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_esw_acl_egress.cap) + +#define MLX5_CAP_ESW_INGRESS_ACL(mdev, cap) \ + MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_esw_acl_ingress.cap) + +#define MLX5_CAP_ESW_INGRESS_ACL_MAX(mdev, cap) \ + MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_esw_acl_ingress.cap) + #define MLX5_CAP_ESW(mdev, cap) \ MLX5_GET(e_switch_cap, \ mdev->hca_caps_cur[MLX5_CAP_ESWITCH], cap) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d5529449ef47..9613143f0561 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -518,6 +518,8 @@ struct mlx5_priv { unsigned long pci_dev_data; struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_root_namespace *fdb_root_ns; + struct mlx5_flow_root_namespace *esw_egress_root_ns; + struct mlx5_flow_root_namespace *esw_ingress_root_ns; }; enum mlx5_device_state { diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 165ff4f9cc6a..6467569ad76e 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -58,6 +58,8 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_LEFTOVERS, MLX5_FLOW_NAMESPACE_ANCHOR, MLX5_FLOW_NAMESPACE_FDB, + MLX5_FLOW_NAMESPACE_ESW_EGRESS, + MLX5_FLOW_NAMESPACE_ESW_INGRESS, }; struct mlx5_flow_table; @@ -90,6 +92,11 @@ mlx5_create_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, u32 level); +struct mlx5_flow_table * +mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, + int prio, + int num_flow_table_entries, + u32 level, u16 vport); int mlx5_destroy_flow_table(struct mlx5_flow_table *ft); /* inbox should be set with the following values: -- cgit v1.2.3 From ba162f8eed61a7e71e26455ce1cff5b5898a3579 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 3 May 2016 16:31:00 +0200 Subject: netdevice: add helper to update trans_start trans_start exists twice: - as member of net_device (legacy) - as member of netdev_queue In order to get rid of the legacy case, add a helper for the dev->trans_update (this patch), then convert spots that do dev->trans_start = jiffies to use this helper (next patch). This would then allow us to change the helper so that it updates the trans_stamp of netdev queue 0 instead. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bcf012637d10..f53412cccbaa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3481,6 +3481,12 @@ static inline void txq_trans_update(struct netdev_queue *txq) txq->trans_start = jiffies; } +/* legacy drivers only, netdev_start_xmit() sets txq->trans_start */ +static inline void netif_trans_update(struct net_device *dev) +{ + dev->trans_start = jiffies; +} + /** * netif_tx_lock - grab network device transmit lock * @dev: network device -- cgit v1.2.3 From 9b36627acecd5792e81daf1a3bff8eab39ed45fb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 3 May 2016 16:33:14 +0200 Subject: net: remove dev->trans_start previous patches removed all direct accesses to dev->trans_start, so change the netif_trans_update helper to update trans_start of netdev queue 0 instead and then remove trans_start from struct net_device. AFAICS a lot of the netif_trans_update() invocations are now useless because they occur in ndo_start_xmit and driver doesn't set LLTX (i.e. stack already took care of the update). As I can't test any of them it seems better to just leave them alone. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/netdevice.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f53412cccbaa..63580e6d0df4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -581,7 +581,7 @@ struct netdev_queue { spinlock_t _xmit_lock ____cacheline_aligned_in_smp; int xmit_lock_owner; /* - * please use this field instead of dev->trans_start + * Time (in jiffies) of last Tx */ unsigned long trans_start; @@ -1545,7 +1545,6 @@ enum netdev_priv_flags { * * @offload_fwd_mark: Offload device fwding mark * - * @trans_start: Time (in jiffies) of last Tx * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers @@ -1794,13 +1793,6 @@ struct net_device { #endif /* These may be needed for future network-power-down code. */ - - /* - * trans_start here is expensive for high speed devices on SMP, - * please use netdev_queue->trans_start instead. - */ - unsigned long trans_start; - struct timer_list watchdog_timer; int __percpu *pcpu_refcnt; @@ -3484,7 +3476,10 @@ static inline void txq_trans_update(struct netdev_queue *txq) /* legacy drivers only, netdev_start_xmit() sets txq->trans_start */ static inline void netif_trans_update(struct net_device *dev) { - dev->trans_start = jiffies; + struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); + + if (txq->trans_start != jiffies) + txq->trans_start = jiffies; } /** -- cgit v1.2.3 From 8320f495cf441d593f7cd4f30e6b63455be71a2c Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 4 May 2016 22:15:27 +0200 Subject: i2c: allow adapter drivers to override the adapter locking Add i2c_lock_bus() and i2c_unlock_bus(), which call the new lock_bus and unlock_bus ops in the adapter. These funcs/ops take an additional flags argument that indicates for what purpose the adapter is locked. There are two flags, I2C_LOCK_ROOT_ADAPTER and I2C_LOCK_SEGMENT, but they are both implemented the same. For now. Locking the root adapter means that the whole bus is locked, locking the segment means that only the current bus segment is locked (i.e. i2c traffic on the parent side of a mux is still allowed even if the child side of the mux is locked). Also support a trylock_bus op (but no function to call it, as it is not expected to be needed outside of the i2c core). Implement i2c_lock_adapter/i2c_unlock_adapter in terms of the new locking scheme (i.e. lock with the I2C_LOCK_ROOT_ADAPTER flag). Locking the root adapter and locking the segment is the same thing for all root adapters (e.g. in the normal case of a simple topology with no i2c muxes). The two locking variants are also the same for traditional muxes (aka parent-locked muxes). These muxes traverse the tree, locking each level as they go until they reach the root. This patch is preparatory for a later patch in the series introducing mux-locked muxes, which behave differently depending on the requested locking. Since all current users are using i2c_lock_adapter, which is a wrapper for I2C_LOCK_ROOT_ADAPTER, we only need to annotate the calls that will not need to lock the root adapter for mux-locked muxes. I.e. the instances that needs to use I2C_LOCK_SEGMENT instead of i2c_lock_adapter/I2C_LOCK_ROOT_ADAPTER. Those instances are in the i2c_transfer and i2c_smbus_xfer functions, so that mux-locked muxes can single out normal i2c accesses to its slave side and adjust the locking for those accesses. Signed-off-by: Peter Rosin Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 44 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index c30833b7b073..50934d6e1050 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -538,6 +538,10 @@ struct i2c_adapter { struct i2c_bus_recovery_info *bus_recovery_info; const struct i2c_adapter_quirks *quirks; + + void (*lock_bus)(struct i2c_adapter *, unsigned int flags); + int (*trylock_bus)(struct i2c_adapter *, unsigned int flags); + void (*unlock_bus)(struct i2c_adapter *, unsigned int flags); }; #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev) @@ -567,8 +571,44 @@ i2c_parent_is_i2c_adapter(const struct i2c_adapter *adapter) int i2c_for_each_dev(void *data, int (*fn)(struct device *, void *)); /* Adapter locking functions, exported for shared pin cases */ -void i2c_lock_adapter(struct i2c_adapter *); -void i2c_unlock_adapter(struct i2c_adapter *); +#define I2C_LOCK_ROOT_ADAPTER BIT(0) +#define I2C_LOCK_SEGMENT BIT(1) + +/** + * i2c_lock_bus - Get exclusive access to an I2C bus segment + * @adapter: Target I2C bus segment + * @flags: I2C_LOCK_ROOT_ADAPTER locks the root i2c adapter, I2C_LOCK_SEGMENT + * locks only this branch in the adapter tree + */ +static inline void +i2c_lock_bus(struct i2c_adapter *adapter, unsigned int flags) +{ + adapter->lock_bus(adapter, flags); +} + +/** + * i2c_unlock_bus - Release exclusive access to an I2C bus segment + * @adapter: Target I2C bus segment + * @flags: I2C_LOCK_ROOT_ADAPTER unlocks the root i2c adapter, I2C_LOCK_SEGMENT + * unlocks only this branch in the adapter tree + */ +static inline void +i2c_unlock_bus(struct i2c_adapter *adapter, unsigned int flags) +{ + adapter->unlock_bus(adapter, flags); +} + +static inline void +i2c_lock_adapter(struct i2c_adapter *adapter) +{ + i2c_lock_bus(adapter, I2C_LOCK_ROOT_ADAPTER); +} + +static inline void +i2c_unlock_adapter(struct i2c_adapter *adapter) +{ + i2c_unlock_bus(adapter, I2C_LOCK_ROOT_ADAPTER); +} /*flags for the client struct: */ #define I2C_CLIENT_PEC 0x04 /* Use Packet Error Checking */ -- cgit v1.2.3 From 6ef91fcca8a8ba3df9810a4cc6cd6a9d3f21bf45 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 4 May 2016 22:15:29 +0200 Subject: i2c: mux: relax locking of the top i2c adapter during mux-locked muxing With a i2c topology like the following GPIO ---| ------ BAT1 | v / I2C -----+----------+---- MUX | \ EEPROM ------ BAT2 there is a locking problem with the GPIO controller since it is a client on the same i2c bus that it muxes. Transfers to the mux clients (e.g. BAT1) will lock the whole i2c bus prior to attempting to switch the mux to the correct i2c segment. In the above case, the GPIO device is an I/O expander with an i2c interface, and since the GPIO subsystem knows nothing (and rightfully so) about the lockless needs of the i2c mux code, this results in a deadlock when the GPIO driver issues i2c transfers to modify the mux. So, observing that while it is needed to have the i2c bus locked during the actual MUX update in order to avoid random garbage on the slave side, it is not strictly a must to have it locked over the whole sequence of a full select-transfer-deselect mux client operation. The mux itself needs to be locked, so transfers to clients behind the mux are serialized, and the mux needs to be stable during all i2c traffic (otherwise individual mux slave segments might see garbage, or worse). Introduce this new locking concept as "mux-locked" muxes, and call the pre-existing mux locking scheme "parent-locked". Modify the i2c mux locking so that muxes that are "mux-locked" locks only the muxes on the parent adapter instead of the whole i2c bus when there is a transfer to the slave side of the mux. This lock serializes transfers to the slave side of the muxes on the parent adapter. Add code to i2c-mux-gpio and i2c-mux-pinctrl that checks if all involved gpio/pinctrl devices have a parent that is an i2c adapter in the same adapter tree that is muxed, and request a "mux-locked mux" if that is the case. Modify the select-transfer-deselect code for "mux-locked" muxes so that each of the select-transfer-deselect ops locks the mux parent adapter individually. Signed-off-by: Peter Rosin Signed-off-by: Wolfram Sang --- include/linux/i2c-mux.h | 8 ++++++++ include/linux/i2c.h | 1 + 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c-mux.h b/include/linux/i2c-mux.h index 2fa93fe1345e..d4c1d12f900d 100644 --- a/include/linux/i2c-mux.h +++ b/include/linux/i2c-mux.h @@ -27,9 +27,12 @@ #ifdef __KERNEL__ +#include + struct i2c_mux_core { struct i2c_adapter *parent; struct device *dev; + bool mux_locked; void *priv; @@ -47,11 +50,16 @@ struct i2c_mux_core *i2c_mux_alloc(struct i2c_adapter *parent, int (*select)(struct i2c_mux_core *, u32), int (*deselect)(struct i2c_mux_core *, u32)); +/* flags for i2c_mux_alloc */ +#define I2C_MUX_LOCKED BIT(0) + static inline void *i2c_mux_priv(struct i2c_mux_core *muxc) { return muxc->priv; } +struct i2c_adapter *i2c_root_adapter(struct device *dev); + /* * Called to create an i2c bus on a multiplexed bus segment. * The chan_id parameter is passed to the select and deselect diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 50934d6e1050..96a25ae14494 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -524,6 +524,7 @@ struct i2c_adapter { /* data fields that are valid for all devices */ struct rt_mutex bus_lock; + struct rt_mutex mux_lock; int timeout; /* in jiffies */ int retries; -- cgit v1.2.3 From 46cc6e4976e3d9058490f20d93bc7805f7f2d81e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 3 May 2016 16:56:03 -0700 Subject: tcp: fix lockdep splat in tcp_snd_una_update() tcp_snd_una_update() and tcp_rcv_nxt_update() call u64_stats_update_begin() either from process context or BH handler. This triggers a lockdep splat on 32bit & SMP builds. We could add u64_stats_update_begin_bh() variant but this would slow down 32bit builds with useless local_disable_bh() and local_enable_bh() pairs, since we own the socket lock at this point. I add sock_owned_by_me() helper to have proper lockdep support even on 64bit builds, and new u64_stats_update_begin_raw() and u64_stats_update_end_raw methods. Fixes: c10d9310edf5 ("tcp: do not assume TCP code is non preemptible") Reported-by: Fabio Estevam Diagnosed-by: Francois Romieu Signed-off-by: Eric Dumazet Tested-by: Fabio Estevam Signed-off-by: David S. Miller --- include/linux/u64_stats_sync.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index df89c9bcba7d..d3a2bb712af3 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -89,6 +89,20 @@ static inline void u64_stats_update_end(struct u64_stats_sync *syncp) #endif } +static inline void u64_stats_update_begin_raw(struct u64_stats_sync *syncp) +{ +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + raw_write_seqcount_begin(&syncp->seq); +#endif +} + +static inline void u64_stats_update_end_raw(struct u64_stats_sync *syncp) +{ +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + raw_write_seqcount_end(&syncp->seq); +#endif +} + static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) -- cgit v1.2.3 From f6a4790a5471d7cba406d87f6b41323f40bb93d2 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Wed, 27 Apr 2016 20:45:02 +0800 Subject: video / backlight: add two APIs for drivers to use It is useful to get the backlight device's pointer and use it to set backlight in some cases(the following patch will make use of it) so add the two APIs and export them. Signed-off-by: Aaron Lu Acked-by: Jingoo Han Signed-off-by: Rafael J. Wysocki --- include/linux/backlight.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backlight.h b/include/linux/backlight.h index 1e7a69adbe6f..f46b88fa4a09 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -144,6 +144,8 @@ extern void backlight_force_update(struct backlight_device *bd, extern bool backlight_device_registered(enum backlight_type type); extern int backlight_register_notifier(struct notifier_block *nb); extern int backlight_unregister_notifier(struct notifier_block *nb); +extern struct backlight_device *backlight_device_get_by_type(enum backlight_type type); +extern int backlight_device_set_brightness(struct backlight_device *bd, unsigned long brightness); #define to_backlight_device(obj) container_of(obj, struct backlight_device, dev) -- cgit v1.2.3 From 01c3664de62f89f6777e59173ad8e20b5a4c267f Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Wed, 27 Apr 2016 20:45:03 +0800 Subject: video / backlight: remove the backlight_device_registered API Since we will need the backlight_device_get_by_type API, we can use it instead of the backlight_device_registered API whenever necessary so remove the backlight_device_registered API. Signed-off-by: Aaron Lu Acked-by: Jingoo Han Signed-off-by: Rafael J. Wysocki --- include/linux/backlight.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backlight.h b/include/linux/backlight.h index f46b88fa4a09..5f2fd61ef4fb 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -141,7 +141,6 @@ extern void devm_backlight_device_unregister(struct device *dev, struct backlight_device *bd); extern void backlight_force_update(struct backlight_device *bd, enum backlight_update_reason reason); -extern bool backlight_device_registered(enum backlight_type type); extern int backlight_register_notifier(struct notifier_block *nb); extern int backlight_unregister_notifier(struct notifier_block *nb); extern struct backlight_device *backlight_device_get_by_type(enum backlight_type type); -- cgit v1.2.3 From e10cfdc33a0f23dc8449be7267f0a642e96a2a24 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Tue, 3 May 2016 16:48:39 +0800 Subject: ACPI / osi: Fix default _OSI(Darwin) support The following commit always reports positive value when Apple hardware queries _OSI("Darwin"): Commit: 7bc5a2bad0b8d9d1ac9f7b8b33150e4ddf197334 Subject: ACPI: Support _OSI("Darwin") correctly However since this implementation places the judgement in runtime, it breaks acpi_osi=!Darwin and cannot return unsupported for _OSI("WinXXX") invoked before invoking _OSI("Darwin"). This patch fixes the issues by reverting the wrong support and implementing the default behavior of _OSI("Darwin")/_OSI("WinXXX") on Apple hardware via DMI matching. Fixes: 7bc5a2bad0b8 (ACPI: Support _OSI("Darwin") correctly) Link: https://bugzilla.kernel.org/show_bug.cgi?id=92111 Reported-and-tested-by: Lukas Wunner Signed-off-by: Chen Yu Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 06ed7e54033e..6c7176efd543 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -360,6 +360,7 @@ extern char acpi_video_backlight_string[]; extern long acpi_is_video_device(acpi_handle handle); extern int acpi_blacklisted(void); extern void acpi_dmi_osi_linux(int enable, const struct dmi_system_id *d); +extern void acpi_dmi_osi_darwin(int enable, const struct dmi_system_id *d); extern void acpi_osi_setup(char *str); extern bool acpi_osi_is_win8(void); -- cgit v1.2.3 From dc45eb20a83d11ed649169fbe9159ed6bf586c88 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Tue, 3 May 2016 16:48:46 +0800 Subject: ACPI / osi: Cleanup OSI handling code to use bool This patch changes "int/unsigned int" to "bool" to simplify the code. Tested-by: Lukas Wunner Tested-by: Chen Yu Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6c7176efd543..0ccfeba71fcf 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -359,8 +359,8 @@ extern bool wmi_has_guid(const char *guid); extern char acpi_video_backlight_string[]; extern long acpi_is_video_device(acpi_handle handle); extern int acpi_blacklisted(void); -extern void acpi_dmi_osi_linux(int enable, const struct dmi_system_id *d); -extern void acpi_dmi_osi_darwin(int enable, const struct dmi_system_id *d); +extern void acpi_dmi_osi_linux(bool enable, const struct dmi_system_id *d); +extern void acpi_dmi_osi_darwin(bool enable, const struct dmi_system_id *d); extern void acpi_osi_setup(char *str); extern bool acpi_osi_is_win8(void); -- cgit v1.2.3 From d5a91d74c6d7da2cebadbb9f2d03e56f84d7be62 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Tue, 3 May 2016 16:48:53 +0800 Subject: ACPI / osi: Cleanup coding style issues before creating a separate OSI source file This patch performs necessary cleanups before moving OSI support to another file. 1. Change printk into pr_xxx 2. Do not initialize values to 0 3. Do not append additional "return" at the end of the function 4. Remove useless comments which may easily break line breaking rule After fixing the coding style issues, rename functions to make them looking like acpi_osi_xxx. No functional changes. Tested-by: Lukas Wunner Tested-by: Chen Yu Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 0ccfeba71fcf..bf0adc611aad 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -359,8 +359,8 @@ extern bool wmi_has_guid(const char *guid); extern char acpi_video_backlight_string[]; extern long acpi_is_video_device(acpi_handle handle); extern int acpi_blacklisted(void); -extern void acpi_dmi_osi_linux(bool enable, const struct dmi_system_id *d); -extern void acpi_dmi_osi_darwin(bool enable, const struct dmi_system_id *d); +extern void acpi_osi_dmi_linux(bool enable, const struct dmi_system_id *d); +extern void acpi_osi_dmi_darwin(bool enable, const struct dmi_system_id *d); extern void acpi_osi_setup(char *str); extern bool acpi_osi_is_win8(void); -- cgit v1.2.3 From e5f660ebef68e3ed1a988ad06ba23562153cee5c Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Tue, 3 May 2016 16:49:01 +0800 Subject: ACPI / osi: Collect _OSI handling into one single file _OSI handling code grows giant and it's time to move them into one file. This patch collects all _OSI handling code into one single file. So that we only have the following functions to be used externally: early_acpi_osi_init(): Used by DMI detections; acpi_osi_init(): Used to initialize OSI command line settings and install Linux specific _OSI handler; acpi_osi_setup(): The API that should be used by the external quirks. acpi_osi_is_win8(): The API is used by the external drivers to determine if BIOS supports Win8. CONFIG_DMI is not useful as stub dmi_check_system() can make everything stub because of strip. No functional changes. Tested-by: Lukas Wunner Tested-by: Chen Yu Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index bf0adc611aad..58f707a399c2 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -359,8 +359,6 @@ extern bool wmi_has_guid(const char *guid); extern char acpi_video_backlight_string[]; extern long acpi_is_video_device(acpi_handle handle); extern int acpi_blacklisted(void); -extern void acpi_osi_dmi_linux(bool enable, const struct dmi_system_id *d); -extern void acpi_osi_dmi_darwin(bool enable, const struct dmi_system_id *d); extern void acpi_osi_setup(char *str); extern bool acpi_osi_is_win8(void); -- cgit v1.2.3 From 9e5ed6d1fb87dc3ff0c2a94ed75fe82027a9f597 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Sun, 17 Apr 2016 13:36:56 -0400 Subject: ACPI,PCI,IRQ: remove SCI penalize function Removing the SCI penalize function as the penalty is now calculated on the fly. Signed-off-by: Sinan Kaya Acked-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 06ed7e54033e..0f413170c8f9 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -311,7 +311,6 @@ struct pci_dev; int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); bool acpi_isa_irq_available(int irq); -void acpi_penalize_sci_irq(int irq, int trigger, int polarity); void acpi_pci_irq_disable (struct pci_dev *dev); extern int ec_read(u8 addr, u8 *val); -- cgit v1.2.3 From ddbb74bc70c0dbaab85d1aa2564b0b3217267454 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 30 Apr 2016 13:33:29 +0200 Subject: PM / OPP: pass cpumask by reference The new use of dev_pm_opp_set_sharing_cpus resulted in a harmless compiler warning with CONFIG_CPUMASK_OFFSTACK=y: drivers/cpufreq/mvebu-cpufreq.c: In function 'armada_xp_pmsu_cpufreq_init': include/linux/cpumask.h:550:25: error: passing argument 2 of 'dev_pm_opp_set_sharing_cpus' discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers] The problem here is that cpumask_var_t gets passed by reference, but by declaring a 'const cpumask_var_t' argument, only the pointer is constant, not the actual mask. This is harmless because the function does not actually modify the mask. This patch changes the function prototypes for all of the related functions to pass a 'struct cpumask *' instead of 'cpumask_var_t', matching what most other such functions do in the kernel. This lets us mark all the other similar functions as taking a 'const' mask where possible, and it avoids the warning without any change in object code. Signed-off-by: Arnd Bergmann Fixes: 947bd567f7a5 (mvebu: Use dev_pm_opp_set_sharing_cpus() to mark OPP tables as shared) Acked-by: Pavel Machek Acked-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 15f554443b59..5221d259e413 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -65,8 +65,8 @@ void dev_pm_opp_put_prop_name(struct device *dev); int dev_pm_opp_set_regulator(struct device *dev, const char *name); void dev_pm_opp_put_regulator(struct device *dev); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); -int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const cpumask_var_t cpumask); -int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask); +int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask); +int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask); #else static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { @@ -180,12 +180,12 @@ static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_f return -ENOTSUPP; } -static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const cpumask_var_t cpumask) +static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask) { return -ENOTSUPP; } -static inline int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) +static inline int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask) { return -EINVAL; } @@ -195,9 +195,9 @@ static inline int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, cpumask_va #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) int dev_pm_opp_of_add_table(struct device *dev); void dev_pm_opp_of_remove_table(struct device *dev); -int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask); -void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask); -int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask); +int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask); +void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask); +int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask); #else static inline int dev_pm_opp_of_add_table(struct device *dev) { @@ -208,16 +208,16 @@ static inline void dev_pm_opp_of_remove_table(struct device *dev) { } -static inline int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask) +static inline int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask) { return -ENOTSUPP; } -static inline void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask) +static inline void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask) { } -static inline int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) +static inline int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask) { return -ENOTSUPP; } -- cgit v1.2.3 From 411466c5081d2f649b3583cae0f6c9ad5edec636 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 3 May 2016 15:05:04 +0100 Subject: PM / OPP: add non-OF versions of dev_pm_opp_{cpumask_, }remove_table Functions dev_pm_opp_of_{cpumask_,}remove_table removes/frees all the static OPP entries associated with the device and/or all cpus(in case of cpumask) that are created from DT. However the OPP entries are populated reading from the firmware or some different method using dev_pm_opp_add are marked dynamic and can't be removed using above functions. This patch adds non DT/OF versions of dev_pm_opp_{cpumask_,}remove_table to support the above mentioned usecase. This is in preparation to make use of the same in scpi-cpufreq.c Signed-off-by: Sudeep Holla Acked-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 5221d259e413..bca26157f5b6 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -67,6 +67,8 @@ void dev_pm_opp_put_regulator(struct device *dev); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask); int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask); +void dev_pm_opp_remove_table(struct device *dev); +void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask); #else static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { @@ -190,6 +192,14 @@ static inline int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpu return -EINVAL; } +static inline void dev_pm_opp_remove_table(struct device *dev) +{ +} + +static inline void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask) +{ +} + #endif /* CONFIG_PM_OPP */ #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) -- cgit v1.2.3 From e7904a28f5331c21d17af638cb477c83662e3cb6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 1 Aug 2015 19:25:08 +0200 Subject: locking/lockdep, sched/core: Implement a better lock pinning scheme The problem with the existing lock pinning is that each pin is of value 1; this mean you can simply unpin if you know its pinned, without having any extra information. This scheme generates a random (16 bit) cookie for each pin and requires this same cookie to unpin. This means you have to keep the cookie in context. No objsize difference for !LOCKDEP kernels. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index d026b190c530..cf9bf9612702 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -354,8 +354,13 @@ extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask); extern void lockdep_clear_current_reclaim_state(void); extern void lockdep_trace_alloc(gfp_t mask); -extern void lock_pin_lock(struct lockdep_map *lock); -extern void lock_unpin_lock(struct lockdep_map *lock); +struct pin_cookie { unsigned int val; }; + +#define NIL_COOKIE (struct pin_cookie){ .val = 0U, } + +extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock); +extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie); +extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); # define INIT_LOCKDEP .lockdep_recursion = 0, .lockdep_reclaim_gfp = 0, @@ -371,8 +376,9 @@ extern void lock_unpin_lock(struct lockdep_map *lock); #define lockdep_recursing(tsk) ((tsk)->lockdep_recursion) -#define lockdep_pin_lock(l) lock_pin_lock(&(l)->dep_map) -#define lockdep_unpin_lock(l) lock_unpin_lock(&(l)->dep_map) +#define lockdep_pin_lock(l) lock_pin_lock(&(l)->dep_map) +#define lockdep_repin_lock(l,c) lock_repin_lock(&(l)->dep_map, (c)) +#define lockdep_unpin_lock(l,c) lock_unpin_lock(&(l)->dep_map, (c)) #else /* !CONFIG_LOCKDEP */ @@ -425,8 +431,13 @@ struct lock_class_key { }; #define lockdep_recursing(tsk) (0) -#define lockdep_pin_lock(l) do { (void)(l); } while (0) -#define lockdep_unpin_lock(l) do { (void)(l); } while (0) +struct pin_cookie { }; + +#define NIL_COOKIE (struct pin_cookie){ } + +#define lockdep_pin_lock(l) ({ struct pin_cookie cookie; cookie; }) +#define lockdep_repin_lock(l, c) do { (void)(l); (void)(c); } while (0) +#define lockdep_unpin_lock(l, c) do { (void)(l); (void)(c); } while (0) #endif /* !LOCKDEP */ -- cgit v1.2.3 From 6ecdd74962f246dfe8750b7bea481a1c0816315d Mon Sep 17 00:00:00 2001 From: Yuyang Du Date: Tue, 5 Apr 2016 12:12:26 +0800 Subject: sched/fair: Generalize the load/util averages resolution definition Integer metric needs fixed point arithmetic. In sched/fair, a few metrics, e.g., weight, load, load_avg, util_avg, freq, and capacity, may have different fixed point ranges, which makes their update and usage error-prone. In order to avoid the errors relating to the fixed point range, we definie a basic fixed point range, and then formalize all metrics to base on the basic range. The basic range is 1024 or (1 << 10). Further, one can recursively apply the basic range to have larger range. Pointed out by Ben Segall, weight (visible to user, e.g., NICE-0 has 1024) and load (e.g., NICE_0_LOAD) have independent ranges, but they must be well calibrated. Signed-off-by: Yuyang Du Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bsegall@google.com Cc: dietmar.eggemann@arm.com Cc: lizefan@huawei.com Cc: morten.rasmussen@arm.com Cc: pjt@google.com Cc: umgwanakikbuti@gmail.com Cc: vincent.guittot@linaro.org Link: http://lkml.kernel.org/r/1459829551-21625-2-git-send-email-yuyang.du@intel.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d894f2d61388..7d779d70a3a5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -936,10 +936,20 @@ enum cpu_idle_type { CPU_MAX_IDLE_TYPES }; +/* + * Integer metrics need fixed point arithmetic, e.g., sched/fair + * has a few: load, load_avg, util_avg, freq, and capacity. + * + * We define a basic fixed point arithmetic range, and then formalize + * all these metrics based on that basic range. + */ +# define SCHED_FIXEDPOINT_SHIFT 10 +# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) + /* * Increase resolution of cpu_capacity calculations */ -#define SCHED_CAPACITY_SHIFT 10 +#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT #define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) /* @@ -1205,8 +1215,8 @@ struct load_weight { * 1) load_avg factors frequency scaling into the amount of time that a * sched_entity is runnable on a rq into its weight. For cfs_rq, it is the * aggregated such weights of all runnable and blocked sched_entities. - * 2) util_avg factors frequency and cpu scaling into the amount of time - * that a sched_entity is running on a CPU, in the range [0..SCHED_LOAD_SCALE]. + * 2) util_avg factors frequency and cpu capacity scaling into the amount of time + * that a sched_entity is running on a CPU, in the range [0..SCHED_CAPACITY_SCALE]. * For cfs_rq, it is the aggregated such times of all runnable and * blocked sched_entities. * The 64 bit load_sum can: -- cgit v1.2.3 From 7b5953345efe4f226bb52cbea04558d16ec7ebfa Mon Sep 17 00:00:00 2001 From: Yuyang Du Date: Tue, 5 Apr 2016 12:12:28 +0800 Subject: sched/fair: Add detailed description to the sched load avg metrics These sched metrics have become complex enough, so describe them in detail at their definition. Signed-off-by: Yuyang Du Signed-off-by: Peter Zijlstra (Intel) [ Fixed the text to improve its spelling and typography. ] Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bsegall@google.com Cc: dietmar.eggemann@arm.com Cc: lizefan@huawei.com Cc: morten.rasmussen@arm.com Cc: pjt@google.com Cc: umgwanakikbuti@gmail.com Cc: vincent.guittot@linaro.org Link: http://lkml.kernel.org/r/1459829551-21625-4-git-send-email-yuyang.du@intel.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 60 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 7d779d70a3a5..57faf789c88f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1211,18 +1211,56 @@ struct load_weight { }; /* - * The load_avg/util_avg accumulates an infinite geometric series. - * 1) load_avg factors frequency scaling into the amount of time that a - * sched_entity is runnable on a rq into its weight. For cfs_rq, it is the - * aggregated such weights of all runnable and blocked sched_entities. - * 2) util_avg factors frequency and cpu capacity scaling into the amount of time - * that a sched_entity is running on a CPU, in the range [0..SCHED_CAPACITY_SCALE]. - * For cfs_rq, it is the aggregated such times of all runnable and + * The load_avg/util_avg accumulates an infinite geometric series + * (see __update_load_avg() in kernel/sched/fair.c). + * + * [load_avg definition] + * + * load_avg = runnable% * scale_load_down(load) + * + * where runnable% is the time ratio that a sched_entity is runnable. + * For cfs_rq, it is the aggregated load_avg of all runnable and * blocked sched_entities. - * The 64 bit load_sum can: - * 1) for cfs_rq, afford 4353082796 (=2^64/47742/88761) entities with - * the highest weight (=88761) always runnable, we should not overflow - * 2) for entity, support any load.weight always runnable + * + * load_avg may also take frequency scaling into account: + * + * load_avg = runnable% * scale_load_down(load) * freq% + * + * where freq% is the CPU frequency normalized to the highest frequency. + * + * [util_avg definition] + * + * util_avg = running% * SCHED_CAPACITY_SCALE + * + * where running% is the time ratio that a sched_entity is running on + * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable + * and blocked sched_entities. + * + * util_avg may also factor frequency scaling and CPU capacity scaling: + * + * util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity% + * + * where freq% is the same as above, and capacity% is the CPU capacity + * normalized to the greatest capacity (due to uarch differences, etc). + * + * N.B., the above ratios (runnable%, running%, freq%, and capacity%) + * themselves are in the range of [0, 1]. To do fixed point arithmetics, + * we therefore scale them to as large a range as necessary. This is for + * example reflected by util_avg's SCHED_CAPACITY_SCALE. + * + * [Overflow issue] + * + * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities + * with the highest load (=88761), always runnable on a single cfs_rq, + * and should not overflow as the number already hits PID_MAX_LIMIT. + * + * For all other cases (including 32-bit kernels), struct load_weight's + * weight will overflow first before we do, because: + * + * Max(load_avg) <= Max(load.weight) + * + * Then it is the load_weight's responsibility to consider overflow + * issues. */ struct sched_avg { u64 last_update_time, load_sum; -- cgit v1.2.3 From a1cc5bcfcfca0b99f009b117785142dbdc3b87a3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 21 Apr 2016 20:35:25 +0200 Subject: locking/atomics: Flip atomic_fetch_or() arguments All the atomic operations have their arguments the wrong way around; make atomic_fetch_or() consistent and flip them. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 506c3531832e..e451534fe54d 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -560,11 +560,11 @@ static inline int atomic_dec_if_positive(atomic_t *v) /** * atomic_fetch_or - perform *p |= mask and return old value of *p - * @p: pointer to atomic_t * @mask: mask to OR on the atomic_t + * @p: pointer to atomic_t */ #ifndef atomic_fetch_or -static inline int atomic_fetch_or(atomic_t *p, int mask) +static inline int atomic_fetch_or(int mask, atomic_t *p) { int old, val = atomic_read(p); -- cgit v1.2.3 From 375637bc524952f1122ea22caf5a8f1fecad8228 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 27 Apr 2016 18:44:46 +0300 Subject: perf/core: Introduce address range filtering Many instruction tracing PMUs out there support address range-based filtering, which would, for example, generate trace data only for a given range of instruction addresses, which is useful for tracing individual functions, modules or libraries. Other PMUs may also utilize this functionality to allow filtering to or filtering out code at certain address ranges. This patch introduces the interface for userspace to specify these filters and for the PMU drivers to apply these filters to hardware configuration. The user interface is an ASCII string that is passed via an ioctl() and specifies (in the form of an ASCII string) address ranges within certain object files or within kernel. There is no special treatment for kernel modules yet, but it might be a worthy pursuit. The PMU driver interface basically adds two extra callbacks to the PMU driver structure, one of which validates the filter configuration proposed by the user against what the hardware is actually capable of doing and the other one translates hardware-independent filter configuration into something that can be programmed into the hardware. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Mathieu Poirier Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/1461771888-10409-6-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 98 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a090700cccca..c77e4a159fa2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -151,6 +151,15 @@ struct hw_perf_event { */ struct task_struct *target; + /* + * PMU would store hardware filter configuration + * here. + */ + void *addr_filters; + + /* Last sync'ed generation of filters */ + unsigned long addr_filters_gen; + /* * hw_perf_event::state flags; used to track the PERF_EF_* state. */ @@ -240,6 +249,9 @@ struct pmu { int task_ctx_nr; int hrtimer_interval_ms; + /* number of address filters this PMU can do */ + unsigned int nr_addr_filters; + /* * Fully disable/enable this PMU, can be used to protect from the PMI * as well as for lazy/batch writing of the MSRs. @@ -392,12 +404,71 @@ struct pmu { */ void (*free_aux) (void *aux); /* optional */ + /* + * Validate address range filters: make sure the HW supports the + * requested configuration and number of filters; return 0 if the + * supplied filters are valid, -errno otherwise. + * + * Runs in the context of the ioctl()ing process and is not serialized + * with the rest of the PMU callbacks. + */ + int (*addr_filters_validate) (struct list_head *filters); + /* optional */ + + /* + * Synchronize address range filter configuration: + * translate hw-agnostic filters into hardware configuration in + * event::hw::addr_filters. + * + * Runs as a part of filter sync sequence that is done in ->start() + * callback by calling perf_event_addr_filters_sync(). + * + * May (and should) traverse event::addr_filters::list, for which its + * caller provides necessary serialization. + */ + void (*addr_filters_sync) (struct perf_event *event); + /* optional */ + /* * Filter events for PMU-specific reasons. */ int (*filter_match) (struct perf_event *event); /* optional */ }; +/** + * struct perf_addr_filter - address range filter definition + * @entry: event's filter list linkage + * @inode: object file's inode for file-based filters + * @offset: filter range offset + * @size: filter range size + * @range: 1: range, 0: address + * @filter: 1: filter/start, 0: stop + * + * This is a hardware-agnostic filter configuration as specified by the user. + */ +struct perf_addr_filter { + struct list_head entry; + struct inode *inode; + unsigned long offset; + unsigned long size; + unsigned int range : 1, + filter : 1; +}; + +/** + * struct perf_addr_filters_head - container for address range filters + * @list: list of filters for this event + * @lock: spinlock that serializes accesses to the @list and event's + * (and its children's) filter generations. + * + * A child event will use parent's @list (and therefore @lock), so they are + * bundled together; see perf_event_addr_filters(). + */ +struct perf_addr_filters_head { + struct list_head list; + raw_spinlock_t lock; +}; + /** * enum perf_event_active_state - the states of a event */ @@ -566,6 +637,12 @@ struct perf_event { atomic_t event_limit; + /* address range filters */ + struct perf_addr_filters_head addr_filters; + /* vma address array for file-based filders */ + unsigned long *addr_filters_offs; + unsigned long addr_filters_gen; + void (*destroy)(struct perf_event *); struct rcu_head rcu_head; @@ -1070,6 +1147,27 @@ static inline bool is_write_backward(struct perf_event *event) return !!event->attr.write_backward; } +static inline bool has_addr_filter(struct perf_event *event) +{ + return event->pmu->nr_addr_filters; +} + +/* + * An inherited event uses parent's filters + */ +static inline struct perf_addr_filters_head * +perf_event_addr_filters(struct perf_event *event) +{ + struct perf_addr_filters_head *ifh = &event->addr_filters; + + if (event->parent) + ifh = &event->parent->addr_filters; + + return ifh; +} + +extern void perf_event_addr_filters_sync(struct perf_event *event); + extern int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size); extern int perf_output_begin_forward(struct perf_output_handle *handle, -- cgit v1.2.3 From 5101ef20f0ef1de79091a1fdb6b1a7f07565545a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 26 Apr 2016 11:33:46 +0100 Subject: perf/arm: Special-case hetereogeneous CPUs Commit: 26657848502b7847 ("perf/core: Verify we have a single perf_hw_context PMU") forcefully prevents multiple PMUs from sharing perf_hw_context, as this generally doesn't make sense. It is a common bug for uncore PMUs to use perf_hw_context rather than perf_invalid_context, which this detects. However, systems exist with heterogeneous CPUs (and hence heterogeneous HW PMUs), for which sharing perf_hw_context is necessary, and possible in some limited cases. To make this work we have to perform some gymnastics, as we did in these commits: 66eb579e66ecfea5 ("perf: allow for PMU-specific event filtering") c904e32a69b7c779 ("arm: perf: filter unschedulable events") To allow those systems to work, we must allow PMUs for heterogeneous CPUs to share perf_hw_context, though we must still disallow sharing otherwise to detect the common misuse of perf_hw_context. This patch adds a new PERF_PMU_CAP_HETEROGENEOUS_CPUS for this, updates the core logic to account for this, and makes use of it in the arm_pmu code that is used for systems with heterogeneous CPUs. Comments are added to make the rationale clear and hopefully avoid accidental abuse. Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Catalin Marinas Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20160426103346.GA20836@leverpostej Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c77e4a159fa2..9e1c3ada91c4 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -225,6 +225,7 @@ struct perf_event; #define PERF_PMU_CAP_AUX_SW_DOUBLEBUF 0x08 #define PERF_PMU_CAP_EXCLUSIVE 0x10 #define PERF_PMU_CAP_ITRACE 0x20 +#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40 /** * struct pmu - generic performance monitoring unit -- cgit v1.2.3 From 4d2458507d0b465c62ae80f3e81b8c008ec96b05 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 4 May 2016 19:33:59 -0300 Subject: ASoC: fsl_sai: Allow setting the SAI MCLK direction On mx6ul the General Purpose Register 1 (GPR1) contains the following bits for configuring the direction of the SAI MCLKs: SAI1_MCLK_DIR, SAI2_MCLK_DIR, SAI3_MCLK_DIR Introduce the "fsl,sai-mclk-direction-output" optional property to allow configuring the SAI_MCLK outputs. Tested on a imx6ul-evk board. Signed-off-by: Fabio Estevam Acked-by: Nicolin Chen Signed-off-by: Mark Brown --- include/linux/mfd/syscon/imx6q-iomuxc-gpr.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h index 238c8db953eb..68353822afce 100644 --- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h +++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h @@ -447,5 +447,11 @@ #define IMX6UL_GPR1_ENET2_CLK_OUTPUT (0x1 << 18) #define IMX6UL_GPR1_ENET_CLK_DIR (0x3 << 17) #define IMX6UL_GPR1_ENET_CLK_OUTPUT (0x3 << 17) +#define IMX6UL_GPR1_SAI1_MCLK_DIR (0x1 << 19) +#define IMX6UL_GPR1_SAI2_MCLK_DIR (0x1 << 20) +#define IMX6UL_GPR1_SAI3_MCLK_DIR (0x1 << 21) +#define IMX6UL_GPR1_SAI_MCLK_MASK (0x7 << 19) +#define MCLK_DIR(x) (x == 1 ? IMX6UL_GPR1_SAI1_MCLK_DIR : x == 2 ? \ + IMX6UL_GPR1_SAI2_MCLK_DIR : IMX6UL_GPR1_SAI3_MCLK_DIR) #endif /* __LINUX_IMX6Q_IOMUXC_GPR_H */ -- cgit v1.2.3 From 0ef5a50c1658d4d96a44f145bcb92ff3310c75b1 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 5 May 2016 11:54:22 -0400 Subject: block: make bio_inc_remaining() interface accessible again Commit 326e1dbb57 ("block: remove management of bi_remaining when restoring original bi_end_io") made bio_inc_remaining() private to bio.c because the only use-case that made sense was confined to the bio_chain() interface. Since that time DM thinp went on to use bio_chain() in its relatively complex implementation of async discard support. That implementation, even when converted over to use the new async __blkdev_issue_discard() interface, depends on deferred completion of the original discard bio -- which is most appropriately implemented using bio_inc_remaining(). DM thinp foolishly duplicated bio_inc_remaining(), local to dm-thin.c as __bio_inc_remaining(), so re-exporting bio_inc_remaining() allows us to put an end to that foolishness. All said, bio_inc_remaining() should really only be used in conjunction with bio_chain(). It isn't intended for generic bio reference counting. Signed-off-by: Mike Snitzer Acked-by: Joe Thornber Signed-off-by: Jens Axboe --- include/linux/bio.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 6b7481f62218..9faebf7f9a33 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -702,6 +702,17 @@ static inline struct bio *bio_list_get(struct bio_list *bl) return bio; } +/* + * Increment chain count for the bio. Make sure the CHAIN flag update + * is visible before the raised count. + */ +static inline void bio_inc_remaining(struct bio *bio) +{ + bio_set_flag(bio, BIO_CHAIN); + smp_mb__before_atomic(); + atomic_inc(&bio->__bi_remaining); +} + /* * bio_set is used to allow other portions of the IO system to * allocate their own private memory pools for bio and iovec structures. -- cgit v1.2.3 From e5b2d30e42e66122c9b1b17529df743bc938c041 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 3 Feb 2016 19:58:11 +0100 Subject: mtd: nand: sharpsl: switch to mtd_ooblayout_ops Implementing the mtd_ooblayout_ops interface is the new way of exposing ECC/OOB layout to MTD users. Signed-off-by: Boris Brezillon --- include/linux/mtd/sharpsl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/sharpsl.h b/include/linux/mtd/sharpsl.h index 25f4d2a845c1..65e91d0fa981 100644 --- a/include/linux/mtd/sharpsl.h +++ b/include/linux/mtd/sharpsl.h @@ -14,7 +14,7 @@ struct sharpsl_nand_platform_data { struct nand_bbt_descr *badblock_pattern; - struct nand_ecclayout *ecc_layout; + const struct mtd_ooblayout_ops *ecc_layout; struct mtd_partition *partitions; unsigned int nr_partitions; }; -- cgit v1.2.3 From 04a123a99f089773fc6ee6e21af5f831d87fe362 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 9 Feb 2016 15:01:21 +0100 Subject: mtd: nand: fsmc: get rid of the fsmc_nand_eccplace struct Now that mtd_ooblayout_ecc() returns the ECC byte position using the OOB free method, we can get rid of the fsmc_nand_eccplace struct. Signed-off-by: Boris Brezillon --- include/linux/mtd/fsmc.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/fsmc.h b/include/linux/mtd/fsmc.h index c8be32e9fc49..ad3c3488073c 100644 --- a/include/linux/mtd/fsmc.h +++ b/include/linux/mtd/fsmc.h @@ -103,24 +103,6 @@ #define FSMC_BUSY_WAIT_TIMEOUT (1 * HZ) -/* - * There are 13 bytes of ecc for every 512 byte block in FSMC version 8 - * and it has to be read consecutively and immediately after the 512 - * byte data block for hardware to generate the error bit offsets - * Managing the ecc bytes in the following way is easier. This way is - * similar to oobfree structure maintained already in u-boot nand driver - */ -#define MAX_ECCPLACE_ENTRIES 32 - -struct fsmc_nand_eccplace { - uint8_t offset; - uint8_t length; -}; - -struct fsmc_eccplace { - struct fsmc_nand_eccplace eccplace[MAX_ECCPLACE_ENTRIES]; -}; - struct fsmc_nand_timings { uint8_t tclr; uint8_t tar; -- cgit v1.2.3 From a411679fb5fd7ee2df64a55c23c81538ceeb6d06 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 7 Dec 2015 22:46:45 +0100 Subject: mtd: onenand: switch to mtd_ooblayout_ops Implementing the mtd_ooblayout_ops interface is the new way of exposing ECC/OOB layout to MTD users. Modify the onenand drivers to switch to this approach. Signed-off-by: Boris Brezillon --- include/linux/mtd/onenand.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h index 4596503c9da9..0aaa98b219a4 100644 --- a/include/linux/mtd/onenand.h +++ b/include/linux/mtd/onenand.h @@ -80,7 +80,6 @@ struct onenand_bufferram { * @page_buf: [INTERN] page main data buffer * @oob_buf: [INTERN] page oob data buffer * @subpagesize: [INTERN] holds the subpagesize - * @ecclayout: [REPLACEABLE] the default ecc placement scheme * @bbm: [REPLACEABLE] pointer to Bad Block Management * @priv: [OPTIONAL] pointer to private chip date */ @@ -134,7 +133,6 @@ struct onenand_chip { #endif int subpagesize; - struct nand_ecclayout *ecclayout; void *bbm; -- cgit v1.2.3 From 7f2b092c9eeda055ae60af194a8edacaea5f7a10 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 3 Feb 2016 19:24:10 +0100 Subject: mtd: nand: kill the ecc->layout field Now that all NAND drivers have switched to mtd_ooblayout_ops, we can kill the ecc->layout field. Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index f2ded7b1b3b8..e851839daf09 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -473,7 +473,6 @@ struct nand_hw_control { * @prepad: padding information for syndrome based ECC generators * @postpad: padding information for syndrome based ECC generators * @options: ECC specific options (see NAND_ECC_XXX flags defined above) - * @layout: ECC layout control struct pointer * @priv: pointer to private ECC control data * @hwctl: function to control hardware ECC generator. Must only * be provided if an hardware ECC is available @@ -524,7 +523,6 @@ struct nand_ecc_ctrl { int prepad; int postpad; unsigned int options; - struct nand_ecclayout *layout; void *priv; void (*hwctl)(struct mtd_info *mtd, int mode); int (*calculate)(struct mtd_info *mtd, const uint8_t *dat, -- cgit v1.2.3 From aab616e31d1c7ec3726f7f5cbdaaec98759ebe93 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 4 Feb 2016 10:16:18 +0100 Subject: mtd: kill the nand_ecclayout struct Now that all MTD drivers have moved to the mtd_ooblayout_ops model we can safely remove the struct nand_ecclayout definition, and all the remaining places where it was still used. Signed-off-by: Boris Brezillon --- include/linux/mtd/mtd.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 177bf314ad70..29a170612203 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -96,21 +96,6 @@ struct mtd_oob_ops { #define MTD_MAX_OOBFREE_ENTRIES_LARGE 32 #define MTD_MAX_ECCPOS_ENTRIES_LARGE 640 -/* - * Internal ECC layout control structure. For historical reasons, there is a - * similar, smaller struct nand_ecclayout_user (in mtd-abi.h) that is retained - * for export to user-space via the ECCGETLAYOUT ioctl. - * nand_ecclayout should be expandable in the future simply by the above macros. - * - * This structure is now deprecated, you should use struct nand_ecclayout_ops - * to describe your OOB layout. - */ -struct nand_ecclayout { - __u32 eccbytes; - __u32 eccpos[MTD_MAX_ECCPOS_ENTRIES_LARGE]; - struct nand_oobfree oobfree[MTD_MAX_OOBFREE_ENTRIES_LARGE]; -}; - /** * struct mtd_oob_region - oob region definition * @offset: region offset @@ -200,9 +185,6 @@ struct mtd_info { const char *name; int index; - /* [Deprecated] ECC layout structure pointer - read only! */ - struct nand_ecclayout *ecclayout; - /* OOB layout description */ const struct mtd_ooblayout_ops *ooblayout; @@ -308,8 +290,6 @@ int mtd_ooblayout_set_databytes(struct mtd_info *mtd, const u8 *databuf, int mtd_ooblayout_count_freebytes(struct mtd_info *mtd); int mtd_ooblayout_count_eccbytes(struct mtd_info *mtd); -void mtd_set_ecclayout(struct mtd_info *mtd, struct nand_ecclayout *ecclayout); - static inline void mtd_set_ooblayout(struct mtd_info *mtd, const struct mtd_ooblayout_ops *ooblayout) { -- cgit v1.2.3 From d48f62b9a0a035d6c16de4a4dae315f7332a8939 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 1 Apr 2016 14:54:32 +0200 Subject: mtd: nand: move of_get_nand_xxx() helpers into nand_base.c Now that all drivers go through nand_set_flash_node() to parse the generic NAND properties, we can move all of_get_nand_xxx() helpers in to nand_base.c, make them static and remove of_mtd.c and of_mtd.h. Signed-off-by: Boris Brezillon --- include/linux/of_mtd.h | 56 -------------------------------------------------- 1 file changed, 56 deletions(-) delete mode 100644 include/linux/of_mtd.h (limited to 'include/linux') diff --git a/include/linux/of_mtd.h b/include/linux/of_mtd.h deleted file mode 100644 index 0f6aca5c6f2f..000000000000 --- a/include/linux/of_mtd.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2012 Jean-Christophe PLAGNIOL-VILLARD - * - * OF helpers for mtd. - * - * This file is released under the GPLv2 - */ - -#ifndef __LINUX_OF_MTD_H -#define __LINUX_OF_MTD_H - -#ifdef CONFIG_OF_MTD - -#include -int of_get_nand_ecc_mode(struct device_node *np); -int of_get_nand_ecc_algo(struct device_node *np); -int of_get_nand_ecc_step_size(struct device_node *np); -int of_get_nand_ecc_strength(struct device_node *np); -int of_get_nand_bus_width(struct device_node *np); -bool of_get_nand_on_flash_bbt(struct device_node *np); - -#else /* CONFIG_OF_MTD */ - -static inline int of_get_nand_ecc_mode(struct device_node *np) -{ - return -ENOSYS; -} - -static inline int of_get_nand_ecc_algo(struct device_node *np) -{ - return -ENOSYS; -} - -static inline int of_get_nand_ecc_step_size(struct device_node *np) -{ - return -ENOSYS; -} - -static inline int of_get_nand_ecc_strength(struct device_node *np) -{ - return -ENOSYS; -} - -static inline int of_get_nand_bus_width(struct device_node *np) -{ - return -ENOSYS; -} - -static inline bool of_get_nand_on_flash_bbt(struct device_node *np) -{ - return false; -} - -#endif /* CONFIG_OF_MTD */ - -#endif /* __LINUX_OF_MTD_H */ -- cgit v1.2.3 From e4225ae8234cf5548c38dc887b233ad1d45b4d53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 17 Apr 2016 22:53:07 +0200 Subject: mtd: mtd: drop NAND_ECC_SOFT_BCH enum value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This value should not be part of nand_ecc_modes_t as it specifies algorithm not a mode. We successfully managed to introduce new "algo" field which is respected now. Signed-off-by: Rafał Miłecki Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index e851839daf09..fbe8e164a4ee 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -116,7 +116,6 @@ typedef enum { NAND_ECC_HW, NAND_ECC_HW_SYNDROME, NAND_ECC_HW_OOB_FIRST, - NAND_ECC_SOFT_BCH, } nand_ecc_modes_t; enum nand_ecc_algo { -- cgit v1.2.3 From 466c3fb618b8520b75be37fcb115e9610663b945 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Thu, 5 May 2016 22:35:54 -0400 Subject: jbd2: remove excess descriptions for handle_s Commit bf6993276f74 ("jbd2: Use tracepoints for history file") removed the members j_history, j_history_max and j_history_cur from struct handle_s but the descriptions stayed lingering. Removing them. Signed-off-by: Luis de Bethencourt Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- include/linux/jbd2.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 39511484ad10..efb232c5f668 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -789,9 +789,6 @@ jbd2_time_diff(unsigned long start, unsigned long end) * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the * number that will fit in j_blocksize * @j_last_sync_writer: most recent pid which did a synchronous write - * @j_history: Buffer storing the transactions statistics history - * @j_history_max: Maximum number of transactions in the statistics history - * @j_history_cur: Current number of transactions in the statistics history * @j_history_lock: Protect the transactions statistics history * @j_proc_entry: procfs entry for the jbd statistics directory * @j_stats: Overall statistics -- cgit v1.2.3 From 73898db0430125606c86c798c0627aefef9af9ed Mon Sep 17 00:00:00 2001 From: Haggai Abramovsky Date: Wed, 4 May 2016 14:50:15 +0300 Subject: net/mlx4: Avoid wrong virtual mappings The dma_alloc_coherent() function returns a virtual address which can be used for coherent access to the underlying memory. On some architectures, like arm64, undefined behavior results if this memory is also accessed via virtual mappings that are not coherent. Because of their undefined nature, operations like virt_to_page() return garbage when passed virtual addresses obtained from dma_alloc_coherent(). Any subsequent mappings via vmap() of the garbage page values are unusable and result in bad things like bus errors (synchronous aborts in ARM64 speak). The mlx4 driver contains code that does the equivalent of: vmap(virt_to_page(dma_alloc_coherent)), this results in an OOPs when the device is opened. Prevent Ethernet driver to run this problematic code by forcing it to allocate contiguous memory. As for the Infiniband driver, at first we are trying to allocate contiguous memory, but in case of failure roll back to work with fragmented memory. Signed-off-by: Haggai Abramovsky Signed-off-by: Yishai Hadas Reported-by: David Daney Tested-by: Sinan Kaya Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index d1f904c8b2cb..80dec87a94f8 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1058,7 +1058,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset) { - if (BITS_PER_LONG == 64 || buf->nbufs == 1) + if (buf->nbufs == 1) return buf->direct.buf + offset; else return buf->page_list[offset >> PAGE_SHIFT].buf + @@ -1098,7 +1098,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db); int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, - int size, int max_direct); + int size); void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres, int size); -- cgit v1.2.3 From ba93cdce5bbe6929fd7486f87c985598ded8f451 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Thu, 28 Apr 2016 19:03:38 -0300 Subject: leds: triggers: Allow to switch the trigger to "panic" on a kernel panic This commit adds a new led_cdev flag LED_PANIC_INDICATOR, which allows to mark a specific LED to be switched to the "panic" trigger, on a kernel panic. This is useful to allow the user to assign a regular trigger to a given LED, and still blink that LED on a kernel panic. Signed-off-by: Ezequiel Garcia Reviewed-by: Matthias Brugger Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- include/linux/leds.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 19eb10278bea..7e9fb00e15e8 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -50,6 +50,7 @@ struct led_classdev { #define LED_SYSFS_DISABLE (1 << 22) #define LED_DEV_CAP_FLASH (1 << 23) #define LED_HW_PLUGGABLE (1 << 24) +#define LED_PANIC_INDICATOR (1 << 25) /* Set LED brightness level * Must not sleep. Use brightness_set_blocking for drivers -- cgit v1.2.3 From 80d6737b27bbdf645a815c136606836b435f0268 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Thu, 28 Apr 2016 19:03:40 -0300 Subject: leds: gpio: Support the "panic-indicator" firmware property Calling a GPIO LEDs is quite likely to work even if the kernel has paniced, so they are ideal to blink in this situation. This commit adds support for the new "panic-indicator" firmware property, allowing to mark a given LED to blink on a kernel panic. Signed-off-by: Ezequiel Garcia Reviewed-by: Matthias Brugger Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- include/linux/leds.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 7e9fb00e15e8..d2b13066e781 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -365,6 +365,7 @@ struct gpio_led { unsigned gpio; unsigned active_low : 1; unsigned retain_state_suspended : 1; + unsigned panic_indicator : 1; unsigned default_state : 2; /* default_state should be one of LEDS_GPIO_DEFSTATE_(ON|OFF|KEEP) */ struct gpio_desc *gpiod; -- cgit v1.2.3 From 6f3ffc19157a14b182d9d0c449cd613cef421fe1 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Tue, 12 Jan 2016 18:17:19 +0100 Subject: timer: add setup_deferrable_timer macro Add the trivial missing macro to setup a deferrable timer. Signed-off-by: Lucas Stach Acked-by: Thomas Gleixner --- include/linux/timer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 61aa61dc410c..20ac746f3eb3 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -145,6 +145,8 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, #define setup_timer(timer, fn, data) \ __setup_timer((timer), (fn), (data), 0) +#define setup_deferrable_timer(timer, fn, data) \ + __setup_timer((timer), (fn), (data), TIMER_DEFERRABLE) #define setup_timer_on_stack(timer, fn, data) \ __setup_timer_on_stack((timer), (fn), (data), 0) #define setup_deferrable_timer_on_stack(timer, fn, data) \ -- cgit v1.2.3 From e9d867a67fd03ccc07248ca4e9c2f74fed494d5b Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Thu, 10 Mar 2016 12:54:08 +0100 Subject: sched: Allow per-cpu kernel threads to run on online && !active MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to enable symmetric hotplug, we must mirror the online && !active state of cpu-down on the cpu-up side. However, to retain sanity, limit this state to per-cpu kthreads. Aside from the change to set_cpus_allowed_ptr(), which allow moving the per-cpu kthreads on, the other critical piece is the cpu selection for pinned tasks in select_task_rq(). This avoids dropping into select_fallback_rq(). select_fallback_rq() cannot be allowed to select !active cpus because its used to migrate user tasks away. And we do not want to move user tasks onto cpus that are in transition. Requested-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Thomas Gleixner Cc: Lai Jiangshan Cc: Jan H. Schönherr Cc: Oleg Nesterov Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160301152303.GV6356@twins.programming.kicks-ass.net Signed-off-by: Thomas Gleixner --- include/linux/cpumask.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 40cee6b77a93..e828cf65d7df 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -743,12 +743,10 @@ set_cpu_present(unsigned int cpu, bool present) static inline void set_cpu_online(unsigned int cpu, bool online) { - if (online) { + if (online) cpumask_set_cpu(cpu, &__cpu_online_mask); - cpumask_set_cpu(cpu, &__cpu_active_mask); - } else { + else cpumask_clear_cpu(cpu, &__cpu_online_mask); - } } static inline void -- cgit v1.2.3 From 9cf7243d5d83d27aca47f842107bfa02b5f11d16 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Mar 2016 12:54:09 +0100 Subject: sched: Make set_cpu_rq_start_time() a built in hotplug state Start distangling the maze of hotplug notifiers in the scheduler. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: rt@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + include/linux/sched.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 5d68e15e46b7..99fd1d2f76fe 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -8,6 +8,7 @@ enum cpuhp_state { CPUHP_BRINGUP_CPU, CPUHP_AP_IDLE_DEAD, CPUHP_AP_OFFLINE, + CPUHP_AP_SCHED_STARTING, CPUHP_AP_NOTIFY_STARTING, CPUHP_AP_ONLINE, CPUHP_TEARDOWN_CPU, diff --git a/include/linux/sched.h b/include/linux/sched.h index 52c4847b05e2..39597d0a005e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -372,6 +372,7 @@ extern void cpu_init (void); extern void trap_init(void); extern void update_process_times(int user); extern void scheduler_tick(void); +extern int sched_cpu_starting(unsigned int cpu); extern void sched_show_task(struct task_struct *p); -- cgit v1.2.3 From 135fb3e19773e66f56b60e3b9fdda6166e77c55d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Mar 2016 12:54:11 +0100 Subject: sched: Consolidate the notifier maze We can maintain the ordering of the scheduler cpu hotplug functionality nicely in one notifer. Get rid of the maze. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: rt@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpu.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index f9b1fab4388a..17017051bfb1 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -61,19 +61,15 @@ struct notifier_block; enum { /* * SCHED_ACTIVE marks a cpu which is coming up active during - * CPU_ONLINE and CPU_DOWN_FAILED and must be the first - * notifier. CPUSET_ACTIVE adjusts cpuset according to - * cpu_active mask right after SCHED_ACTIVE. During - * CPU_DOWN_PREPARE, SCHED_INACTIVE and CPUSET_INACTIVE are - * ordered in the similar way. + * CPU_ONLINE and CPU_DOWN_FAILED and must be the first notifier. Is + * also cpuset according to cpu_active mask right after activating the + * cpu. During CPU_DOWN_PREPARE, SCHED_INACTIVE reversed the operation. * * This ordering guarantees consistent cpu_active mask and * migration behavior to all cpu notifiers. */ CPU_PRI_SCHED_ACTIVE = INT_MAX, - CPU_PRI_CPUSET_ACTIVE = INT_MAX - 1, - CPU_PRI_SCHED_INACTIVE = INT_MIN + 1, - CPU_PRI_CPUSET_INACTIVE = INT_MIN, + CPU_PRI_SCHED_INACTIVE = INT_MIN, /* migration should happen before other stuff but after perf */ CPU_PRI_PERF = 20, -- cgit v1.2.3 From 40190a78f85fec29f0fdd21f6b4415712085711e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Mar 2016 12:54:13 +0100 Subject: sched/hotplug: Convert cpu_[in]active notifiers to state machine Now that we reduced everything into single notifiers, it's simple to move them into the hotplug state machine space. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: rt@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpu.h | 12 ------------ include/linux/cpuhotplug.h | 1 + include/linux/sched.h | 2 ++ 3 files changed, 3 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 17017051bfb1..b22b000cf6ee 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -59,18 +59,6 @@ struct notifier_block; * CPU notifier priorities. */ enum { - /* - * SCHED_ACTIVE marks a cpu which is coming up active during - * CPU_ONLINE and CPU_DOWN_FAILED and must be the first notifier. Is - * also cpuset according to cpu_active mask right after activating the - * cpu. During CPU_DOWN_PREPARE, SCHED_INACTIVE reversed the operation. - * - * This ordering guarantees consistent cpu_active mask and - * migration behavior to all cpu notifiers. - */ - CPU_PRI_SCHED_ACTIVE = INT_MAX, - CPU_PRI_SCHED_INACTIVE = INT_MIN, - /* migration should happen before other stuff but after perf */ CPU_PRI_PERF = 20, CPU_PRI_MIGRATION = 10, diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 99fd1d2f76fe..9e07468bf1c5 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -13,6 +13,7 @@ enum cpuhp_state { CPUHP_AP_ONLINE, CPUHP_TEARDOWN_CPU, CPUHP_AP_ONLINE_IDLE, + CPUHP_AP_ACTIVE, CPUHP_AP_SMPBOOT_THREADS, CPUHP_AP_NOTIFY_ONLINE, CPUHP_AP_ONLINE_DYN, diff --git a/include/linux/sched.h b/include/linux/sched.h index 39597d0a005e..1e5f961b1a74 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -373,6 +373,8 @@ extern void trap_init(void); extern void update_process_times(int user); extern void scheduler_tick(void); extern int sched_cpu_starting(unsigned int cpu); +extern int sched_cpu_activate(unsigned int cpu); +extern int sched_cpu_deactivate(unsigned int cpu); extern void sched_show_task(struct task_struct *p); -- cgit v1.2.3 From f2785ddb5367e217365099294b89d6a84668069e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Mar 2016 12:54:18 +0100 Subject: sched/hotplug: Move migration CPU_DYING to sched_cpu_dying() Remove the hotplug notifier and make it an explicit state. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160310120025.502222097@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpu.h | 2 -- include/linux/sched.h | 6 ++++++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index b22b000cf6ee..21597dcac0e2 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -59,9 +59,7 @@ struct notifier_block; * CPU notifier priorities. */ enum { - /* migration should happen before other stuff but after perf */ CPU_PRI_PERF = 20, - CPU_PRI_MIGRATION = 10, /* bring up workqueues before normal notifiers and down after */ CPU_PRI_WORKQUEUE_UP = 5, diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e5f961b1a74..47835cf8aefa 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -376,6 +376,12 @@ extern int sched_cpu_starting(unsigned int cpu); extern int sched_cpu_activate(unsigned int cpu); extern int sched_cpu_deactivate(unsigned int cpu); +#ifdef CONFIG_HOTPLUG_CPU +extern int sched_cpu_dying(unsigned int cpu); +#else +# define sched_cpu_dying NULL +#endif + extern void sched_show_task(struct task_struct *p); #ifdef CONFIG_LOCKUP_DETECTOR -- cgit v1.2.3 From aaddd7d1c740ab3c5efaad7a34650b6dc680c21c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Mar 2016 12:54:19 +0100 Subject: sched/hotplug: Make activate() the last hotplug step The scheduler can handle per cpu threads before the cpu is set to active and it does not allow user space threads on the cpu before active is set. Attaching to the scheduling domains is also not required before user space threads can be handled. Move the activation to the end of the hotplug state space. That also means that deactivation is the first action when a cpu is shut down. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160310120025.597477199@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 9e07468bf1c5..386374d19987 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -13,11 +13,11 @@ enum cpuhp_state { CPUHP_AP_ONLINE, CPUHP_TEARDOWN_CPU, CPUHP_AP_ONLINE_IDLE, - CPUHP_AP_ACTIVE, CPUHP_AP_SMPBOOT_THREADS, CPUHP_AP_NOTIFY_ONLINE, CPUHP_AP_ONLINE_DYN, CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30, + CPUHP_AP_ACTIVE, CPUHP_ONLINE, }; -- cgit v1.2.3 From bc3c57c13256c748a4e71d2ea7481d0c953e88e1 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Wed, 4 May 2016 17:01:36 -0500 Subject: rpmsg: add THIS_MODULE to rpmsg_driver in rpmsg core Add register_rpmsg_driver helper macro that adds THIS_MODULE to rpmsg_driver for the registering driver. We rename and modify the existing register_rpmsg_driver to enable this. Signed-off-by: Andrew F. Davis Acked-by: Suman Anna Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 82a673905edb..7ff5790c185a 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -169,7 +169,7 @@ struct rpmsg_driver { int register_rpmsg_device(struct rpmsg_channel *dev); void unregister_rpmsg_device(struct rpmsg_channel *dev); -int register_rpmsg_driver(struct rpmsg_driver *drv); +int __register_rpmsg_driver(struct rpmsg_driver *drv, struct module *owner); void unregister_rpmsg_driver(struct rpmsg_driver *drv); void rpmsg_destroy_ept(struct rpmsg_endpoint *); struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_channel *, @@ -177,6 +177,10 @@ struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_channel *, int rpmsg_send_offchannel_raw(struct rpmsg_channel *, u32, u32, void *, int, bool); +/* use a macro to avoid include chaining to get THIS_MODULE */ +#define register_rpmsg_driver(drv) \ + __register_rpmsg_driver(drv, THIS_MODULE) + /** * rpmsg_send() - send a message across to the remote processor * @rpdev: the rpmsg channel -- cgit v1.2.3 From f3d9f1ce079370d19b34fc9927b8b1355ac98503 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Wed, 4 May 2016 17:01:38 -0500 Subject: rpmsg: add helper macro module_rpmsg_driver This patch introduces the module_rpmsg_driver macro which is a convenience macro for rpmsg driver modules similar to module_platform_driver. It is intended to be used by drivers which init/exit section does nothing but register/unregister the rpmsg driver. By using this macro it is possible to eliminate a few lines of boilerplate code per rpmsg driver. Signed-off-by: Andrew F. Davis Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 7ff5790c185a..ada50ff36da0 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -181,6 +181,18 @@ rpmsg_send_offchannel_raw(struct rpmsg_channel *, u32, u32, void *, int, bool); #define register_rpmsg_driver(drv) \ __register_rpmsg_driver(drv, THIS_MODULE) +/** + * module_rpmsg_driver() - Helper macro for registering an rpmsg driver + * @__rpmsg_driver: rpmsg_driver struct + * + * Helper macro for rpmsg drivers which do not do anything special in module + * init/exit. This eliminates a lot of boilerplate. Each module may only + * use this macro once, and calling it replaces module_init() and module_exit() + */ +#define module_rpmsg_driver(__rpmsg_driver) \ + module_driver(__rpmsg_driver, register_rpmsg_driver, \ + unregister_rpmsg_driver) + /** * rpmsg_send() - send a message across to the remote processor * @rpdev: the rpmsg channel -- cgit v1.2.3 From 1145e6351a9fefe0965df4c6dba2a04156dc47d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 6 May 2016 20:02:56 +0200 Subject: lightnvm: implement nvm_submit_ppa_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The nvm_submit_ppa function assumes that users manage all plane blocks as a single block. Extend the API with nvm_submit_ppa_list to allow the user to send its own ppa list. If the user submits more than a single PPA, the user must take care to allocate and free the corresponding ppa list. Reviewed by: Johannes Thumshirn Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index cdcb2ccbefa8..38814e262872 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -534,6 +534,8 @@ extern int nvm_erase_blk(struct nvm_dev *, struct nvm_block *); extern void nvm_end_io(struct nvm_rq *, int); extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int, void *, int); +extern int nvm_submit_ppa_list(struct nvm_dev *, struct ppa_addr *, int, int, + int, void *, int); /* sysblk.c */ #define NVM_SYSBLK_MAGIC 0x4E564D53 /* "NVMS" */ -- cgit v1.2.3 From 4891d120b9cd419f4350b11e1231083745dcdc8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 6 May 2016 20:02:57 +0200 Subject: lightnvm: add fpg_size and pfpg_size to struct nvm_dev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The flash page size (fpg) and size across planes (pfpg) are convenient to know when allocating buffer sizes. This has previously been a calculated in various places. Replace with the pre-calculated values. Reviewed by: Johannes Thumshirn Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 38814e262872..f7c607f96858 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -323,6 +323,8 @@ struct nvm_dev { int sec_per_pg; /* only sectors for a single page */ int pgs_per_blk; int blks_per_lun; + int fpg_size; + int pfpg_size; /* size of buffer if all pages are to be read */ int sec_size; int oob_size; int mccap; -- cgit v1.2.3 From 22e8c9766a669d49cf3749d397082a5cd93374a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 6 May 2016 20:02:58 +0200 Subject: lightnvm: move block fold outside of get_bb_tbl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The get block table command returns a list of blocks and planes with their associated state. Users, such as gennvm and sysblk, manages all planes as a single virtual block. It was therefore natural to fold the bad block list before it is returned. However, to allow users, which manages on a per-plane block level, to also use the interface, the get_bb_tbl interface is changed to not fold by default and instead let the caller fold if necessary. Reviewed by: Johannes Thumshirn Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index f7c607f96858..dacaa2850428 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -41,11 +41,12 @@ struct nvm_id; struct nvm_dev; typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *); -typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *); +typedef int (nvm_bb_update_fn)(struct nvm_dev *, struct ppa_addr, u8 *, int, + void *); typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *); typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32, nvm_l2p_update_fn *, void *); -typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int, +typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, nvm_bb_update_fn *, void *); typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct nvm_rq *, int); typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); @@ -538,6 +539,7 @@ extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int, void *, int); extern int nvm_submit_ppa_list(struct nvm_dev *, struct ppa_addr *, int, int, int, void *, int); +extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int); /* sysblk.c */ #define NVM_SYSBLK_MAGIC 0x4E564D53 /* "NVMS" */ -- cgit v1.2.3 From 6063fe399d0913e7bd4462650cd4f31b479a83c9 Mon Sep 17 00:00:00 2001 From: "Simon A. F. Lund" Date: Fri, 6 May 2016 20:03:02 +0200 Subject: lightnvm: rename nvm_targets to nvm_tgt_type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The functions nvm_register_target(), nvm_unregister_target() and associated list refers to a target type that is being registered by a target type module. Rename nvm_*_targets() to nvm_*_tgt_type(), so that the intension is clear. This enables target instances to use the _nvm_*_targets() naming. Signed-off-by: Simon A. F. Lund Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index dacaa2850428..497da91510b2 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -453,8 +453,8 @@ struct nvm_tgt_type { struct list_head list; }; -extern int nvm_register_target(struct nvm_tgt_type *); -extern void nvm_unregister_target(struct nvm_tgt_type *); +extern int nvm_register_tgt_type(struct nvm_tgt_type *); +extern void nvm_unregister_tgt_type(struct nvm_tgt_type *); extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *); extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t); -- cgit v1.2.3 From 6f8645cba54a4b2983bbd46f462b5891ffbd72fc Mon Sep 17 00:00:00 2001 From: "Simon A. F. Lund" Date: Fri, 6 May 2016 20:03:03 +0200 Subject: lightnvm: refactor dev->online_target to global nvm_targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A target name must be unique. However, a per-device registration of targets is maintained on a dev->online_targets list, with a per-device search for targets upon registration. This results in a name collision when two targets, with the same name, are created on two different targets, where the per-device list is not shared. Signed-off-by: Simon A. F. Lund Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 497da91510b2..5eabdbaa87bc 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -308,7 +308,6 @@ struct nvm_dev { struct nvm_dev_ops *ops; struct list_head devices; - struct list_head online_targets; /* Media manager */ struct nvmm_type *mt; -- cgit v1.2.3 From 5136061ce705210b501ed9ecd673a67b74ebe017 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 6 May 2016 20:03:04 +0200 Subject: lightnvm: introduce nvm_for_each_lun_ppa() macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Users that wish to iterate all luns on a device. Must create a struct ppa_addr and separate iterators for channels and luns. To set the iterators, two loops are required, one to iterate channels, and another to iterate luns. This leads to decrease in readability. Introduce nvm_for_each_lun_ppa, which implements the nested loop and sets ppa, channel, and lun variable for each loop body, eliminating the boilerplate code. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 5eabdbaa87bc..3f256355b9fa 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -559,6 +559,13 @@ extern int nvm_update_sysblock(struct nvm_dev *, struct nvm_sb_info *); extern int nvm_init_sysblock(struct nvm_dev *, struct nvm_sb_info *); extern int nvm_dev_factory(struct nvm_dev *, int flags); + +#define nvm_for_each_lun_ppa(dev, ppa, chid, lunid) \ + for ((chid) = 0, (ppa).ppa = 0; (chid) < (dev)->nr_chnls; \ + (chid)++, (ppa).g.ch = (chid)) \ + for ((lunid) = 0; (lunid) < (dev)->luns_per_chnl; \ + (lunid)++, (ppa).g.lun = (lunid)) + #else /* CONFIG_NVM */ struct nvm_dev_ops; -- cgit v1.2.3 From e11903f5dfeb4f59fe93316d47f2ee5982e91e60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 6 May 2016 20:03:05 +0200 Subject: lightnvm: refactor device ops->get_bb_tbl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The device ops->get_bb_tbl() takes a callback, that allows the caller to use its own callback function to update its data structures in the returning function. This makes it difficult to send parameters to the callback, and usually is circumvented by small private structures, that both carry the callers state and any flags needed to fulfill the update. Refactor ops->get_bb_tbl() to fill a data buffer with the status of the blocks returned, and let the user call the callback function manually. That will provide the necessary flags and data structures and simplify the logic around ops->get_bb_tbl(). Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 3f256355b9fa..16d4f2edf5b4 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -41,13 +41,10 @@ struct nvm_id; struct nvm_dev; typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *); -typedef int (nvm_bb_update_fn)(struct nvm_dev *, struct ppa_addr, u8 *, int, - void *); typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *); typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32, nvm_l2p_update_fn *, void *); -typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, - nvm_bb_update_fn *, void *); +typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *); typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct nvm_rq *, int); typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); typedef int (nvm_erase_blk_fn)(struct nvm_dev *, struct nvm_rq *); @@ -539,6 +536,7 @@ extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int, extern int nvm_submit_ppa_list(struct nvm_dev *, struct ppa_addr *, int, int, int, void *, int); extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int); +extern int nvm_get_bb_tbl(struct nvm_dev *, struct ppa_addr, u8 *); /* sysblk.c */ #define NVM_SYSBLK_MAGIC 0x4E564D53 /* "NVMS" */ -- cgit v1.2.3 From 5ebc7d9fe13ff9bd3622d0be3cd39c8751459be6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 6 May 2016 20:03:07 +0200 Subject: lightnvm: make nvm_set_rqd_ppalist() aware of vblks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A virtual block enables a block to identify multiple physical blocks. This is useful for metadata where a device media supports multiple planes. In that case, a block, with multiple planes can be managed as a single vblk. Reducing the metadata required by one forth. nvm_set_rqd_ppalist() takes care of expanding a ppa_list with vblks automatically. However, for some use-cases, where only a single physical block is required, the ppa_list should not be expanded. Therefore, add a vblk parameter to nvm_set_rqd_ppalist(), and only expand the ppa_list if vblk is set. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 16d4f2edf5b4..9ae0b7c6deb2 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -526,7 +526,7 @@ extern int nvm_submit_io(struct nvm_dev *, struct nvm_rq *); extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *); extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *); extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *, - struct ppa_addr *, int); + struct ppa_addr *, int, int); extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *); extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int); extern int nvm_erase_blk(struct nvm_dev *, struct nvm_block *); -- cgit v1.2.3 From 00ee6cc3b74bdb4dfb71838046517beb6839647c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 6 May 2016 20:03:09 +0200 Subject: lightnvm: refactor set_bb_tbl for accepting ppa list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The set_bb_tbl takes struct nvm_rq and only uses its ppa_list and nr_pages internally. Instead, make these two variables explicit. This allows a user to call it without initializing a struct nvm_rq first. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 9ae0b7c6deb2..af72ca75dced 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -45,7 +45,7 @@ typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *); typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32, nvm_l2p_update_fn *, void *); typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *); -typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct nvm_rq *, int); +typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int); typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); typedef int (nvm_erase_blk_fn)(struct nvm_dev *, struct nvm_rq *); typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *); -- cgit v1.2.3 From 003fad376b924fc3a61c659f38da70356ec144fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 6 May 2016 20:03:12 +0200 Subject: lightnvm: enable metadata to be sent to device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable metadata buffer to be sent to the device through the metadata field on the physical rw nvme command. The size of the metadata buffer must follow dev->oob_size * # of PPAs. Signed-off-by: Javier González Updated description. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index af72ca75dced..678df4d4354d 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -230,8 +230,8 @@ struct nvm_rq { struct ppa_addr *ppa_list; - void *metadata; - dma_addr_t dma_metadata; + void *meta_list; + dma_addr_t dma_meta_list; struct completion *wait; nvm_end_io_fn *end_io; -- cgit v1.2.3 From 75b8564932ef646e8620deffacbe134846333948 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 6 May 2016 20:03:13 +0200 Subject: lightnvm: rename dma helper functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Until now, the dma pool have been exclusively used to allocate the ppa list being sent to the device. In pblk (upcoming), we use these pools to allocate metadata too. Thus, we generalize the names of some variables on the dma helper functions to make the code more readable. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 678df4d4354d..0e8e01956325 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -347,7 +347,7 @@ struct nvm_dev { unsigned max_pages_per_blk; unsigned long *lun_map; - void *ppalist_pool; + void *dma_pool; struct nvm_id identity; -- cgit v1.2.3 From 976bdfcae32ea10c2c8c2ecaeb0d85873f634dad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 6 May 2016 20:03:17 +0200 Subject: lightnvm: remove mgt targets on mgt removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Targets associated with a device manager are not freed on device removal. They have to be manually removed before shutdown. Make sure any outstanding targets are freed upon shutdown. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 0e8e01956325..cde31ffe2d62 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -200,6 +200,7 @@ struct nvm_id { struct nvm_target { struct list_head list; + struct nvm_dev *dev; struct nvm_tgt_type *type; struct gendisk *disk; }; -- cgit v1.2.3 From 04a8aa173bd9410849526a80dcf733da9c3e142d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 6 May 2016 20:03:18 +0200 Subject: lightnvm: expose gennvm_mark_blk to targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Targets can update a block state when having a reference to an in-memory virtual block. In the case that a target does not keep the block metadata in memory, it does not have a way to update this structure. Therefore, expose gennvm_mark_blk() through the media managers ->mark_blk() callback and let targets update the state structure through this callback. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index cde31ffe2d62..3c53911e5758 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -467,6 +467,7 @@ typedef void (nvmm_flush_blk_fn)(struct nvm_dev *, struct nvm_block *); typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, unsigned long); +typedef void (nvmm_mark_blk_fn)(struct nvm_dev *, struct ppa_addr, int); typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int); typedef int (nvmm_reserve_lun)(struct nvm_dev *, int); typedef void (nvmm_release_lun)(struct nvm_dev *, int); @@ -494,6 +495,9 @@ struct nvmm_type { nvmm_submit_io_fn *submit_io; nvmm_erase_blk_fn *erase_blk; + /* Bad block mgmt */ + nvmm_mark_blk_fn *mark_blk; + /* Configuration management */ nvmm_get_lun_fn *get_lun; nvmm_reserve_lun *reserve_lun; -- cgit v1.2.3 From df414b33bb1eb3a0ae52ccd4ecfec9323a4f89dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 6 May 2016 20:03:19 +0200 Subject: lightnvm: add is_cached entry to struct ppa_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A target requires a method to identify PPAs that are either cached in memory or on disk. This can efficiently be maintained within the PPA. The target host-side translation table can then lookup a PPA and know from the PPA if it is cached or on disk. In the case it is cached, it is the responsibility of the target to maintain this cache. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 3c53911e5758..3a810d7bd25e 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -18,7 +18,7 @@ enum { #define NVM_SEC_BITS (8) #define NVM_PL_BITS (8) #define NVM_LUN_BITS (8) -#define NVM_CH_BITS (8) +#define NVM_CH_BITS (7) struct ppa_addr { /* Generic structure for all addresses */ @@ -30,8 +30,14 @@ struct ppa_addr { u64 pl : NVM_PL_BITS; u64 lun : NVM_LUN_BITS; u64 ch : NVM_CH_BITS; + u64 reserved : 1; } g; + struct { + u64 line : 63; + u64 is_cached : 1; + } c; + u64 ppa; }; }; -- cgit v1.2.3 From 6d5be9590b5e15124e3c8b319c8d7ce01abcf07d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 6 May 2016 20:03:20 +0200 Subject: lightnvm: rename nr_pages to nr_ppas on nvm_rq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The number of ppas contained on a request is not necessarily the number of pages that it maps to neither on the target nor on the device side. In order to avoid confusion, rename nr_pages to nr_ppas since it is what the variable actually contains. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 3a810d7bd25e..b2991c724640 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -244,7 +244,7 @@ struct nvm_rq { nvm_end_io_fn *end_io; uint8_t opcode; - uint16_t nr_pages; + uint16_t nr_ppas; uint16_t flags; u64 ppa_status; /* ppa media status */ -- cgit v1.2.3 From 116f7d4a21fe450efc652c4850eb27cda36c9db0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 6 May 2016 20:03:21 +0200 Subject: lightnvm: reserved space calculation incorrect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The nvm_dev->max_pages_per_blk variable was removed in favor of the new nvm->sec_per_blk variable. The ->max_pages_per_blk variable was still used in rrpc_capacity, reporting the reserved capacity to zero. Replace with ->sec_per_blk to calculate the reserved area again. Signed-off-by: Javier González Updated patch description. Was "lightnvm: eliminate redundant variable" Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index b2991c724640..ef2c7d2e76c4 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -351,7 +351,6 @@ struct nvm_dev { unsigned long total_blocks; unsigned long total_secs; int nr_luns; - unsigned max_pages_per_blk; unsigned long *lun_map; void *dma_pool; -- cgit v1.2.3 From db58ba45920255e967cc1d62a430cebd634b5046 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 5 May 2016 19:49:12 -0700 Subject: bpf: wire in data and data_end for cls_act_bpf allow cls_bpf and act_bpf programs access skb->data and skb->data_end pointers. The bpf helpers that change skb->data need to update data_end pointer as well. The verifier checks that programs always reload data, data_end pointers after calls to such bpf helpers. We cannot add 'data_end' pointer to struct qdisc_skb_cb directly, since it's embedded as-is by infiniband ipoib, so wrapper struct is needed. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 43aa1f8855c7..ec1411c89105 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -352,6 +352,22 @@ struct sk_filter { #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN +struct bpf_skb_data_end { + struct qdisc_skb_cb qdisc_cb; + void *data_end; +}; + +/* compute the linear packet data range [data, data_end) which + * will be accessed by cls_bpf and act_bpf programs + */ +static inline void bpf_compute_data_end(struct sk_buff *skb) +{ + struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; + + BUILD_BUG_ON(sizeof(*cb) > FIELD_SIZEOF(struct sk_buff, cb)); + cb->data_end = skb->data + skb_headlen(skb); +} + static inline u8 *bpf_skb_cb(struct sk_buff *skb) { /* eBPF programs may read/write skb->cb[] area to transfer meta -- cgit v1.2.3 From 229740c63169462a838a8b8e16391ed000934631 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Tue, 3 May 2016 16:10:21 -0700 Subject: udp_offload: Set encapsulation before inner completes. UDP tunnel segmentation code relies on the inner offsets being set for an UDP tunnel GSO packet, but the inner *_complete() functions will set the inner offsets only if 'encapsulation' is set before calling them. Currently, udp_gro_complete() sets 'encapsulation' only after the inner *_complete() functions are done. This causes the inner offsets having invalid values after udp_gro_complete() returns, which in turn will make it impossible to properly segment the packet in case it needs to be forwarded, which would be visible to the user either as invalid packets being sent or as packet loss. This patch fixes this by setting skb's 'encapsulation' in udp_gro_complete() before calling into the inner complete functions, and by making each possible UDP tunnel gro_complete() callback set the inner_mac_header to the beginning of the tunnel payload. Signed-off-by: Jarno Rajahalme Reviewed-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b3c46b019ac1..78181a88903b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2164,6 +2164,9 @@ struct packet_offload { struct udp_offload; +/* 'skb->encapsulation' is set before gro_complete() is called. gro_complete() + * must set 'skb->inner_mac_header' to the beginning of tunnel payload. + */ struct udp_offload_callbacks { struct sk_buff **(*gro_receive)(struct sk_buff **head, struct sk_buff *skb, -- cgit v1.2.3 From 1cfd63166c8e7494a1b457df9bb291a601091c26 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 6 May 2016 22:39:30 +0100 Subject: efi: Merge boolean flag arguments The parameters atomic and duplicates of efivar_init always have opposite values. Drop the parameter atomic, replace the uses of !atomic with duplicates, and update the call sites accordingly. The code using duplicates is slightly reorganized with an 'else', to avoid duplicating the lock code. Signed-off-by: Julia Lawall Signed-off-by: Matt Fleming Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jeremy Kerr Cc: Linus Torvalds Cc: Matthew Garrett Cc: Peter Zijlstra Cc: Saurabh Sengar Cc: Thomas Gleixner Cc: Vaishali Thakkar Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1462570771-13324-5-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/efi.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index aa36fb8bea4b..df7acb51f3cc 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1336,8 +1336,7 @@ int efivars_unregister(struct efivars *efivars); struct kobject *efivars_kobject(void); int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), - void *data, bool atomic, bool duplicates, - struct list_head *head); + void *data, bool duplicates, struct list_head *head); void efivar_entry_add(struct efivar_entry *entry, struct list_head *head); void efivar_entry_remove(struct efivar_entry *entry); -- cgit v1.2.3 From b5a7aef1ef436ec005fef0efe31a676ec5f4ab31 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 4 May 2016 22:05:01 -0700 Subject: fscrypto/f2fs: allow fs-specific key prefix for fs encryption This patch allows fscrypto to handle a second key prefix given by filesystem. The main reason is to provide backward compatibility, since previously f2fs used "f2fs:" as a crypto prefix instead of "fscrypt:". Later, ext4 should also provide key_prefix() to give "ext4:". One concern decribed by Ted would be kinda double check overhead of prefixes. In x86, for example, validate_user_key consumes 8 ms after boot-up, which turns out derive_key_aes() consumed most of the time to load specific crypto module. After such the cold miss, it shows almost zero latencies, which treats as a negligible overhead. Note that request_key() detects wrong prefix in prior to derive_key_aes() even. Cc: Ted Tso Cc: stable@vger.kernel.org # v4.6 Signed-off-by: Jaegeuk Kim --- include/linux/fscrypto.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h index 6027f6bbb061..cfa6cde25f8e 100644 --- a/include/linux/fscrypto.h +++ b/include/linux/fscrypto.h @@ -175,6 +175,7 @@ struct fscrypt_name { */ struct fscrypt_operations { int (*get_context)(struct inode *, void *, size_t); + int (*key_prefix)(struct inode *, u8 **); int (*prepare_context)(struct inode *); int (*set_context)(struct inode *, const void *, size_t, void *); int (*dummy_context)(struct inode *); -- cgit v1.2.3 From 824815c4cd13e384ef5a4be725ec4b06e4ad2c2a Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 31 Mar 2016 16:23:51 -0400 Subject: reservation: add reservation_object_get_excl_rcu() In the atomic modesetting path, each driver simply wants to grab a ref to the exclusive fence from a reservation object to store in the incoming drm_plane_state, without doing the whole RCU dance. Since each driver will need to do this, lets make a helper. v2: rename to _rcu instead of _unlocked to be more consistent Signed-off-by: Rob Clark Acked-by: Sumit Semwal --- include/linux/reservation.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reservation.h b/include/linux/reservation.h index 5a0b64cf68b4..49d057655d62 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -120,6 +120,24 @@ reservation_object_get_excl(struct reservation_object *obj) reservation_object_held(obj)); } +static inline struct fence * +reservation_object_get_excl_rcu(struct reservation_object *obj) +{ + struct fence *fence; + unsigned seq; +retry: + seq = read_seqcount_begin(&obj->seq); + rcu_read_lock(); + fence = rcu_dereference(obj->fence_excl); + if (read_seqcount_retry(&obj->seq, seq)) { + rcu_read_unlock(); + goto retry; + } + fence = fence_get(fence); + rcu_read_unlock(); + return fence; +} + int reservation_object_reserve_shared(struct reservation_object *obj); void reservation_object_add_shared_fence(struct reservation_object *obj, struct fence *fence); -- cgit v1.2.3 From 43315f31adc2bf3b35e04dcf2372c3bb08014ed1 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 6 May 2016 07:09:07 -0700 Subject: soc: qcom: smd: Introduce compile stubs Introduce compile stubs for the SMD API, allowing consumers to be compile tested. Acked-by: Andy Gross Signed-off-by: Bjorn Andersson Signed-off-by: David S. Miller --- include/linux/soc/qcom/smd.h | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/smd.h b/include/linux/soc/qcom/smd.h index d0cb6d189a0a..46a984f5e3a3 100644 --- a/include/linux/soc/qcom/smd.h +++ b/include/linux/soc/qcom/smd.h @@ -45,13 +45,39 @@ struct qcom_smd_driver { int (*callback)(struct qcom_smd_device *, const void *, size_t); }; +#if IS_ENABLED(CONFIG_QCOM_SMD) + int qcom_smd_driver_register(struct qcom_smd_driver *drv); void qcom_smd_driver_unregister(struct qcom_smd_driver *drv); +int qcom_smd_send(struct qcom_smd_channel *channel, const void *data, int len); + +#else + +static inline int qcom_smd_driver_register(struct qcom_smd_driver *drv) +{ + return -ENXIO; +} + +static inline void qcom_smd_driver_unregister(struct qcom_smd_driver *drv) +{ + /* This shouldn't be possible */ + WARN_ON(1); +} + +static inline int qcom_smd_send(struct qcom_smd_channel *channel, + const void *data, int len) +{ + /* This shouldn't be possible */ + WARN_ON(1); + return -ENXIO; +} + +#endif + #define module_qcom_smd_driver(__smd_driver) \ module_driver(__smd_driver, qcom_smd_driver_register, \ qcom_smd_driver_unregister) -int qcom_smd_send(struct qcom_smd_channel *channel, const void *data, int len); #endif -- cgit v1.2.3 From bdabad3e363d825ddf9679dd431cca0b2c30f881 Mon Sep 17 00:00:00 2001 From: Courtney Cavin Date: Fri, 6 May 2016 07:09:08 -0700 Subject: net: Add Qualcomm IPC router Add an implementation of Qualcomm's IPC router protocol, used to communicate with service providing remote processors. Signed-off-by: Courtney Cavin Signed-off-by: Bjorn Andersson [bjorn: Cope with 0 being a valid node id and implement RTM_NEWADDR] Signed-off-by: Bjorn Andersson Signed-off-by: David S. Miller --- include/linux/socket.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 73bf6c6a833b..b5cc5a6d7011 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -201,8 +201,9 @@ struct ucred { #define AF_NFC 39 /* NFC sockets */ #define AF_VSOCK 40 /* vSockets */ #define AF_KCM 41 /* Kernel Connection Multiplexor*/ +#define AF_QIPCRTR 42 /* Qualcomm IPC Router */ -#define AF_MAX 42 /* For now.. */ +#define AF_MAX 43 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -249,6 +250,7 @@ struct ucred { #define PF_NFC AF_NFC #define PF_VSOCK AF_VSOCK #define PF_KCM AF_KCM +#define PF_QIPCRTR AF_QIPCRTR #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. */ -- cgit v1.2.3 From 0e10d549f6eebd0a26bf075309b6fb947f4c1cb2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 15 Apr 2016 17:44:18 +0300 Subject: mfd: wm8400-core: Delete wm8400_reg_read() There was a static checker warning in wm8400_reg_read() because we were returning u16 and that can't hold the negative error codes. The function isn't used, so let's just delete it. Signed-off-by: Dan Carpenter Acked-by: Charles Keepax Signed-off-by: Lee Jones --- include/linux/mfd/wm8400-private.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8400-private.h b/include/linux/mfd/wm8400-private.h index 2de565b94d0c..4ee908f5b834 100644 --- a/include/linux/mfd/wm8400-private.h +++ b/include/linux/mfd/wm8400-private.h @@ -923,7 +923,6 @@ struct wm8400 { #define WM8400_LINE_CMP_VTHD_SHIFT 0 /* LINE_CMP_VTHD - [3:0] */ #define WM8400_LINE_CMP_VTHD_WIDTH 4 /* LINE_CMP_VTHD - [3:0] */ -u16 wm8400_reg_read(struct wm8400 *wm8400, u8 reg); int wm8400_block_read(struct wm8400 *wm8400, u8 reg, int count, u16 *data); static inline int wm8400_set_bits(struct wm8400 *wm8400, u8 reg, -- cgit v1.2.3 From bb208f144cf3f59d8f89a09a80efd04389718907 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 21 Mar 2016 20:18:21 +0100 Subject: can: fix handling of unmodifiable configuration options As described in 'can: m_can: tag current CAN FD controllers as non-ISO' (6cfda7fbebe) it is possible to define fixed configuration options by setting the according bit in 'ctrlmode' and clear it in 'ctrlmode_supported'. This leads to the incovenience that the fixed configuration bits can not be passed by netlink even when they have the correct values (e.g. non-ISO, FD). This patch fixes that issue and not only allows fixed set bit values to be set again but now requires(!) to provide these fixed values at configuration time. A valid CAN FD configuration consists of a nominal/arbitration bittiming, a data bittiming and a control mode with CAN_CTRLMODE_FD set - which is now enforced by a new can_validate() function. This fix additionally removed the inconsistency that was prohibiting the support of 'CANFD-only' controller drivers, like the RCar CAN FD. For this reason a new helper can_set_static_ctrlmode() has been introduced to provide a proper interface to handle static enabled CAN controller options. Reported-by: Ramesh Shanmugasundaram Signed-off-by: Oliver Hartkopp Reviewed-by: Ramesh Shanmugasundaram Cc: # >= 3.18 Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 735f9f8c4e43..5261751f6bd4 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -40,8 +40,11 @@ struct can_priv { struct can_clock clock; enum can_state state; - u32 ctrlmode; - u32 ctrlmode_supported; + + /* CAN controller features - see include/uapi/linux/can/netlink.h */ + u32 ctrlmode; /* current options setting */ + u32 ctrlmode_supported; /* options that can be modified by netlink */ + u32 ctrlmode_static; /* static enabled options for driver/hardware */ int restart_ms; struct timer_list restart_timer; @@ -108,6 +111,21 @@ static inline bool can_is_canfd_skb(const struct sk_buff *skb) return skb->len == CANFD_MTU; } +/* helper to define static CAN controller features at device creation time */ +static inline void can_set_static_ctrlmode(struct net_device *dev, + u32 static_mode) +{ + struct can_priv *priv = netdev_priv(dev); + + /* alloc_candev() succeeded => netdev_priv() is valid at this point */ + priv->ctrlmode = static_mode; + priv->ctrlmode_static = static_mode; + + /* override MTU which was set by default in can_setup()? */ + if (static_mode & CAN_CTRLMODE_FD) + dev->mtu = CANFD_MTU; +} + /* get data length from can_dlc with sanitized can_dlc */ u8 can_dlc2len(u8 can_dlc); -- cgit v1.2.3 From fe238e601d2519f259103ab65caea3b077ed7b39 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Fri, 1 Apr 2016 13:45:09 -0400 Subject: NFS: Save struct inode * inside nfs_commit_info to clarify usage of i_lock Commit ea2cf22 created nfs_commit_info and saved &inode->i_lock inside this NFS specific structure. This obscures the usage of i_lock. Instead, save struct inode * so later it's clear the spinlock taken is i_lock. Should be no functional change. Signed-off-by: Dave Wysochanski Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d320906cf13e..cb9982d8f38f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1431,7 +1431,7 @@ struct nfs_commit_completion_ops { }; struct nfs_commit_info { - spinlock_t *lock; /* inode->i_lock */ + struct inode *inode; /* Needed for inode->i_lock */ struct nfs_mds_commit_info *mds; struct pnfs_ds_commit_info *ds; struct nfs_direct_req *dreq; /* O_DIRECT request */ -- cgit v1.2.3 From 3c6e0bc8a14cfc8e1d4ab87f46f77b070c815bf1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 21 Apr 2016 20:51:54 -0400 Subject: sunrpc: plumb gfp_t parm into crcreate operation We need to be able to call the generic_cred creator from different contexts. Add a gfp_t parm to the crcreate operation and to rpcauth_lookup_credcache. For now, we just push the gfp_t parms up one level to the *_lookup_cred functions. Signed-off-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/sunrpc/auth.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 6a241a277249..3b616aa7e4d2 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -127,7 +127,7 @@ struct rpc_authops { void (*destroy)(struct rpc_auth *); struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int); - struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int); + struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int, gfp_t); int (*list_pseudoflavors)(rpc_authflavor_t *, int); rpc_authflavor_t (*info2flavor)(struct rpcsec_gss_info *); int (*flavor2info)(rpc_authflavor_t, @@ -178,7 +178,7 @@ rpc_authflavor_t rpcauth_get_pseudoflavor(rpc_authflavor_t, int rpcauth_get_gssinfo(rpc_authflavor_t, struct rpcsec_gss_info *); int rpcauth_list_flavors(rpc_authflavor_t *, int); -struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); +struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int, gfp_t); void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); struct rpc_cred * rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *, int); -- cgit v1.2.3 From c065d229e308ede426a3608cf480c61983b36fb8 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 21 Apr 2016 20:51:55 -0400 Subject: sunrpc: add rpc_lookup_generic_cred Add function rpc_lookup_generic_cred, which allows lookups of a generic credential that's not current_cred(). [jlayton: add gfp_t parm] Signed-off-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/sunrpc/auth.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 3b616aa7e4d2..16bd8f8fef8c 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -167,6 +167,7 @@ void rpc_destroy_authunix(void); struct rpc_cred * rpc_lookup_cred(void); struct rpc_cred * rpc_lookup_cred_nonblock(void); +struct rpc_cred * rpc_lookup_generic_cred(struct auth_cred *, int, gfp_t); struct rpc_cred * rpc_lookup_machine_cred(const char *service_name); int rpcauth_register(const struct rpc_authops *); int rpcauth_unregister(const struct rpc_authops *); -- cgit v1.2.3 From 62dbef2ae41393eba2f6853ca174130f2d09c7d3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 21 Apr 2016 20:51:56 -0400 Subject: sunrpc: add a get_rpccred_rcu inline Sometimes we might have a RCU managed credential pointer and don't want to use locking to handle it. Add a function that will take a reference to the cred iff the refcount is not already zero. Callers can dereference the pointer under the rcu_read_lock and use that function to take a reference only if the cred is not on its way to destruction. Signed-off-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/sunrpc/auth.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 16bd8f8fef8c..6f36b2bf3e05 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -206,5 +206,23 @@ struct rpc_cred * get_rpccred(struct rpc_cred *cred) return cred; } +/** + * get_rpccred_rcu - get a reference to a cred using rcu-protected pointer + * @cred: cred of which to take a reference + * + * In some cases, we may have a pointer to a credential to which we + * want to take a reference, but don't already have one. Because these + * objects are freed using RCU, we can access the cr_count while its + * on its way to destruction and only take a reference if it's not already + * zero. + */ +static inline struct rpc_cred * +get_rpccred_rcu(struct rpc_cred *cred) +{ + if (atomic_inc_not_zero(&cred->cr_count)) + return cred; + return NULL; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_AUTH_H */ -- cgit v1.2.3 From 3c3e8943ac6f36ca5d18ca61b30634fb560b4ebb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 7 Apr 2016 18:42:04 +0100 Subject: iommu: remove unused priv field from struct iommu_ops The priv field from iommu_ops is a hangover from the of_dma_configure series and isn't actually used. Remove it before it has chance to spread. Signed-off-by: Will Deacon Acked-by: Laurent Pinchart Acked-by: Will Deacon Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ef7a6ecd8584..8a2570443b80 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -156,7 +156,6 @@ struct iommu_dm_region { * @domain_get_windows: Return the number of windows for a domain * @of_xlate: add OF master IDs to iommu grouping * @pgsize_bitmap: bitmap of supported page sizes - * @priv: per-instance data private to the iommu driver */ struct iommu_ops { bool (*capable)(enum iommu_cap); @@ -198,7 +197,6 @@ struct iommu_ops { int (*of_xlate)(struct device *dev, struct of_phandle_args *args); unsigned long pgsize_bitmap; - void *priv; }; #define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */ -- cgit v1.2.3 From 53c92d793395fdab9edbd2f79b084bb6b2e6ae79 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 7 Apr 2016 18:42:05 +0100 Subject: iommu: of: enforce const-ness of struct iommu_ops As a set of driver-provided callbacks and static data, there is no compelling reason for struct iommu_ops to be mutable in core code, so enforce const-ness throughout. Acked-by: Thierry Reding Signed-off-by: Robin Murphy Acked-by: Will Deacon Signed-off-by: Joerg Roedel --- include/linux/dma-mapping.h | 2 +- include/linux/of_iommu.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 9ea9aba28049..71c1b215ef66 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -514,7 +514,7 @@ extern u64 dma_get_required_mask(struct device *dev); #ifndef arch_setup_dma_ops static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, - u64 size, struct iommu_ops *iommu, + u64 size, const struct iommu_ops *iommu, bool coherent) { } #endif diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h index ffbe4707d4aa..bd02b44902d0 100644 --- a/include/linux/of_iommu.h +++ b/include/linux/of_iommu.h @@ -12,7 +12,7 @@ extern int of_get_dma_window(struct device_node *dn, const char *prefix, size_t *size); extern void of_iommu_init(void); -extern struct iommu_ops *of_iommu_configure(struct device *dev, +extern const struct iommu_ops *of_iommu_configure(struct device *dev, struct device_node *master_np); #else @@ -25,7 +25,7 @@ static inline int of_get_dma_window(struct device_node *dn, const char *prefix, } static inline void of_iommu_init(void) { } -static inline struct iommu_ops *of_iommu_configure(struct device *dev, +static inline const struct iommu_ops *of_iommu_configure(struct device *dev, struct device_node *master_np) { return NULL; @@ -33,8 +33,8 @@ static inline struct iommu_ops *of_iommu_configure(struct device *dev, #endif /* CONFIG_OF_IOMMU */ -void of_iommu_set_ops(struct device_node *np, struct iommu_ops *ops); -struct iommu_ops *of_iommu_get_ops(struct device_node *np); +void of_iommu_set_ops(struct device_node *np, const struct iommu_ops *ops); +const struct iommu_ops *of_iommu_get_ops(struct device_node *np); extern struct of_device_id __iommu_of_table; -- cgit v1.2.3 From d16e0faab911cc0e100a1e8e93635b432566608e Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 7 Apr 2016 18:42:06 +0100 Subject: iommu: Allow selecting page sizes per domain Many IOMMUs support multiple page table formats, meaning that any given domain may only support a subset of the hardware page sizes presented in iommu_ops->pgsize_bitmap. There are also certain use-cases where the creator of a domain may want to control which page sizes are used, for example to force the use of hugepage mappings to reduce pagetable walk depth. To this end, add a per-domain pgsize_bitmap to represent the subset of page sizes actually in use, to make it possible for domains with different requirements to coexist. Signed-off-by: Will Deacon [rm: hijacked and rebased original patch with new commit message] Signed-off-by: Robin Murphy Acked-by: Will Deacon Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 8a2570443b80..7811294bc0f7 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -78,6 +78,7 @@ struct iommu_domain_geometry { struct iommu_domain { unsigned type; const struct iommu_ops *ops; + unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ iommu_fault_handler_t handler; void *handler_token; struct iommu_domain_geometry geometry; @@ -155,7 +156,7 @@ struct iommu_dm_region { * @domain_set_windows: Set the number of windows for a domain * @domain_get_windows: Return the number of windows for a domain * @of_xlate: add OF master IDs to iommu grouping - * @pgsize_bitmap: bitmap of supported page sizes + * @pgsize_bitmap: bitmap of all possible supported page sizes */ struct iommu_ops { bool (*capable)(enum iommu_cap); -- cgit v1.2.3 From 3b6b7e19e31a816ee02a8d4372cbea9ad7db3784 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 13 Apr 2016 17:29:10 +0100 Subject: iommu/dma: Finish optimising higher-order allocations Now that we know exactly which page sizes our caller wants to use in the given domain, we can restrict higher-order allocation attempts to just those sizes, if any, and avoid wasting any time or effort on other sizes which offer no benefit. In the same vein, this also lets us accommodate a minimum order greater than 0 for special cases. Signed-off-by: Robin Murphy Acked-by: Will Deacon Tested-by: Yong Wu Signed-off-by: Joerg Roedel --- include/linux/dma-iommu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index fc481037478a..8443bbb5c071 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -38,8 +38,8 @@ int dma_direction_to_prot(enum dma_data_direction dir, bool coherent); * These implement the bulk of the relevant DMA mapping callbacks, but require * the arch code to take care of attributes and cache maintenance */ -struct page **iommu_dma_alloc(struct device *dev, size_t size, - gfp_t gfp, int prot, dma_addr_t *handle, +struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, + struct dma_attrs *attrs, int prot, dma_addr_t *handle, void (*flush_page)(struct device *, const void *, phys_addr_t)); void iommu_dma_free(struct device *dev, struct page **pages, size_t size, dma_addr_t *handle); -- cgit v1.2.3 From 327156c593600e0f08575621c2a56f311d482e7a Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Thu, 28 Apr 2016 15:28:56 +0530 Subject: mfd: max77620: Add core driver for MAX77620/MAX20024 MAX77620/MAX20024 are Power Management IC from the MAXIM. It supports RTC, multiple GPIOs, multiple DCDC and LDOs, watchdog, clock etc. Add MFD drier to provides common support for accessing the device; additional drivers is developed on respected subsystem in order to use the functionality of the device. Signed-off-by: Laxman Dewangan Signed-off-by: Mallikarjun Kasoju Reviewed-by: Krzysztof Kozlowski Signed-off-by: Lee Jones --- include/linux/mfd/max77620.h | 346 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 346 insertions(+) create mode 100644 include/linux/mfd/max77620.h (limited to 'include/linux') diff --git a/include/linux/mfd/max77620.h b/include/linux/mfd/max77620.h new file mode 100644 index 000000000000..3ca0af07fc78 --- /dev/null +++ b/include/linux/mfd/max77620.h @@ -0,0 +1,346 @@ +/* + * Defining registers address and its bit definitions of MAX77620 and MAX20024 + * + * Copyright (C) 2016 NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + */ + +#ifndef _MFD_MAX77620_H_ +#define _MFD_MAX77620_H_ + +#include + +/* GLOBAL, PMIC, GPIO, FPS, ONOFFC, CID Registers */ +#define MAX77620_REG_CNFGGLBL1 0x00 +#define MAX77620_REG_CNFGGLBL2 0x01 +#define MAX77620_REG_CNFGGLBL3 0x02 +#define MAX77620_REG_CNFG1_32K 0x03 +#define MAX77620_REG_CNFGBBC 0x04 +#define MAX77620_REG_IRQTOP 0x05 +#define MAX77620_REG_INTLBT 0x06 +#define MAX77620_REG_IRQSD 0x07 +#define MAX77620_REG_IRQ_LVL2_L0_7 0x08 +#define MAX77620_REG_IRQ_LVL2_L8 0x09 +#define MAX77620_REG_IRQ_LVL2_GPIO 0x0A +#define MAX77620_REG_ONOFFIRQ 0x0B +#define MAX77620_REG_NVERC 0x0C +#define MAX77620_REG_IRQTOPM 0x0D +#define MAX77620_REG_INTENLBT 0x0E +#define MAX77620_REG_IRQMASKSD 0x0F +#define MAX77620_REG_IRQ_MSK_L0_7 0x10 +#define MAX77620_REG_IRQ_MSK_L8 0x11 +#define MAX77620_REG_ONOFFIRQM 0x12 +#define MAX77620_REG_STATLBT 0x13 +#define MAX77620_REG_STATSD 0x14 +#define MAX77620_REG_ONOFFSTAT 0x15 + +/* SD and LDO Registers */ +#define MAX77620_REG_SD0 0x16 +#define MAX77620_REG_SD1 0x17 +#define MAX77620_REG_SD2 0x18 +#define MAX77620_REG_SD3 0x19 +#define MAX77620_REG_SD4 0x1A +#define MAX77620_REG_DVSSD0 0x1B +#define MAX77620_REG_DVSSD1 0x1C +#define MAX77620_REG_SD0_CFG 0x1D +#define MAX77620_REG_SD1_CFG 0x1E +#define MAX77620_REG_SD2_CFG 0x1F +#define MAX77620_REG_SD3_CFG 0x20 +#define MAX77620_REG_SD4_CFG 0x21 +#define MAX77620_REG_SD_CFG2 0x22 +#define MAX77620_REG_LDO0_CFG 0x23 +#define MAX77620_REG_LDO0_CFG2 0x24 +#define MAX77620_REG_LDO1_CFG 0x25 +#define MAX77620_REG_LDO1_CFG2 0x26 +#define MAX77620_REG_LDO2_CFG 0x27 +#define MAX77620_REG_LDO2_CFG2 0x28 +#define MAX77620_REG_LDO3_CFG 0x29 +#define MAX77620_REG_LDO3_CFG2 0x2A +#define MAX77620_REG_LDO4_CFG 0x2B +#define MAX77620_REG_LDO4_CFG2 0x2C +#define MAX77620_REG_LDO5_CFG 0x2D +#define MAX77620_REG_LDO5_CFG2 0x2E +#define MAX77620_REG_LDO6_CFG 0x2F +#define MAX77620_REG_LDO6_CFG2 0x30 +#define MAX77620_REG_LDO7_CFG 0x31 +#define MAX77620_REG_LDO7_CFG2 0x32 +#define MAX77620_REG_LDO8_CFG 0x33 +#define MAX77620_REG_LDO8_CFG2 0x34 +#define MAX77620_REG_LDO_CFG3 0x35 + +#define MAX77620_LDO_SLEW_RATE_MASK 0x1 + +/* LDO Configuration 3 */ +#define MAX77620_TRACK4_MASK BIT(5) +#define MAX77620_TRACK4_SHIFT 5 + +/* Voltage */ +#define MAX77620_SDX_VOLT_MASK 0xFF +#define MAX77620_SD0_VOLT_MASK 0x3F +#define MAX77620_SD1_VOLT_MASK 0x7F +#define MAX77620_LDO_VOLT_MASK 0x3F + +#define MAX77620_REG_GPIO0 0x36 +#define MAX77620_REG_GPIO1 0x37 +#define MAX77620_REG_GPIO2 0x38 +#define MAX77620_REG_GPIO3 0x39 +#define MAX77620_REG_GPIO4 0x3A +#define MAX77620_REG_GPIO5 0x3B +#define MAX77620_REG_GPIO6 0x3C +#define MAX77620_REG_GPIO7 0x3D +#define MAX77620_REG_PUE_GPIO 0x3E +#define MAX77620_REG_PDE_GPIO 0x3F +#define MAX77620_REG_AME_GPIO 0x40 +#define MAX77620_REG_ONOFFCNFG1 0x41 +#define MAX77620_REG_ONOFFCNFG2 0x42 + +/* FPS Registers */ +#define MAX77620_REG_FPS_CFG0 0x43 +#define MAX77620_REG_FPS_CFG1 0x44 +#define MAX77620_REG_FPS_CFG2 0x45 +#define MAX77620_REG_FPS_LDO0 0x46 +#define MAX77620_REG_FPS_LDO1 0x47 +#define MAX77620_REG_FPS_LDO2 0x48 +#define MAX77620_REG_FPS_LDO3 0x49 +#define MAX77620_REG_FPS_LDO4 0x4A +#define MAX77620_REG_FPS_LDO5 0x4B +#define MAX77620_REG_FPS_LDO6 0x4C +#define MAX77620_REG_FPS_LDO7 0x4D +#define MAX77620_REG_FPS_LDO8 0x4E +#define MAX77620_REG_FPS_SD0 0x4F +#define MAX77620_REG_FPS_SD1 0x50 +#define MAX77620_REG_FPS_SD2 0x51 +#define MAX77620_REG_FPS_SD3 0x52 +#define MAX77620_REG_FPS_SD4 0x53 +#define MAX77620_REG_FPS_NONE 0 + +#define MAX77620_FPS_SRC_MASK 0xC0 +#define MAX77620_FPS_SRC_SHIFT 6 +#define MAX77620_FPS_PU_PERIOD_MASK 0x38 +#define MAX77620_FPS_PU_PERIOD_SHIFT 3 +#define MAX77620_FPS_PD_PERIOD_MASK 0x07 +#define MAX77620_FPS_PD_PERIOD_SHIFT 0 +#define MAX77620_FPS_TIME_PERIOD_MASK 0x38 +#define MAX77620_FPS_TIME_PERIOD_SHIFT 3 +#define MAX77620_FPS_EN_SRC_MASK 0x06 +#define MAX77620_FPS_EN_SRC_SHIFT 1 +#define MAX77620_FPS_ENFPS_SW_MASK 0x01 +#define MAX77620_FPS_ENFPS_SW 0x01 + +/* Minimum and maximum FPS period time (in microseconds) are + * different for MAX77620 and Max20024. + */ +#define MAX77620_FPS_PERIOD_MIN_US 40 +#define MAX20024_FPS_PERIOD_MIN_US 20 + +#define MAX77620_FPS_PERIOD_MAX_US 2560 +#define MAX20024_FPS_PERIOD_MAX_US 5120 + +#define MAX77620_REG_FPS_GPIO1 0x54 +#define MAX77620_REG_FPS_GPIO2 0x55 +#define MAX77620_REG_FPS_GPIO3 0x56 +#define MAX77620_REG_FPS_RSO 0x57 +#define MAX77620_REG_CID0 0x58 +#define MAX77620_REG_CID1 0x59 +#define MAX77620_REG_CID2 0x5A +#define MAX77620_REG_CID3 0x5B +#define MAX77620_REG_CID4 0x5C +#define MAX77620_REG_CID5 0x5D + +#define MAX77620_REG_DVSSD4 0x5E +#define MAX20024_REG_MAX_ADD 0x70 + +#define MAX77620_CID_DIDM_MASK 0xF0 +#define MAX77620_CID_DIDM_SHIFT 4 + +/* CNCG2SD */ +#define MAX77620_SD_CNF2_ROVS_EN_SD1 BIT(1) +#define MAX77620_SD_CNF2_ROVS_EN_SD0 BIT(2) + +/* Device Identification Metal */ +#define MAX77620_CID5_DIDM(n) (((n) >> 4) & 0xF) +/* Device Indentification OTP */ +#define MAX77620_CID5_DIDO(n) ((n) & 0xF) + +/* SD CNFG1 */ +#define MAX77620_SD_SR_MASK 0xC0 +#define MAX77620_SD_SR_SHIFT 6 +#define MAX77620_SD_POWER_MODE_MASK 0x30 +#define MAX77620_SD_POWER_MODE_SHIFT 4 +#define MAX77620_SD_CFG1_ADE_MASK BIT(3) +#define MAX77620_SD_CFG1_ADE_DISABLE 0 +#define MAX77620_SD_CFG1_ADE_ENABLE BIT(3) +#define MAX77620_SD_FPWM_MASK 0x04 +#define MAX77620_SD_FPWM_SHIFT 2 +#define MAX77620_SD_FSRADE_MASK 0x01 +#define MAX77620_SD_FSRADE_SHIFT 0 +#define MAX77620_SD_CFG1_FPWM_SD_MASK BIT(2) +#define MAX77620_SD_CFG1_FPWM_SD_SKIP 0 +#define MAX77620_SD_CFG1_FPWM_SD_FPWM BIT(2) +#define MAX77620_SD_CFG1_FSRADE_SD_MASK BIT(0) +#define MAX77620_SD_CFG1_FSRADE_SD_DISABLE 0 +#define MAX77620_SD_CFG1_FSRADE_SD_ENABLE BIT(0) + +/* LDO_CNFG2 */ +#define MAX77620_LDO_POWER_MODE_MASK 0xC0 +#define MAX77620_LDO_POWER_MODE_SHIFT 6 +#define MAX77620_LDO_CFG2_ADE_MASK BIT(1) +#define MAX77620_LDO_CFG2_ADE_DISABLE 0 +#define MAX77620_LDO_CFG2_ADE_ENABLE BIT(1) +#define MAX77620_LDO_CFG2_SS_MASK BIT(0) +#define MAX77620_LDO_CFG2_SS_FAST BIT(0) +#define MAX77620_LDO_CFG2_SS_SLOW 0 + +#define MAX77620_IRQ_TOP_GLBL_MASK BIT(7) +#define MAX77620_IRQ_TOP_SD_MASK BIT(6) +#define MAX77620_IRQ_TOP_LDO_MASK BIT(5) +#define MAX77620_IRQ_TOP_GPIO_MASK BIT(4) +#define MAX77620_IRQ_TOP_RTC_MASK BIT(3) +#define MAX77620_IRQ_TOP_32K_MASK BIT(2) +#define MAX77620_IRQ_TOP_ONOFF_MASK BIT(1) + +#define MAX77620_IRQ_LBM_MASK BIT(3) +#define MAX77620_IRQ_TJALRM1_MASK BIT(2) +#define MAX77620_IRQ_TJALRM2_MASK BIT(1) + +#define MAX77620_PWR_I2C_ADDR 0x3c +#define MAX77620_RTC_I2C_ADDR 0x68 + +#define MAX77620_CNFG_GPIO_DRV_MASK BIT(0) +#define MAX77620_CNFG_GPIO_DRV_PUSHPULL BIT(0) +#define MAX77620_CNFG_GPIO_DRV_OPENDRAIN 0 +#define MAX77620_CNFG_GPIO_DIR_MASK BIT(1) +#define MAX77620_CNFG_GPIO_DIR_INPUT BIT(1) +#define MAX77620_CNFG_GPIO_DIR_OUTPUT 0 +#define MAX77620_CNFG_GPIO_INPUT_VAL_MASK BIT(2) +#define MAX77620_CNFG_GPIO_OUTPUT_VAL_MASK BIT(3) +#define MAX77620_CNFG_GPIO_OUTPUT_VAL_HIGH BIT(3) +#define MAX77620_CNFG_GPIO_OUTPUT_VAL_LOW 0 +#define MAX77620_CNFG_GPIO_INT_MASK (0x3 << 4) +#define MAX77620_CNFG_GPIO_INT_FALLING BIT(4) +#define MAX77620_CNFG_GPIO_INT_RISING BIT(5) +#define MAX77620_CNFG_GPIO_DBNC_MASK (0x3 << 6) +#define MAX77620_CNFG_GPIO_DBNC_None (0x0 << 6) +#define MAX77620_CNFG_GPIO_DBNC_8ms (0x1 << 6) +#define MAX77620_CNFG_GPIO_DBNC_16ms (0x2 << 6) +#define MAX77620_CNFG_GPIO_DBNC_32ms (0x3 << 6) + +#define MAX77620_IRQ_LVL2_GPIO_EDGE0 BIT(0) +#define MAX77620_IRQ_LVL2_GPIO_EDGE1 BIT(1) +#define MAX77620_IRQ_LVL2_GPIO_EDGE2 BIT(2) +#define MAX77620_IRQ_LVL2_GPIO_EDGE3 BIT(3) +#define MAX77620_IRQ_LVL2_GPIO_EDGE4 BIT(4) +#define MAX77620_IRQ_LVL2_GPIO_EDGE5 BIT(5) +#define MAX77620_IRQ_LVL2_GPIO_EDGE6 BIT(6) +#define MAX77620_IRQ_LVL2_GPIO_EDGE7 BIT(7) + +#define MAX77620_CNFG1_32K_OUT0_EN BIT(2) + +#define MAX77620_ONOFFCNFG1_SFT_RST BIT(7) +#define MAX77620_ONOFFCNFG1_MRT_MASK 0x38 +#define MAX77620_ONOFFCNFG1_MRT_SHIFT 0x3 +#define MAX77620_ONOFFCNFG1_SLPEN BIT(2) +#define MAX77620_ONOFFCNFG1_PWR_OFF BIT(1) +#define MAX20024_ONOFFCNFG1_CLRSE 0x18 + +#define MAX77620_ONOFFCNFG2_SFT_RST_WK BIT(7) +#define MAX77620_ONOFFCNFG2_WD_RST_WK BIT(6) +#define MAX77620_ONOFFCNFG2_SLP_LPM_MSK BIT(5) +#define MAX77620_ONOFFCNFG2_WK_ALARM1 BIT(2) +#define MAX77620_ONOFFCNFG2_WK_EN0 BIT(0) + +#define MAX77620_GLBLM_MASK BIT(0) + +#define MAX77620_WDTC_MASK 0x3 +#define MAX77620_WDTOFFC BIT(4) +#define MAX77620_WDTSLPC BIT(3) +#define MAX77620_WDTEN BIT(2) + +#define MAX77620_TWD_MASK 0x3 +#define MAX77620_TWD_2s 0x0 +#define MAX77620_TWD_16s 0x1 +#define MAX77620_TWD_64s 0x2 +#define MAX77620_TWD_128s 0x3 + +#define MAX77620_CNFGGLBL1_LBDAC_EN BIT(7) +#define MAX77620_CNFGGLBL1_MPPLD BIT(6) +#define MAX77620_CNFGGLBL1_LBHYST (BIT(5) | BIT(4)) +#define MAX77620_CNFGGLBL1_LBDAC 0x0E +#define MAX77620_CNFGGLBL1_LBRSTEN BIT(0) + +/* CNFG BBC registers */ +#define MAX77620_CNFGBBC_ENABLE BIT(0) +#define MAX77620_CNFGBBC_CURRENT_MASK 0x06 +#define MAX77620_CNFGBBC_CURRENT_SHIFT 1 +#define MAX77620_CNFGBBC_VOLTAGE_MASK 0x18 +#define MAX77620_CNFGBBC_VOLTAGE_SHIFT 3 +#define MAX77620_CNFGBBC_LOW_CURRENT_DISABLE BIT(5) +#define MAX77620_CNFGBBC_RESISTOR_MASK 0xC0 +#define MAX77620_CNFGBBC_RESISTOR_SHIFT 6 + +#define MAX77620_FPS_COUNT 3 + +/* Interrupts */ +enum { + MAX77620_IRQ_TOP_GLBL, /* Low-Battery */ + MAX77620_IRQ_TOP_SD, /* SD power fail */ + MAX77620_IRQ_TOP_LDO, /* LDO power fail */ + MAX77620_IRQ_TOP_GPIO, /* TOP GPIO internal int to MAX77620 */ + MAX77620_IRQ_TOP_RTC, /* RTC */ + MAX77620_IRQ_TOP_32K, /* 32kHz oscillator */ + MAX77620_IRQ_TOP_ONOFF, /* ON/OFF oscillator */ + MAX77620_IRQ_LBT_MBATLOW, /* Thermal alarm status, > 120C */ + MAX77620_IRQ_LBT_TJALRM1, /* Thermal alarm status, > 120C */ + MAX77620_IRQ_LBT_TJALRM2, /* Thermal alarm status, > 140C */ +}; + +/* GPIOs */ +enum { + MAX77620_GPIO0, + MAX77620_GPIO1, + MAX77620_GPIO2, + MAX77620_GPIO3, + MAX77620_GPIO4, + MAX77620_GPIO5, + MAX77620_GPIO6, + MAX77620_GPIO7, + MAX77620_GPIO_NR, +}; + +/* FPS Source */ +enum max77620_fps_src { + MAX77620_FPS_SRC_0, + MAX77620_FPS_SRC_1, + MAX77620_FPS_SRC_2, + MAX77620_FPS_SRC_NONE, + MAX77620_FPS_SRC_DEF, +}; + +enum max77620_chip_id { + MAX77620, + MAX20024, +}; + +struct max77620_chip { + struct device *dev; + struct regmap *rmap; + + int chip_irq; + int irq_base; + + /* chip id */ + enum max77620_chip_id chip_id; + + bool sleep_enable; + bool enable_global_lpm; + int shutdown_fps_period[MAX77620_FPS_COUNT]; + int suspend_fps_period[MAX77620_FPS_COUNT]; + + struct regmap_irq_chip_data *top_irq_data; + struct regmap_irq_chip_data *gpio_irq_data; +}; + +#endif /* _MFD_MAX77620_H_ */ -- cgit v1.2.3 From 884be175351e73c515303118150f195dd611787c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Apr 2016 23:56:31 -0400 Subject: nfs: per-name sillyunlink exclusion use d_alloc_parallel() for sillyunlink/lookup exclusion and explicit rwsem (nfs_rmdir() being a writer and nfs_call_unlink() - a reader) for rmdir/sillyunlink one. That ought to make lookup/readdir/!O_CREAT atomic_open really parallel on NFS. Signed-off-by: Al Viro --- include/linux/nfs_fs.h | 11 +++-------- include/linux/nfs_xdr.h | 4 ++-- 2 files changed, 5 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 67300f8e5f2f..fa167f25465d 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -163,11 +163,9 @@ struct nfs_inode { /* Open contexts for shared mmap writes */ struct list_head open_files; - /* Number of in-flight sillydelete RPC calls */ - atomic_t silly_count; - /* List of deferred sillydelete requests */ - struct hlist_head silly_list; - wait_queue_head_t waitqueue; + /* Readers: in-flight sillydelete RPC calls */ + /* Writers: rmdir */ + struct rw_semaphore rmdir_sem; #if IS_ENABLED(CONFIG_NFS_V4) struct nfs4_cached_acl *nfs4_acl; @@ -492,9 +490,6 @@ extern void nfs_release_automount_timer(void); * linux/fs/nfs/unlink.c */ extern void nfs_complete_unlink(struct dentry *dentry, struct inode *); -extern void nfs_wait_on_sillyrename(struct dentry *dentry); -extern void nfs_block_sillyrename(struct dentry *dentry); -extern void nfs_unblock_sillyrename(struct dentry *dentry); /* * linux/fs/nfs/write.c diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d320906cf13e..ee8491dadbf3 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1468,10 +1468,10 @@ struct nfs_pgio_completion_ops { }; struct nfs_unlinkdata { - struct hlist_node list; struct nfs_removeargs args; struct nfs_removeres res; - struct inode *dir; + struct dentry *dentry; + wait_queue_head_t wq; struct rpc_cred *cred; struct nfs_fattr dir_attr; long timeout; -- cgit v1.2.3 From 4f41fc59620fcedaa97cbdf3d7d2956d80fcd922 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Mon, 9 May 2016 09:59:55 -0500 Subject: cgroup, kernfs: make mountinfo show properly scoped path for cgroup namespaces Patch summary: When showing a cgroupfs entry in mountinfo, show the path of the mount root dentry relative to the reader's cgroup namespace root. Short explanation (courtesy of mkerrisk): If we create a new cgroup namespace, then we want both /proc/self/cgroup and /proc/self/mountinfo to show cgroup paths that are correctly virtualized with respect to the cgroup mount point. Previous to this patch, /proc/self/cgroup shows the right info, but /proc/self/mountinfo does not. Long version: When a uid 0 task which is in freezer cgroup /a/b, unshares a new cgroup namespace, and then mounts a new instance of the freezer cgroup, the new mount will be rooted at /a/b. The root dentry field of the mountinfo entry will show '/a/b'. cat > /tmp/do1 << EOF mount -t cgroup -o freezer freezer /mnt grep freezer /proc/self/mountinfo EOF unshare -Gm bash /tmp/do1 > 330 160 0:34 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,freezer > 355 133 0:34 /a/b /mnt rw,relatime - cgroup freezer rw,freezer The task's freezer cgroup entry in /proc/self/cgroup will simply show '/': grep freezer /proc/self/cgroup 9:freezer:/ If instead the same task simply bind mounts the /a/b cgroup directory, the resulting mountinfo entry will again show /a/b for the dentry root. However in this case the task will find its own cgroup at /mnt/a/b, not at /mnt: mount --bind /sys/fs/cgroup/freezer/a/b /mnt 130 25 0:34 /a/b /mnt rw,nosuid,nodev,noexec,relatime shared:21 - cgroup cgroup rw,freezer In other words, there is no way for the task to know, based on what is in mountinfo, which cgroup directory is its own. Example (by mkerrisk): First, a little script to save some typing and verbiage: echo -e "\t/proc/self/cgroup:\t$(cat /proc/self/cgroup | grep freezer)" cat /proc/self/mountinfo | grep freezer | awk '{print "\tmountinfo:\t\t" $4 "\t" $5}' Create cgroup, place this shell into the cgroup, and look at the state of the /proc files: 2653 2653 # Our shell 14254 # cat(1) /proc/self/cgroup: 10:freezer:/a/b mountinfo: / /sys/fs/cgroup/freezer Create a shell in new cgroup and mount namespaces. The act of creating a new cgroup namespace causes the process's current cgroups directories to become its cgroup root directories. (Here, I'm using my own version of the "unshare" utility, which takes the same options as the util-linux version): Look at the state of the /proc files: /proc/self/cgroup: 10:freezer:/ mountinfo: / /sys/fs/cgroup/freezer The third entry in /proc/self/cgroup (the pathname of the cgroup inside the hierarchy) is correctly virtualized w.r.t. the cgroup namespace, which is rooted at /a/b in the outer namespace. However, the info in /proc/self/mountinfo is not for this cgroup namespace, since we are seeing a duplicate of the mount from the old mount namespace, and the info there does not correspond to the new cgroup namespace. However, trying to create a new mount still doesn't show us the right information in mountinfo: # propagating to other mountns /proc/self/cgroup: 7:freezer:/ mountinfo: /a/b /mnt/freezer The act of creating a new cgroup namespace caused the process's current freezer directory, "/a/b", to become its cgroup freezer root directory. In other words, the pathname directory of the directory within the newly mounted cgroup filesystem should be "/", but mountinfo wrongly shows us "/a/b". The consequence of this is that the process in the cgroup namespace cannot correctly construct the pathname of its cgroup root directory from the information in /proc/PID/mountinfo. With this patch, the dentry root field in mountinfo is shown relative to the reader's cgroup namespace. So the same steps as above: /proc/self/cgroup: 10:freezer:/a/b mountinfo: / /sys/fs/cgroup/freezer /proc/self/cgroup: 10:freezer:/ mountinfo: /../.. /sys/fs/cgroup/freezer /proc/self/cgroup: 10:freezer:/ mountinfo: / /mnt/freezer cgroup.clone_children freezer.parent_freezing freezer.state tasks cgroup.procs freezer.self_freezing notify_on_release 3164 2653 # First shell that placed in this cgroup 3164 # Shell started by 'unshare' 14197 # cat(1) Signed-off-by: Serge Hallyn Tested-by: Michael Kerrisk Acked-by: Michael Kerrisk Signed-off-by: Tejun Heo --- include/linux/kernfs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index c06c44242f39..30f089ebe0a4 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -152,6 +152,8 @@ struct kernfs_syscall_ops { int (*rmdir)(struct kernfs_node *kn); int (*rename)(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name); + int (*show_path)(struct seq_file *sf, struct kernfs_node *kn, + struct kernfs_root *root); }; struct kernfs_root { -- cgit v1.2.3 From 85ad1d13ee9b3db00615ea24b031c15e5ba14fd1 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Tue, 3 May 2016 22:22:13 -0400 Subject: md: set MD_CHANGE_PENDING in a atomic region Some code waits for a metadata update by: 1. flagging that it is needed (MD_CHANGE_DEVS or MD_CHANGE_CLEAN) 2. setting MD_CHANGE_PENDING and waking the management thread 3. waiting for MD_CHANGE_PENDING to be cleared If the first two are done without locking, the code in md_update_sb() which checks if it needs to repeat might test if an update is needed before step 1, then clear MD_CHANGE_PENDING after step 2, resulting in the wait returning early. So make sure all places that set MD_CHANGE_PENDING are atomicial, and bit_clear_unless (suggested by Neil) is introduced for the purpose. Cc: Martin Kepplinger Cc: Andrew Morton Cc: Denys Vlasenko Cc: Sasha Levin Cc: Reviewed-by: NeilBrown Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- include/linux/bitops.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index defeaac0745f..299e76b59fe9 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -227,6 +227,22 @@ static inline unsigned long __ffs64(u64 word) }) #endif +#ifndef bit_clear_unless +#define bit_clear_unless(ptr, _clear, _test) \ +({ \ + const typeof(*ptr) clear = (_clear), test = (_test); \ + typeof(*ptr) old, new; \ + \ + do { \ + old = ACCESS_ONCE(*ptr); \ + new = old & ~clear; \ + } while (!(old & test) && \ + cmpxchg(ptr, old, new) != old); \ + \ + !(old & test); \ +}) +#endif + #ifndef find_last_bit /** * find_last_bit - find the last set bit in a memory region -- cgit v1.2.3 From 661ce1f0c4a69f92ad781d8d2c205c90dd9c5833 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 25 Apr 2016 12:45:45 +0200 Subject: libata/libsas: Define ATA_CMD_NCQ_NON_DATA Define the NCQ NON DATA command and update libsas to handle it correctly. Signed-off-by: Hannes Reinecke Signed-off-by: Tejun Heo --- include/linux/ata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 00aebc4c83ad..b84210a28a00 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -243,6 +243,7 @@ enum { ATA_CMD_WRITE_QUEUED_FUA_EXT = 0x3E, ATA_CMD_FPDMA_READ = 0x60, ATA_CMD_FPDMA_WRITE = 0x61, + ATA_CMD_NCQ_NON_DATA = 0x63, ATA_CMD_FPDMA_SEND = 0x64, ATA_CMD_FPDMA_RECV = 0x65, ATA_CMD_PIO_READ = 0x20, -- cgit v1.2.3 From 5c65d8bb3503beb12864895426a69269c19e6e87 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 25 Apr 2016 12:45:47 +0200 Subject: libata: Add command definitions for NCQ Encapsulation for READ LOG DMA EXT ACS-4 defines an NCQ encapsulation for READ LOG DMA EXT. Signed-off-by: Hannes Reinecke Signed-off-by: Tejun Heo --- include/linux/ata.h | 5 +++++ include/linux/libata.h | 7 +++++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index b84210a28a00..94ccde5ee83c 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -306,6 +306,9 @@ enum { /* marked obsolete in the ATA/ATAPI-7 spec */ ATA_CMD_RESTORE = 0x10, + /* Subcmds for ATA_CMD_FPDMA_RECV */ + ATA_SUBCMD_FPDMA_RECV_RD_LOG_DMA_EXT = 0x01, + /* Subcmds for ATA_CMD_FPDMA_SEND */ ATA_SUBCMD_FPDMA_SEND_DSM = 0x00, ATA_SUBCMD_FPDMA_SEND_WR_LOG_DMA_EXT = 0x02, @@ -329,7 +332,9 @@ enum { ATA_LOG_NCQ_SEND_RECV_DSM_OFFSET = 0x04, ATA_LOG_NCQ_SEND_RECV_DSM_TRIM = (1 << 0), ATA_LOG_NCQ_SEND_RECV_RD_LOG_OFFSET = 0x08, + ATA_LOG_NCQ_SEND_RECV_RD_LOG_SUPPORTED = (1 << 0), ATA_LOG_NCQ_SEND_RECV_WR_LOG_OFFSET = 0x0C, + ATA_LOG_NCQ_SEND_RECV_WR_LOG_SUPPORTED = (1 << 0), ATA_LOG_NCQ_SEND_RECV_SIZE = 0x10, /* READ/WRITE LONG (obsolete) */ diff --git a/include/linux/libata.h b/include/linux/libata.h index a418bca0df0d..09ddb5a6f555 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1642,6 +1642,13 @@ static inline bool ata_fpdma_dsm_supported(struct ata_device *dev) ATA_LOG_NCQ_SEND_RECV_DSM_TRIM); } +static inline bool ata_fpdma_read_log_supported(struct ata_device *dev) +{ + return (dev->flags & ATA_DFLAG_NCQ_SEND_RECV) && + (dev->ncq_send_recv_cmds[ATA_LOG_NCQ_SEND_RECV_RD_LOG_OFFSET] & + ATA_LOG_NCQ_SEND_RECV_RD_LOG_SUPPORTED); +} + static inline void ata_qc_set_polling(struct ata_queued_cmd *qc) { qc->tf.ctl |= ATA_NIEN; -- cgit v1.2.3 From fe5af0cc3029d52e31d282f5d53787d308e9695a Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 25 Apr 2016 12:45:48 +0200 Subject: libata: Check log page directory before accessing pages When reading the NCQ Send/Recv log it might actually not supported, thereby causing irritating messages 'READ LOG DMA EXT failed'. Instead we should be reading the log directory first to figure out if the log is actually supported before trying to access it. Signed-off-by: Hannes Reinecke Signed-off-by: Tejun Heo --- include/linux/ata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 94ccde5ee83c..b5be5e85d2d3 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -314,6 +314,7 @@ enum { ATA_SUBCMD_FPDMA_SEND_WR_LOG_DMA_EXT = 0x02, /* READ_LOG_EXT pages */ + ATA_LOG_DIRECTORY = 0x0, ATA_LOG_SATA_NCQ = 0x10, ATA_LOG_NCQ_SEND_RECV = 0x13, ATA_LOG_SATA_ID_DEV_DATA = 0x30, -- cgit v1.2.3 From a57038496422d7d21b7e41ed70d63bf0c6ff6068 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 25 Apr 2016 12:45:49 +0200 Subject: libata-trace: decode subcommands Some commands like FPDMA RECEIVE or NCQ NON DATA can encapsulate other commands to NCQ transport. So decode the subcmds, too. Signed-off-by: Hannes Reinecke Signed-off-by: Tejun Heo --- include/linux/ata.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index b5be5e85d2d3..032bb223cd8c 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -313,6 +313,11 @@ enum { ATA_SUBCMD_FPDMA_SEND_DSM = 0x00, ATA_SUBCMD_FPDMA_SEND_WR_LOG_DMA_EXT = 0x02, + /* Subcmds for ATA_CMD_NCQ_NON_DATA */ + ATA_SUBCMD_NCQ_NON_DATA_ABORT_QUEUE = 0x00, + ATA_SUBCMD_NCQ_NON_DATA_SET_FEATURES = 0x05, + ATA_SUBCMD_NCQ_NON_DATA_ZERO_EXT = 0x06, + /* READ_LOG_EXT pages */ ATA_LOG_DIRECTORY = 0x0, ATA_LOG_SATA_NCQ = 0x10, @@ -338,6 +343,18 @@ enum { ATA_LOG_NCQ_SEND_RECV_WR_LOG_SUPPORTED = (1 << 0), ATA_LOG_NCQ_SEND_RECV_SIZE = 0x10, + /* NCQ Non-Data log */ + ATA_LOG_NCQ_NON_DATA_SUBCMDS_OFFSET = 0x00, + ATA_LOG_NCQ_NON_DATA_ABORT_OFFSET = 0x00, + ATA_LOG_NCQ_NON_DATA_ABORT_NCQ = (1 << 0), + ATA_LOG_NCQ_NON_DATA_ABORT_ALL = (1 << 1), + ATA_LOG_NCQ_NON_DATA_ABORT_STREAMING = (1 << 2), + ATA_LOG_NCQ_NON_DATA_ABORT_NON_STREAMING = (1 << 3), + ATA_LOG_NCQ_NON_DATA_ABORT_SELECTED = (1 << 4), + ATA_LOG_NCQ_NON_DATA_ZAC_MGMT_OFFSET = 0x1C, + ATA_LOG_NCQ_NON_DATA_ZAC_MGMT_OUT = (1 << 0), + ATA_LOG_NCQ_NON_DATA_SIZE = 0x40, + /* READ/WRITE LONG (obsolete) */ ATA_CMD_READ_LONG = 0x22, ATA_CMD_READ_LONG_ONCE = 0x23, -- cgit v1.2.3 From 3a92945b24c7ff46757a3d5d5112bfc62d2e45b2 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 25 Apr 2016 12:45:51 +0200 Subject: libata: fixup ZAC device disabling libata device disabling is ... curious. So add the correct definitions that we can disable ZAC devices properly. Signed-off-by: Hannes Reinecke Signed-off-by: Tejun Heo --- include/linux/libata.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 09ddb5a6f555..92297cd111f6 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -192,7 +192,8 @@ enum { ATA_DEV_SEMB = 7, /* SEMB */ ATA_DEV_SEMB_UNSUP = 8, /* SEMB (unsupported) */ ATA_DEV_ZAC = 9, /* ZAC device */ - ATA_DEV_NONE = 10, /* no device */ + ATA_DEV_ZAC_UNSUP = 10, /* ZAC device (unsupported) */ + ATA_DEV_NONE = 11, /* no device */ /* struct ata_link flags */ ATA_LFLAG_NO_HRST = (1 << 1), /* avoid hardreset */ @@ -1524,7 +1525,8 @@ static inline unsigned int ata_class_enabled(unsigned int class) static inline unsigned int ata_class_disabled(unsigned int class) { return class == ATA_DEV_ATA_UNSUP || class == ATA_DEV_ATAPI_UNSUP || - class == ATA_DEV_PMP_UNSUP || class == ATA_DEV_SEMB_UNSUP; + class == ATA_DEV_PMP_UNSUP || class == ATA_DEV_SEMB_UNSUP || + class == ATA_DEV_ZAC_UNSUP; } static inline unsigned int ata_class_absent(unsigned int class) -- cgit v1.2.3 From 28a3fc2295a744a0d2ddf86b2ccdf03fbab123f9 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 25 Apr 2016 12:45:52 +0200 Subject: libata: implement ZBC IN translation ZAC drives implement a 'ZAC Management In' command template, which maps onto the ZBC IN command. Signed-off-by: Hannes Reinecke Signed-off-by: Tejun Heo --- include/linux/ata.h | 10 +++++++++- include/linux/libata.h | 7 +++++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 032bb223cd8c..255aa0f1c9bc 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -302,12 +302,14 @@ enum { ATA_CMD_CFA_WRITE_MULT_NE = 0xCD, ATA_CMD_REQ_SENSE_DATA = 0x0B, ATA_CMD_SANITIZE_DEVICE = 0xB4, + ATA_CMD_ZAC_MGMT_IN = 0x4A, /* marked obsolete in the ATA/ATAPI-7 spec */ ATA_CMD_RESTORE = 0x10, /* Subcmds for ATA_CMD_FPDMA_RECV */ ATA_SUBCMD_FPDMA_RECV_RD_LOG_DMA_EXT = 0x01, + ATA_SUBCMD_FPDMA_RECV_ZAC_MGMT_IN = 0x02, /* Subcmds for ATA_CMD_FPDMA_SEND */ ATA_SUBCMD_FPDMA_SEND_DSM = 0x00, @@ -318,6 +320,9 @@ enum { ATA_SUBCMD_NCQ_NON_DATA_SET_FEATURES = 0x05, ATA_SUBCMD_NCQ_NON_DATA_ZERO_EXT = 0x06, + /* Subcmds for ATA_CMD_ZAC_MGMT_IN */ + ATA_SUBCMD_ZAC_MGMT_IN_REPORT_ZONES = 0x00, + /* READ_LOG_EXT pages */ ATA_LOG_DIRECTORY = 0x0, ATA_LOG_SATA_NCQ = 0x10, @@ -341,7 +346,10 @@ enum { ATA_LOG_NCQ_SEND_RECV_RD_LOG_SUPPORTED = (1 << 0), ATA_LOG_NCQ_SEND_RECV_WR_LOG_OFFSET = 0x0C, ATA_LOG_NCQ_SEND_RECV_WR_LOG_SUPPORTED = (1 << 0), - ATA_LOG_NCQ_SEND_RECV_SIZE = 0x10, + ATA_LOG_NCQ_SEND_RECV_ZAC_MGMT_OFFSET = 0x10, + ATA_LOG_NCQ_SEND_RECV_ZAC_MGMT_OUT_SUPPORTED = (1 << 0), + ATA_LOG_NCQ_SEND_RECV_ZAC_MGMT_IN_SUPPORTED = (1 << 1), + ATA_LOG_NCQ_SEND_RECV_SIZE = 0x14, /* NCQ Non-Data log */ ATA_LOG_NCQ_NON_DATA_SUBCMDS_OFFSET = 0x00, diff --git a/include/linux/libata.h b/include/linux/libata.h index 92297cd111f6..c0806b60c4fa 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1651,6 +1651,13 @@ static inline bool ata_fpdma_read_log_supported(struct ata_device *dev) ATA_LOG_NCQ_SEND_RECV_RD_LOG_SUPPORTED); } +static inline bool ata_fpdma_zac_mgmt_in_supported(struct ata_device *dev) +{ + return (dev->flags & ATA_DFLAG_NCQ_SEND_RECV) && + (dev->ncq_send_recv_cmds[ATA_LOG_NCQ_SEND_RECV_ZAC_MGMT_OFFSET] & + ATA_LOG_NCQ_SEND_RECV_ZAC_MGMT_IN_SUPPORTED); +} + static inline void ata_qc_set_polling(struct ata_queued_cmd *qc) { qc->tf.ctl |= ATA_NIEN; -- cgit v1.2.3 From 27708a9579ee069c6e0ebb6e61ac1114ed1d546c Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 25 Apr 2016 12:45:53 +0200 Subject: libata: Implement ZBC OUT translation ZAC drives implement a 'ZAC Management Out' command template, which maps onto the ZBC OUT command. Signed-off-by: Hannes Reinecke Signed-off-by: Tejun Heo --- include/linux/ata.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 255aa0f1c9bc..9d7c47075ebc 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -303,6 +303,7 @@ enum { ATA_CMD_REQ_SENSE_DATA = 0x0B, ATA_CMD_SANITIZE_DEVICE = 0xB4, ATA_CMD_ZAC_MGMT_IN = 0x4A, + ATA_CMD_ZAC_MGMT_OUT = 0x9F, /* marked obsolete in the ATA/ATAPI-7 spec */ ATA_CMD_RESTORE = 0x10, @@ -323,6 +324,12 @@ enum { /* Subcmds for ATA_CMD_ZAC_MGMT_IN */ ATA_SUBCMD_ZAC_MGMT_IN_REPORT_ZONES = 0x00, + /* Subcmds for ATA_CMD_ZAC_MGMT_OUT */ + ATA_SUBCMD_ZAC_MGMT_OUT_CLOSE_ZONE = 0x01, + ATA_SUBCMD_ZAC_MGMT_OUT_FINISH_ZONE = 0x02, + ATA_SUBCMD_ZAC_MGMT_OUT_OPEN_ZONE = 0x03, + ATA_SUBCMD_ZAC_MGMT_OUT_RESET_WRITE_POINTER = 0x04, + /* READ_LOG_EXT pages */ ATA_LOG_DIRECTORY = 0x0, ATA_LOG_SATA_NCQ = 0x10, -- cgit v1.2.3 From 284b3b77ea883234dadb2cbf97b145c3c30fe4bd Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 25 Apr 2016 12:45:54 +0200 Subject: libata: NCQ encapsulation for ZAC MANAGEMENT OUT Add NCQ encapsulation for ZAC MANAGEMENT OUT and evaluate NCQ Non-Data log pages to figure out if NCQ encapsulation is supported. Signed-off-by: Hannes Reinecke Signed-off-by: Tejun Heo --- include/linux/ata.h | 7 +++++++ include/linux/libata.h | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 9d7c47075ebc..e62703201e84 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -320,6 +320,7 @@ enum { ATA_SUBCMD_NCQ_NON_DATA_ABORT_QUEUE = 0x00, ATA_SUBCMD_NCQ_NON_DATA_SET_FEATURES = 0x05, ATA_SUBCMD_NCQ_NON_DATA_ZERO_EXT = 0x06, + ATA_SUBCMD_NCQ_NON_DATA_ZAC_MGMT_OUT = 0x07, /* Subcmds for ATA_CMD_ZAC_MGMT_IN */ ATA_SUBCMD_ZAC_MGMT_IN_REPORT_ZONES = 0x00, @@ -333,6 +334,7 @@ enum { /* READ_LOG_EXT pages */ ATA_LOG_DIRECTORY = 0x0, ATA_LOG_SATA_NCQ = 0x10, + ATA_LOG_NCQ_NON_DATA = 0x12, ATA_LOG_NCQ_SEND_RECV = 0x13, ATA_LOG_SATA_ID_DEV_DATA = 0x30, ATA_LOG_SATA_SETTINGS = 0x08, @@ -877,6 +879,11 @@ static inline bool ata_id_has_ncq_send_and_recv(const u16 *id) return id[ATA_ID_SATA_CAPABILITY_2] & BIT(6); } +static inline bool ata_id_has_ncq_non_data(const u16 *id) +{ + return id[ATA_ID_SATA_CAPABILITY_2] & BIT(5); +} + static inline bool ata_id_has_trim(const u16 *id) { if (ata_id_major_version(id) >= 7 && diff --git a/include/linux/libata.h b/include/linux/libata.h index c0806b60c4fa..0019d4b51b11 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -729,6 +729,7 @@ struct ata_device { /* NCQ send and receive log subcommand support */ u8 ncq_send_recv_cmds[ATA_LOG_NCQ_SEND_RECV_SIZE]; + u8 ncq_non_data_cmds[ATA_LOG_NCQ_NON_DATA_SIZE]; /* error history */ int spdn_cnt; @@ -1658,6 +1659,12 @@ static inline bool ata_fpdma_zac_mgmt_in_supported(struct ata_device *dev) ATA_LOG_NCQ_SEND_RECV_ZAC_MGMT_IN_SUPPORTED); } +static inline bool ata_fpdma_zac_mgmt_out_supported(struct ata_device *dev) +{ + return (dev->ncq_non_data_cmds[ATA_LOG_NCQ_NON_DATA_ZAC_MGMT_OFFSET] & + ATA_LOG_NCQ_NON_DATA_ZAC_MGMT_OUT); +} + static inline void ata_qc_set_polling(struct ata_queued_cmd *qc) { qc->tf.ctl |= ATA_NIEN; -- cgit v1.2.3 From 856c4663930988118d9f355aad66811dd6df06de Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 25 Apr 2016 12:45:55 +0200 Subject: libata: support device-managed ZAC devices Device-managed ZAC devices just set the zoned capabilities field in INQUIRY byte 69 (cf ACS-4). This corresponds to the 'zoned' field in the block device characteristics VPD page. As this is only defined in SPC-5/SBC-4 we also need to update the supported SCSI version descriptor. Reviewed-by: Shaun Tancheff Tested-by: Shaun Tancheff Signed-off-by: Hannes Reinecke Signed-off-by: Tejun Heo --- include/linux/ata.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index e62703201e84..ac1cb9310dea 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -935,6 +935,11 @@ static inline bool ata_id_is_ssd(const u16 *id) return id[ATA_ID_ROT_SPEED] == 0x01; } +static inline u8 ata_id_zoned_cap(const u16 *id) +{ + return (id[ATA_ID_ADDITIONAL_SUPP] & 0x3); +} + static inline bool ata_id_pio_need_iordy(const u16 *id, const u8 pio) { /* CF spec. r4.1 Table 22 says no IORDY on PIO5 and PIO6. */ -- cgit v1.2.3 From 6d1003ae8db228b74ef61536364cd2a1bd973dd8 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 25 Apr 2016 12:45:56 +0200 Subject: libata: support host-aware and host-managed ZAC devices Byte 69 bits 0:1 in the IDENTIFY DEVICE data indicate a host-aware ZAC device. Host-managed ZAC devices have their own individual signature, and to not set the bits in the IDENTIFY DEVICE data. And whenever we detect a ZAC-compatible device we should be displaying the zoned block characteristics VPD page. Signed-off-by: Hannes Reinecke Signed-off-by: Tejun Heo --- include/linux/ata.h | 1 + include/linux/libata.h | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index ac1cb9310dea..83e2a99866c2 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -338,6 +338,7 @@ enum { ATA_LOG_NCQ_SEND_RECV = 0x13, ATA_LOG_SATA_ID_DEV_DATA = 0x30, ATA_LOG_SATA_SETTINGS = 0x08, + ATA_LOG_ZONED_INFORMATION = 0x09, ATA_LOG_DEVSLP_OFFSET = 0x30, ATA_LOG_DEVSLP_SIZE = 0x08, ATA_LOG_DEVSLP_MDAT = 0x00, diff --git a/include/linux/libata.h b/include/linux/libata.h index 0019d4b51b11..d15c19e331d1 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -181,6 +181,7 @@ enum { ATA_DFLAG_DEVSLP = (1 << 27), /* device supports Device Sleep */ ATA_DFLAG_ACPI_DISABLED = (1 << 28), /* ACPI for the device is disabled */ ATA_DFLAG_D_SENSE = (1 << 29), /* Descriptor sense requested */ + ATA_DFLAG_ZAC = (1 << 30), /* ZAC device */ ATA_DEV_UNKNOWN = 0, /* unknown device */ ATA_DEV_ATA = 1, /* ATA device */ @@ -731,6 +732,12 @@ struct ata_device { u8 ncq_send_recv_cmds[ATA_LOG_NCQ_SEND_RECV_SIZE]; u8 ncq_non_data_cmds[ATA_LOG_NCQ_NON_DATA_SIZE]; + /* ZAC zone configuration */ + u32 zac_zoned_cap; + u32 zac_zones_optimal_open; + u32 zac_zones_optimal_nonseq; + u32 zac_zones_max_open; + /* error history */ int spdn_cnt; /* ering is CLEAR_END, read comment above CLEAR_END */ -- cgit v1.2.3 From 8634de6d254462e6953b7dac772a1df4f44c8030 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 6 May 2016 09:22:25 -0500 Subject: compiler-gcc: require gcc 4.8 for powerpc __builtin_bswap16() gcc support for __builtin_bswap16() was supposedly added for powerpc in gcc 4.6, and was then later added for other architectures in gcc 4.8. However, Stephen Rothwell reported that attempting to use it on powerpc in gcc 4.6 fails with: lib/vsprintf.c:160:2: error: initializer element is not constant lib/vsprintf.c:160:2: error: (near initialization for 'decpair[0]') lib/vsprintf.c:160:2: error: initializer element is not constant lib/vsprintf.c:160:2: error: (near initialization for 'decpair[1]') ... I'm not entirely sure what those errors mean, but I don't see them on gcc 4.8. So let's consider gcc 4.8 to be the official starting point for __builtin_bswap16(). Arnd Bergmann adds: "I found the commit in gcc-4.8 that replaced the powerpc-specific implementation of __builtin_bswap16 with an architecture-independent one. Apparently the powerpc version (gcc-4.6 and 4.7) just mapped to the lhbrx/sthbrx instructions, so it ended up not being a constant, though the intent of the patch was mainly to add support for the builtin to x86: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52624 has the patch that went into gcc-4.8 and more information." Fixes: 7322dd755e7d ("byteswap: try to avoid __builtin_constant_p gcc bug") Reported-by: Stephen Rothwell Tested-by: Stephen Rothwell Acked-by: Arnd Bergmann Signed-off-by: Josh Poimboeuf Signed-off-by: Stephen Rothwell Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index eeae401a2412..3d5202eda22f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -246,7 +246,7 @@ #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ #endif -#if GCC_VERSION >= 40800 || (defined(__powerpc__) && GCC_VERSION >= 40600) +#if GCC_VERSION >= 40800 #define __HAVE_BUILTIN_BSWAP16__ #endif #endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ -- cgit v1.2.3 From 59fa0224cfea31dde596e29555de94c961b139f9 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 9 May 2016 17:22:15 -0700 Subject: blk-throttle: don't parse cgroup path if trace isn't enabled if trace isn't enabled, parsing cgroup path just wastes cpu Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index afc1343df3c7..0f3172b8b225 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -57,6 +57,14 @@ void __trace_note_message(struct blk_trace *, const char *fmt, ...); } while (0) #define BLK_TN_MAX_MSG 128 +static inline bool blk_trace_note_message_enabled(struct request_queue *q) +{ + struct blk_trace *bt = q->blk_trace; + if (likely(!bt)) + return false; + return bt->act_mask & BLK_TC_NOTIFY; +} + extern void blk_add_driver_data(struct request_queue *q, struct request *rq, void *data, size_t len); extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, @@ -79,6 +87,7 @@ extern struct attribute_group blk_trace_attr_group; # define blk_trace_remove(q) (-ENOTTY) # define blk_add_trace_msg(q, fmt, ...) do { } while (0) # define blk_trace_remove_sysfs(dev) do { } while (0) +# define blk_trace_note_message_enabled(q) (false) static inline int blk_trace_init_sysfs(struct device *dev) { return 0; -- cgit v1.2.3 From 9d9a77cee1ab53dc6419b1ab9da88c4e9342d26a Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Tue, 10 May 2016 00:19:41 +0200 Subject: net: phy: add phy_ethtool_{get|set}_link_ksettings Ethtool callbacks {get|set}_link_ksettings are often the same, so we add two generics functions phy_ethtool_{get|set}_link_ksettings to avoid writing severals times the same function. Signed-off-by: Philippe Reynes Acked-By: David Decotigny Signed-off-by: David S. Miller --- include/linux/phy.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index be3f83bbdc0b..2d24b283aa2d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -829,6 +829,10 @@ int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data); int phy_ethtool_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); void phy_ethtool_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); +int phy_ethtool_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd); +int phy_ethtool_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd); int __init mdio_bus_init(void); void mdio_bus_exit(void); -- cgit v1.2.3 From 1dee3f59a8d5e711797dc82628aaf94a64e99922 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 9 May 2016 11:40:20 +0200 Subject: block/drbd: align properly u64 in nl messages The attribute 0 is never used in drbd, so let's use it as pad attribute in netlink messages. This minimizes the patch. Note that this patch is only compile-tested. Signed-off-by: Nicolas Dichtel Signed-off-by: Lars Ellenberg Signed-off-by: David S. Miller --- include/linux/genl_magic_struct.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index eecd19b37001..6270a56e5edc 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -62,6 +62,11 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); /* MAGIC helpers {{{2 */ +static inline int nla_put_u64_0pad(struct sk_buff *skb, int attrtype, u64 value) +{ + return nla_put_64bit(skb, attrtype, sizeof(u64), &value, 0); +} + /* possible field types */ #define __flg_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U8, char, \ @@ -80,7 +85,7 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); nla_get_u32, nla_put_u32, true) #define __u64_field(attr_nr, attr_flag, name) \ __field(attr_nr, attr_flag, name, NLA_U64, __u64, \ - nla_get_u64, nla_put_u64, false) + nla_get_u64, nla_put_u64_0pad, false) #define __str_field(attr_nr, attr_flag, name, maxlen) \ __array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \ nla_strlcpy, nla_put, false) -- cgit v1.2.3 From e5366a266a8cd4cd6b0fe66876462cca2e1c6a89 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 6 May 2016 08:37:41 -0700 Subject: mtd: spi-nor: support GigaDevice gd25lq64c Also note the GigaDevice JEDEC ID. No write-protect support yet, since this flash uses a different status register layout. Cc: Ezequiel Garcia Signed-off-by: Brian Norris Acked-by: Marek Vasut --- include/linux/mtd/spi-nor.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 3c36113a88e1..7f041bd88b82 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -21,6 +21,7 @@ * Sometimes these are the same as CFI IDs, but sometimes they aren't. */ #define SNOR_MFR_ATMEL CFI_MFR_ATMEL +#define SNOR_MFR_GIGADEVICE 0xc8 #define SNOR_MFR_INTEL CFI_MFR_INTEL #define SNOR_MFR_MICRON CFI_MFR_ST /* ST Micro <--> Micron */ #define SNOR_MFR_MACRONIX CFI_MFR_MACRONIX -- cgit v1.2.3 From 54d5ca871e72f2bb172ec9323497f01cd5091ec7 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 11 May 2016 01:16:37 +0200 Subject: vfs: add vfs_select_inode() helper Signed-off-by: Miklos Szeredi Cc: # v4.2+ --- include/linux/dcache.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 4bb4de8d95ea..7e9422cb5989 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -565,4 +565,16 @@ static inline struct dentry *d_real(struct dentry *dentry) return dentry; } +static inline struct inode *vfs_select_inode(struct dentry *dentry, + unsigned open_flags) +{ + struct inode *inode = d_inode(dentry); + + if (inode && unlikely(dentry->d_flags & DCACHE_OP_SELECT_INODE)) + inode = dentry->d_op->d_select_inode(dentry, open_flags); + + return inode; +} + + #endif /* __LINUX_DCACHE_H */ -- cgit v1.2.3 From 3c9fe8cdff1b889a059a30d22f130372f2b3885f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 11 May 2016 01:16:37 +0200 Subject: vfs: add lookup_hash() helper Overlayfs needs lookup without inode_permission() and already has the name hash (in form of dentry->d_name on overlayfs dentry). It also doesn't support filesystems with d_op->d_hash() so basically it only needs the actual hashed lookup from lookup_one_len_unlocked() So add a new helper that does unlocked lookup of a hashed name. Signed-off-by: Miklos Szeredi --- include/linux/namei.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index 77d01700daf7..ec5ec2818a28 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -79,6 +79,8 @@ extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); +struct qstr; +extern struct dentry *lookup_hash(const struct qstr *, struct dentry *); extern int follow_down_one(struct path *); extern int follow_down(struct path *); -- cgit v1.2.3 From 074f23b675f9f2a9c51c16ecefd06b4658d5e629 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 4 May 2016 10:22:27 +0100 Subject: irqchip/gic-v3: Remove inexistant register definition The GICv3 include file defines GICR_ISACTIVER and GICR_ICACTIVER in the RD_base page. News flash, they do not exist (probably a copy/paste brain fart). Just drop them. Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index d5d798b35c1f..9e6fdd33bdb2 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -102,8 +102,6 @@ #define GICR_SYNCR 0x00C0 #define GICR_MOVLPIR 0x0100 #define GICR_MOVALLR 0x0110 -#define GICR_ISACTIVER GICD_ISACTIVER -#define GICR_ICACTIVER GICD_ICACTIVER #define GICR_IDREGS GICD_IDREGS #define GICR_PIDR2 GICD_PIDR2 -- cgit v1.2.3 From 496aec577b5183716ed9d8bcc853ad9003485fe8 Mon Sep 17 00:00:00 2001 From: Christian Daudt Date: Wed, 4 May 2016 17:55:20 -0700 Subject: brcmfmac: Add 4356 sdio support This adds support for the 4356-sdio wireless chip. Signed-off-by: Christian Daudt Signed-off-by: Kalle Valo --- include/linux/mmc/sdio_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 83430f2ea757..0d126aeb3ec0 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -36,6 +36,7 @@ #define SDIO_DEVICE_ID_BROADCOM_43430 0xa9a6 #define SDIO_DEVICE_ID_BROADCOM_4345 0x4345 #define SDIO_DEVICE_ID_BROADCOM_4354 0x4354 +#define SDIO_DEVICE_ID_BROADCOM_4356 0x4356 #define SDIO_VENDOR_ID_INTEL 0x0089 #define SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX 0x1402 -- cgit v1.2.3 From 5d749d0bbe811c10d9048cde6dfebc761713abfd Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 8 Mar 2016 09:13:52 -0800 Subject: platform/chrome: cros_ec_dev - Fix security issue Prevent memory scribble by checking that ioctl buffer size parameters are sane. Without this check, on 32 bits system, if .insize = 0xffffffff - 20 and .outsize the amount to scribble, we would overflow, allocate a small amounts and be able to write outside of the malloc'ed area. Adding a hard limit allows argument checking of the ioctl. With the current EC, it is expected .insize and .outsize to be at around 512 bytes or less. Signed-off-by: Gwendal Grignou Signed-off-by: Olof Johansson --- include/linux/mfd/cros_ec.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index a677c2bd485c..64184d27e3cd 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -50,9 +50,11 @@ enum { EC_MSG_TX_TRAILER_BYTES, EC_MSG_RX_PROTO_BYTES = 3, - /* Max length of messages */ - EC_MSG_BYTES = EC_PROTO2_MAX_PARAM_SIZE + + /* Max length of messages for proto 2*/ + EC_PROTO2_MSG_BYTES = EC_PROTO2_MAX_PARAM_SIZE + EC_MSG_TX_PROTO_BYTES, + + EC_MAX_MSG_BYTES = 64 * 1024, }; /* -- cgit v1.2.3 From 9b9e3fc4d5a31f6050508f2404369beac4356867 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Mon, 9 May 2016 18:11:54 +0200 Subject: KVM: remove NULL return path for vcpu ids >= KVM_MAX_VCPUS Commit c896939f7cff ("KVM: use heuristic for fast VCPU lookup by id") added a return path that prevents vcpu ids to exceed KVM_MAX_VCPUS. This is a problem for powerpc where vcpu ids can grow up to 8*KVM_MAX_VCPUS. This patch simply reverses the logic so that we only try fast path if the vcpu id can be tried as an index in kvm->vcpus[]. The slow path is not affected by the change. Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Greg Kurz Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ad40d44784c7..0a0e00d9c5da 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -447,12 +447,13 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) { - struct kvm_vcpu *vcpu; + struct kvm_vcpu *vcpu = NULL; int i; - if (id < 0 || id >= KVM_MAX_VCPUS) + if (id < 0) return NULL; - vcpu = kvm_get_vcpu(kvm, id); + if (id < KVM_MAX_VCPUS) + vcpu = kvm_get_vcpu(kvm, id); if (vcpu && vcpu->vcpu_id == id) return vcpu; kvm_for_each_vcpu(i, vcpu, kvm) -- cgit v1.2.3 From 0b1b1dfd52a67f4f09a18cb82337199bc90ad7fb Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Mon, 9 May 2016 18:13:37 +0200 Subject: kvm: introduce KVM_MAX_VCPU_ID The KVM_MAX_VCPUS define provides the maximum number of vCPUs per guest, and also the upper limit for vCPU ids. This is okay for all archs except PowerPC which can have higher ids, depending on the cpu/core/thread topology. In the worst case (single threaded guest, host with 8 threads per core), it limits the maximum number of vCPUS to KVM_MAX_VCPUS / 8. This patch separates the vCPU numbering from the total number of vCPUs, with the introduction of KVM_MAX_VCPU_ID, as the maximal valid value for vCPU ids plus one. The corresponding KVM_CAP_MAX_VCPU_ID allows userspace to validate vCPU ids before passing them to KVM_CREATE_VCPU. This patch only implements KVM_MAX_VCPU_ID with a specific value for PowerPC. Other archs continue to return KVM_MAX_VCPUS instead. Suggested-by: Radim Krcmar Signed-off-by: Greg Kurz Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0a0e00d9c5da..352889d6e322 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -35,6 +35,10 @@ #include +#ifndef KVM_MAX_VCPU_ID +#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS +#endif + /* * The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used * in kvm, other bits are visible for userspace which are defined in -- cgit v1.2.3 From b52f3ed02221252d8ee2c7d756e76fad4a5e84f6 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 5 May 2016 11:58:29 -0600 Subject: irqbypass: Disallow NULL token A NULL token is meaningless and can only lead to unintended problems. Error on registration with a NULL token, ignore de-registrations with a NULL token. Signed-off-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/irqbypass.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h index 1551b5b2f4c2..f0f5d2671509 100644 --- a/include/linux/irqbypass.h +++ b/include/linux/irqbypass.h @@ -34,7 +34,7 @@ struct irq_bypass_consumer; /** * struct irq_bypass_producer - IRQ bypass producer definition * @node: IRQ bypass manager private list management - * @token: opaque token to match between producer and consumer + * @token: opaque token to match between producer and consumer (non-NULL) * @irq: Linux IRQ number for the producer device * @add_consumer: Connect the IRQ producer to an IRQ consumer (optional) * @del_consumer: Disconnect the IRQ producer from an IRQ consumer (optional) @@ -60,7 +60,7 @@ struct irq_bypass_producer { /** * struct irq_bypass_consumer - IRQ bypass consumer definition * @node: IRQ bypass manager private list management - * @token: opaque token to match between producer and consumer + * @token: opaque token to match between producer and consumer (non-NULL) * @add_producer: Connect the IRQ consumer to an IRQ producer * @del_producer: Disconnect the IRQ consumer from an IRQ producer * @stop: Perform any quiesce operations necessary prior to add/del (optional) -- cgit v1.2.3 From 14717e2031862d9aa2512b24a7df42cf68a977ec Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 5 May 2016 11:58:35 -0600 Subject: kvm: Conditionally register IRQ bypass consumer If we don't support a mechanism for bypassing IRQs, don't register as a consumer. This eliminates meaningless dev_info()s when the connect fails between producer and consumer, such as on AMD systems where kvm_x86_ops->update_pi_irte is not implemented Signed-off-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 352889d6e322..92a0229044fb 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1185,6 +1185,7 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +bool kvm_arch_has_irq_bypass(void); int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *, -- cgit v1.2.3 From 74b20582ac389ee9f18a6fcc0eef244658ce8de0 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 10 May 2016 11:19:50 -0700 Subject: net: l3mdev: Add hook in ip and ipv6 Currently the VRF driver uses the rx_handler to switch the skb device to the VRF device. Switching the dev prior to the ip / ipv6 layer means the VRF driver has to duplicate IP/IPv6 processing which adds overhead and makes features such as retaining the ingress device index more complicated than necessary. This patch moves the hook to the L3 layer just after the first NF_HOOK for PRE_ROUTING. This location makes exposing the original ingress device trivial (next patch) and allows adding other NF_HOOKs to the VRF driver in the future. dev_queue_xmit_nit is exported so that the VRF driver can cycle the skb with the switched device through the packet taps to maintain current behavior (tcpdump can be used on either the vrf device or the enslaved devices). Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 17 ++++++++++++++++- include/linux/netdevice.h | 2 ++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 58d6e158755f..5c91b0b055d4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -118,14 +118,29 @@ struct inet6_skb_parm { #define IP6SKB_ROUTERALERT 8 #define IP6SKB_FRAGMENTED 16 #define IP6SKB_HOPBYHOP 32 +#define IP6SKB_L3SLAVE 64 }; +#if defined(CONFIG_NET_L3_MASTER_DEV) +static inline bool skb_l3mdev_slave(__u16 flags) +{ + return flags & IP6SKB_L3SLAVE; +} +#else +static inline bool skb_l3mdev_slave(__u16 flags) +{ + return false; +} +#endif + #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) #define IP6CBMTU(skb) ((struct ip6_mtuinfo *)((skb)->cb)) static inline int inet6_iif(const struct sk_buff *skb) { - return IP6CB(skb)->iif; + bool l3_slave = skb_l3mdev_slave(IP6CB(skb)->flags); + + return l3_slave ? skb->skb_iif : IP6CB(skb)->iif; } struct tcp6_request_sock { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 63580e6d0df4..c2f5112f08f7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3258,6 +3258,8 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb); +void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); + extern int netdev_budget; /* Called by rtnetlink.c:rtnl_unlock() */ -- cgit v1.2.3 From 7219ab34f184b5d864be38f5ada7cdff1ab5b18a Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 11 May 2016 00:29:14 +0300 Subject: net/mlx5e: CQE compression CQE compression feature is meant to save PCIe bandwidth by compressing few CQEs into smaller amount of bytes on PCIe. CQE compression can be selectively enabled per CQ. By default is disabled for now and will be enabled later on. Signed-off-by: Tariq Toukan Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index ee0d5a937f02..035abdf62cfe 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -685,6 +685,40 @@ struct mlx5_cqe64 { u8 op_own; }; +struct mlx5_mini_cqe8 { + union { + __be32 rx_hash_result; + struct { + __be16 checksum; + __be16 rsvd; + }; + struct { + __be16 wqe_counter; + u8 s_wqe_opcode; + u8 reserved; + } s_wqe_info; + }; + __be32 byte_cnt; +}; + +enum { + MLX5_NO_INLINE_DATA, + MLX5_INLINE_DATA32_SEG, + MLX5_INLINE_DATA64_SEG, + MLX5_COMPRESSED, +}; + +enum { + MLX5_CQE_FORMAT_CSUM = 0x1, +}; + +#define MLX5_MINI_CQE_ARRAY_SIZE 8 + +static inline int mlx5_get_cqe_format(struct mlx5_cqe64 *cqe) +{ + return (cqe->op_own >> 2) & 0x3; +} + static inline int get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) { return (cqe->lro_tcppsh_abort_dupack >> 6) & 1; -- cgit v1.2.3 From 37bff2b9c6addf6216c8d04e95be596678e8deff Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 11 May 2016 16:36:13 +0300 Subject: qed: Add VF->PF channel infrastructure Communication between VF and PF is based on a dedicated HW channel; VF will prepare a messge, and by signaling the HW the PF would get a notification of that message existance. The PF would then copy the message, process it and DMA an answer back to the VF as a response. The messages themselves are TLV-based - allowing easier backward/forward compatibility. This patch adds the infrastructure of the channel on the PF side - starting with the arrival of the notification and ending with DMAing the response back to the VF. It also adds a dummy-response as reference, as it only lays the groundwork of the communication; it doesn't really add support of any actual messages. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 53ecb37ae563..8914d271ba73 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -327,9 +327,14 @@ struct regpair { __le32 hi; }; +struct vf_pf_channel_eqe_data { + struct regpair msg_addr; +}; + /* Event Data Union */ union event_ring_data { u8 bytes[8]; + struct vf_pf_channel_eqe_data vf_pf_channel; struct async_data async_info; }; -- cgit v1.2.3 From 1408cc1fa48c5450c0dc4b40cbd9718ecb09d1c9 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 11 May 2016 16:36:14 +0300 Subject: qed: Introduce VFs This adds the qed VFs for the first time - The vfs are limited functions, with a very different PCI bar structure [when compared with PFs] to better impose the related security demands associated with them. This patch includes the logic neccesary to allow VFs to successfully probe [without actually adding the ability to enable iov]. This includes diverging all the flows that would occur as part of the pci probe of the driver, preventing VF from accessing registers/memories it can't and instead utilize the VF->PF channel to query the PF for needed information. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 57 ++++++++++++++++++++++++++++++++++++++++++ include/linux/qed/qed_if.h | 10 ++++++-- 2 files changed, 65 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 8914d271ba73..3f14c7efe68f 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -285,6 +285,63 @@ #define PXP_ILT_PAGE_SIZE_NUM_BITS_MIN 12 #define PXP_ILT_BLOCK_FACTOR_MULTIPLIER 1024 +#define PXP_VF_BAR0_START_IGU 0 +#define PXP_VF_BAR0_IGU_LENGTH 0x3000 +#define PXP_VF_BAR0_END_IGU (PXP_VF_BAR0_START_IGU + \ + PXP_VF_BAR0_IGU_LENGTH - 1) + +#define PXP_VF_BAR0_START_DQ 0x3000 +#define PXP_VF_BAR0_DQ_LENGTH 0x200 +#define PXP_VF_BAR0_DQ_OPAQUE_OFFSET 0 +#define PXP_VF_BAR0_ME_OPAQUE_ADDRESS (PXP_VF_BAR0_START_DQ + \ + PXP_VF_BAR0_DQ_OPAQUE_OFFSET) +#define PXP_VF_BAR0_ME_CONCRETE_ADDRESS (PXP_VF_BAR0_ME_OPAQUE_ADDRESS \ + + 4) +#define PXP_VF_BAR0_END_DQ (PXP_VF_BAR0_START_DQ + \ + PXP_VF_BAR0_DQ_LENGTH - 1) + +#define PXP_VF_BAR0_START_TSDM_ZONE_B 0x3200 +#define PXP_VF_BAR0_SDM_LENGTH_ZONE_B 0x200 +#define PXP_VF_BAR0_END_TSDM_ZONE_B (PXP_VF_BAR0_START_TSDM_ZONE_B \ + + \ + PXP_VF_BAR0_SDM_LENGTH_ZONE_B \ + - 1) + +#define PXP_VF_BAR0_START_MSDM_ZONE_B 0x3400 +#define PXP_VF_BAR0_END_MSDM_ZONE_B (PXP_VF_BAR0_START_MSDM_ZONE_B \ + + \ + PXP_VF_BAR0_SDM_LENGTH_ZONE_B \ + - 1) + +#define PXP_VF_BAR0_START_USDM_ZONE_B 0x3600 +#define PXP_VF_BAR0_END_USDM_ZONE_B (PXP_VF_BAR0_START_USDM_ZONE_B \ + + \ + PXP_VF_BAR0_SDM_LENGTH_ZONE_B \ + - 1) + +#define PXP_VF_BAR0_START_XSDM_ZONE_B 0x3800 +#define PXP_VF_BAR0_END_XSDM_ZONE_B (PXP_VF_BAR0_START_XSDM_ZONE_B \ + + \ + PXP_VF_BAR0_SDM_LENGTH_ZONE_B \ + - 1) + +#define PXP_VF_BAR0_START_YSDM_ZONE_B 0x3a00 +#define PXP_VF_BAR0_END_YSDM_ZONE_B (PXP_VF_BAR0_START_YSDM_ZONE_B \ + + \ + PXP_VF_BAR0_SDM_LENGTH_ZONE_B \ + - 1) + +#define PXP_VF_BAR0_START_PSDM_ZONE_B 0x3c00 +#define PXP_VF_BAR0_END_PSDM_ZONE_B (PXP_VF_BAR0_START_PSDM_ZONE_B \ + + \ + PXP_VF_BAR0_SDM_LENGTH_ZONE_B \ + - 1) + +#define PXP_VF_BAR0_START_SDM_ZONE_A 0x4000 +#define PXP_VF_BAR0_END_SDM_ZONE_A 0x10000 + +#define PXP_VF_BAR0_GRC_WINDOW_LENGTH 32 + /* ILT Records */ #define PXP_NUM_ILT_RECORDS_BB 7600 #define PXP_NUM_ILT_RECORDS_K2 11000 diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index d72c832a9397..76a6f168a190 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -140,6 +140,13 @@ struct qed_link_output { u32 pause_config; }; +struct qed_probe_params { + enum qed_protocol protocol; + u32 dp_module; + u8 dp_level; + bool is_vf; +}; + #define QED_DRV_VER_STR_SIZE 12 struct qed_slowpath_params { u32 int_mode; @@ -207,8 +214,7 @@ struct qed_common_ops { struct qed_selftest_ops *selftest; struct qed_dev* (*probe)(struct pci_dev *dev, - enum qed_protocol protocol, - u32 dp_module, u8 dp_level); + struct qed_probe_params *params); void (*remove)(struct qed_dev *cdev); -- cgit v1.2.3 From 0b55e27d563f493665693b494735574e68c3c5b9 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 11 May 2016 16:36:15 +0300 Subject: qed: IOV configure and FLR While previous patches have already added the necessary logic to probe VFs as well as enabling them in the HW, this patch adds the ability to support VF FLR & SRIOV disable. It then wraps both flows together into the first IOV callback to be provided to the protocol driver - `configure'. This would later to be used to enable and disable SRIOV in the adapter. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 4 ++++ include/linux/qed/qed_iov_if.h | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 include/linux/qed/qed_iov_if.h (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 3a4c806be156..acfafca43aa5 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -13,6 +13,7 @@ #include #include #include +#include struct qed_dev_eth_info { struct qed_dev_info common; @@ -125,6 +126,9 @@ struct qed_eth_cb_ops { struct qed_eth_ops { const struct qed_common_ops *common; +#ifdef CONFIG_QED_SRIOV + const struct qed_iov_hv_ops *iov; +#endif int (*fill_dev_info)(struct qed_dev *cdev, struct qed_dev_eth_info *info); diff --git a/include/linux/qed/qed_iov_if.h b/include/linux/qed/qed_iov_if.h new file mode 100644 index 000000000000..c53bfa6374c5 --- /dev/null +++ b/include/linux/qed/qed_iov_if.h @@ -0,0 +1,20 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_IOV_IF_H +#define _QED_IOV_IF_H + +#include + +/* Structs used by PF to control and manipulate child VFs */ +struct qed_iov_hv_ops { + int (*configure)(struct qed_dev *cdev, int num_vfs_param); + +}; + +#endif -- cgit v1.2.3 From 08feecd7fc709077ce92d21a979f522a5f57170a Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 11 May 2016 16:36:20 +0300 Subject: qed*: Support PVID configuration This adds support for PF control over the VF vlan configuration. I.e., `ip link ... vf vlan ' should now be supported. 1. != 0 => VF receives [unknowingly] only traffic tagged by and tags all outgoing traffic sent by VF with . 2. == 0 ==> Remove the pvid configuration, reverting to previous. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_iov_if.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_iov_if.h b/include/linux/qed/qed_iov_if.h index c53bfa6374c5..825c007d50f1 100644 --- a/include/linux/qed/qed_iov_if.h +++ b/include/linux/qed/qed_iov_if.h @@ -15,6 +15,7 @@ struct qed_iov_hv_ops { int (*configure)(struct qed_dev *cdev, int num_vfs_param); + int (*set_vlan) (struct qed_dev *cdev, u16 vid, int vfid); }; #endif -- cgit v1.2.3 From eff169608c250193e72089dc4ab15cb79e0bd68c Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 11 May 2016 16:36:21 +0300 Subject: qed*: Support forced MAC Allows the PF to enforce the VF's mac. i.e., by using `ip link ... vf mac '. While a MAC is forced, PF would prevent the VF from configuring any other MAC. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 3 +++ include/linux/qed/qed_iov_if.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index acfafca43aa5..e0f6e6482031 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -122,6 +122,7 @@ struct qed_tunn_params { struct qed_eth_cb_ops { struct qed_common_cb_ops common; + void (*force_mac) (void *dev, u8 *mac); }; struct qed_eth_ops { @@ -137,6 +138,8 @@ struct qed_eth_ops { struct qed_eth_cb_ops *ops, void *cookie); + bool(*check_mac) (struct qed_dev *cdev, u8 *mac); + int (*vport_start)(struct qed_dev *cdev, struct qed_start_vport_params *params); diff --git a/include/linux/qed/qed_iov_if.h b/include/linux/qed/qed_iov_if.h index 825c007d50f1..7a67fbf4336a 100644 --- a/include/linux/qed/qed_iov_if.h +++ b/include/linux/qed/qed_iov_if.h @@ -15,6 +15,8 @@ struct qed_iov_hv_ops { int (*configure)(struct qed_dev *cdev, int num_vfs_param); + int (*set_mac) (struct qed_dev *cdev, u8 *mac, int vfid); + int (*set_vlan) (struct qed_dev *cdev, u16 vid, int vfid); }; -- cgit v1.2.3 From 733def6a04bf3d2810dd675e1240f8df94d633c3 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 11 May 2016 16:36:22 +0300 Subject: qed*: IOV link control This adds support in 2 ndo that allow PF to tweak the VF's view of the link - `ndo_set_vf_link_state' to allow it a view independent of the PF's, and `ndo_set_vf_rate' which would allow the PF to limit the VF speed. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_iov_if.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_iov_if.h b/include/linux/qed/qed_iov_if.h index 7a67fbf4336a..f364f2bd7a4d 100644 --- a/include/linux/qed/qed_iov_if.h +++ b/include/linux/qed/qed_iov_if.h @@ -18,6 +18,12 @@ struct qed_iov_hv_ops { int (*set_mac) (struct qed_dev *cdev, u8 *mac, int vfid); int (*set_vlan) (struct qed_dev *cdev, u16 vid, int vfid); + + int (*set_link_state) (struct qed_dev *cdev, int vf_id, + int link_state); + + int (*set_rate) (struct qed_dev *cdev, int vfid, + u32 min_rate, u32 max_rate); }; #endif -- cgit v1.2.3 From 6ddc7608258d57d61e16d55461400bb6eff18d72 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 11 May 2016 16:36:23 +0300 Subject: qed*: IOV support spoof-checking Add support in `ndo_set_vf_spoofchk' for allowing PF control over its VF spoof-checking configuration. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_iov_if.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_iov_if.h b/include/linux/qed/qed_iov_if.h index f364f2bd7a4d..2596d30d9e63 100644 --- a/include/linux/qed/qed_iov_if.h +++ b/include/linux/qed/qed_iov_if.h @@ -22,6 +22,8 @@ struct qed_iov_hv_ops { int (*set_link_state) (struct qed_dev *cdev, int vf_id, int link_state); + int (*set_spoof) (struct qed_dev *cdev, int vfid, bool val); + int (*set_rate) (struct qed_dev *cdev, int vfid, u32 min_rate, u32 max_rate); }; -- cgit v1.2.3 From 73390ac9d82bf9f0c849ff57b06a03145fbf05d6 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 11 May 2016 16:36:24 +0300 Subject: qed*: support ndo_get_vf_config Allows the user to view the VF configuration by observing the PF's device. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_iov_if.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_iov_if.h b/include/linux/qed/qed_iov_if.h index 2596d30d9e63..5a4f8d0899e9 100644 --- a/include/linux/qed/qed_iov_if.h +++ b/include/linux/qed/qed_iov_if.h @@ -19,6 +19,9 @@ struct qed_iov_hv_ops { int (*set_vlan) (struct qed_dev *cdev, u16 vid, int vfid); + int (*get_config) (struct qed_dev *cdev, int vf_id, + struct ifla_vf_info *ivi); + int (*set_link_state) (struct qed_dev *cdev, int vf_id, int link_state); -- cgit v1.2.3 From 831bfb0e88b54726d6e027a1d547066ffeb8b27e Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 11 May 2016 16:36:25 +0300 Subject: qed*: Tx-switching configuration Device should be configured by default to VEB once VFs are active. This changes the configuration of both PFs' and VFs' vports into enabling tx-switching once sriov is enabled. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 2 ++ include/linux/qed/qed_if.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index e0f6e6482031..6ae8cb4a61d3 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -35,6 +35,8 @@ struct qed_update_vport_params { u8 vport_id; u8 update_vport_active_flg; u8 vport_active_flg; + u8 update_tx_switching_flg; + u8 tx_switching_flg; u8 update_accept_any_vlan_flg; u8 accept_any_vlan; u8 update_rss_flg; diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 76a6f168a190..0fd8f247e65f 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -93,6 +93,7 @@ struct qed_dev_info { u32 flash_size; u8 mf_mode; + bool tx_switching; }; enum qed_sb_type { -- cgit v1.2.3 From c0bba3a99f0709c24c2c7ada7cb098966b1d791f Mon Sep 17 00:00:00 2001 From: Kedareswara rao Appana Date: Thu, 7 Apr 2016 10:59:43 +0530 Subject: dmaengine: vdma: Add Support for Xilinx AXI Direct Memory Access Engine This patch adds support for the AXI Direct Memory Access (AXI DMA) core in the existing vdma driver, AXI DMA Core is a soft Xilinx IP core that provides high-bandwidth direct memory access between memory and AXI4-Stream type target peripherals. Signed-off-by: Kedareswara rao Appana Signed-off-by: Vinod Koul --- include/linux/dma/xilinx_dma.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma/xilinx_dma.h b/include/linux/dma/xilinx_dma.h index 34b98f276ed0..5db17ff8e254 100644 --- a/include/linux/dma/xilinx_dma.h +++ b/include/linux/dma/xilinx_dma.h @@ -41,6 +41,18 @@ struct xilinx_vdma_config { int ext_fsync; }; +/** + * enum xdma_ip_type: DMA IP type. + * + * XDMA_TYPE_AXIDMA: Axi dma ip. + * XDMA_TYPE_VDMA: Axi vdma ip. + * + */ +enum xdma_ip_type { + XDMA_TYPE_AXIDMA = 0, + XDMA_TYPE_VDMA, +}; + int xilinx_vdma_channel_set_config(struct dma_chan *dchan, struct xilinx_vdma_config *cfg); -- cgit v1.2.3 From 07b0e7d49cbcadebad9d3b986f3298e33286dea2 Mon Sep 17 00:00:00 2001 From: Kedareswara rao Appana Date: Thu, 7 Apr 2016 10:59:45 +0530 Subject: dmaengine: vdma: Add Support for Xilinx AXI Central Direct Memory Access Engine This patch adds support for the AXI Central Direct Memory Access (AXI CDMA) core to the existing vdma driver, AXI CDMA is a soft Xilinx IP core that provides high-bandwidth Direct Memory Access(DMA) between a memory-mapped source address and a memory-mapped destination address. Signed-off-by: Kedareswara rao Appana Signed-off-by: Vinod Koul --- include/linux/dma/xilinx_dma.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma/xilinx_dma.h b/include/linux/dma/xilinx_dma.h index 5db17ff8e254..3ae300052553 100644 --- a/include/linux/dma/xilinx_dma.h +++ b/include/linux/dma/xilinx_dma.h @@ -45,11 +45,13 @@ struct xilinx_vdma_config { * enum xdma_ip_type: DMA IP type. * * XDMA_TYPE_AXIDMA: Axi dma ip. + * XDMA_TYPE_CDMA: Axi cdma ip. * XDMA_TYPE_VDMA: Axi vdma ip. * */ enum xdma_ip_type { XDMA_TYPE_AXIDMA = 0, + XDMA_TYPE_CDMA, XDMA_TYPE_VDMA, }; -- cgit v1.2.3 From 50605ffbdaf6d7ccab70d4631fd8347fc78af14f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 11 May 2016 14:23:31 +0200 Subject: sched/core: Provide a tsk_nr_cpus_allowed() helper tsk_nr_cpus_allowed() is an accessor for task->nr_cpus_allowed which allows us to change the representation of ->nr_cpus_allowed if required. Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1462969411-17735-2-git-send-email-bigeasy@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f8fea8a5b5ab..38526b67e787 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1930,6 +1930,11 @@ extern int arch_task_struct_size __read_mostly; /* Future-safe accessor for struct task_struct's cpus_allowed. */ #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) +static inline int tsk_nr_cpus_allowed(struct task_struct *p) +{ + return p->nr_cpus_allowed; +} + #define TNF_MIGRATED 0x01 #define TNF_NO_GROUP 0x02 #define TNF_SHARED 0x04 -- cgit v1.2.3 From c5076cfe768998e9d395bc8486b29b18b0f99fd9 Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Wed, 11 May 2016 17:34:51 -0500 Subject: PCI, of: Move PCI I/O space management to PCI core code No functional changes in this patch. PCI I/O space mapping code does not depend on OF; therefore it can be moved to PCI core code. This way we will be able to use it, e.g., in ACPI PCI code. Suggested-by: Lorenzo Pieralisi Signed-off-by: Tomasz Nowicki Signed-off-by: Bjorn Helgaas CC: Arnd Bergmann CC: Liviu Dudau --- include/linux/of_address.h | 9 --------- include/linux/pci.h | 5 +++++ 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_address.h b/include/linux/of_address.h index 01c0a556448b..37864734ca50 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -47,10 +47,6 @@ void __iomem *of_io_request_and_map(struct device_node *device, extern const __be32 *of_get_address(struct device_node *dev, int index, u64 *size, unsigned int *flags); -extern int pci_register_io_range(phys_addr_t addr, resource_size_t size); -extern unsigned long pci_address_to_pio(phys_addr_t addr); -extern phys_addr_t pci_pio_to_address(unsigned long pio); - extern int of_pci_range_parser_init(struct of_pci_range_parser *parser, struct device_node *node); extern struct of_pci_range *of_pci_range_parser_one( @@ -86,11 +82,6 @@ static inline const __be32 *of_get_address(struct device_node *dev, int index, return NULL; } -static inline phys_addr_t pci_pio_to_address(unsigned long pio) -{ - return 0; -} - static inline int of_pci_range_parser_init(struct of_pci_range_parser *parser, struct device_node *node) { diff --git a/include/linux/pci.h b/include/linux/pci.h index 004b8133417d..1824ef80e10d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1164,6 +1164,9 @@ int __must_check pci_bus_alloc_resource(struct pci_bus *bus, void *alignf_data); +int pci_register_io_range(phys_addr_t addr, resource_size_t size); +unsigned long pci_address_to_pio(phys_addr_t addr); +phys_addr_t pci_pio_to_address(unsigned long pio); int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr); static inline pci_bus_addr_t pci_bus_address(struct pci_dev *pdev, int bar) @@ -1480,6 +1483,8 @@ static inline int pci_request_regions(struct pci_dev *dev, const char *res_name) { return -EIO; } static inline void pci_release_regions(struct pci_dev *dev) { } +static inline unsigned long pci_address_to_pio(phys_addr_t addr) { return -1; } + static inline void pci_block_cfg_access(struct pci_dev *dev) { } static inline int pci_block_cfg_access_in_atomic(struct pci_dev *dev) { return 0; } -- cgit v1.2.3 From c16aea129bf788127465771cab97134a94af33e4 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 13 Apr 2016 19:11:03 +0300 Subject: net/mlx5: Fix mlx5 ifc cmd_hca_cap bad offsets All reserved fields after early_vf_enable are off by 1, since early_vf_enable was not explicitly declared as array of size 1. Reserved field before cqe_zip had a wrong size, it should be 0x80 + 0x3f. Fixes: b0844444590e ("net/mlx5_core: Introduce access function to read internal timer ") Fixes: b4ff3a36d3e4 ("net/mlx5: Use offset based reserved field names in the IFC header file") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Matan Barak Acked-by: Or Gerlitz Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 107 ++++++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c15b8a864937..c300e7491d80 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -750,21 +750,21 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 ets[0x1]; u8 nic_flow_table[0x1]; u8 eswitch_flow_table[0x1]; - u8 early_vf_enable; - u8 reserved_at_1a8[0x2]; + u8 early_vf_enable[0x1]; + u8 reserved_at_1a9[0x2]; u8 local_ca_ack_delay[0x5]; u8 reserved_at_1af[0x6]; u8 port_type[0x2]; u8 num_ports[0x8]; - u8 reserved_at_1bf[0x3]; + u8 reserved_at_1c0[0x3]; u8 log_max_msg[0x5]; - u8 reserved_at_1c7[0x4]; + u8 reserved_at_1c8[0x4]; u8 max_tc[0x4]; - u8 reserved_at_1cf[0x6]; + u8 reserved_at_1d0[0x6]; u8 rol_s[0x1]; u8 rol_g[0x1]; - u8 reserved_at_1d7[0x1]; + u8 reserved_at_1d8[0x1]; u8 wol_s[0x1]; u8 wol_g[0x1]; u8 wol_a[0x1]; @@ -774,47 +774,47 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 wol_p[0x1]; u8 stat_rate_support[0x10]; - u8 reserved_at_1ef[0xc]; + u8 reserved_at_1f0[0xc]; u8 cqe_version[0x4]; u8 compact_address_vector[0x1]; u8 reserved_at_200[0x3]; u8 ipoib_basic_offloads[0x1]; - u8 reserved_at_204[0xa]; + u8 reserved_at_205[0xa]; u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; - u8 reserved_at_212[0x1]; + u8 reserved_at_213[0x1]; u8 wq_signature[0x1]; u8 sctr_data_cqe[0x1]; - u8 reserved_at_215[0x1]; + u8 reserved_at_216[0x1]; u8 sho[0x1]; u8 tph[0x1]; u8 rf[0x1]; u8 dct[0x1]; - u8 reserved_at_21a[0x1]; + u8 reserved_at_21b[0x1]; u8 eth_net_offloads[0x1]; u8 roce[0x1]; u8 atomic[0x1]; - u8 reserved_at_21e[0x1]; + u8 reserved_at_21f[0x1]; u8 cq_oi[0x1]; u8 cq_resize[0x1]; u8 cq_moderation[0x1]; - u8 reserved_at_222[0x3]; + u8 reserved_at_223[0x3]; u8 cq_eq_remap[0x1]; u8 pg[0x1]; u8 block_lb_mc[0x1]; - u8 reserved_at_228[0x1]; + u8 reserved_at_229[0x1]; u8 scqe_break_moderation[0x1]; u8 reserved_at_22a[0x1]; u8 cd[0x1]; - u8 reserved_at_22c[0x1]; + u8 reserved_at_22d[0x1]; u8 apm[0x1]; u8 vector_calc[0x1]; u8 reserved_at_22f[0x1]; u8 imaicl[0x1]; - u8 reserved_at_231[0x4]; + u8 reserved_at_232[0x4]; u8 qkv[0x1]; u8 pkv[0x1]; u8 set_deth_sqpn[0x1]; @@ -824,98 +824,101 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 uc[0x1]; u8 rc[0x1]; - u8 reserved_at_23f[0xa]; + u8 reserved_at_240[0xa]; u8 uar_sz[0x6]; - u8 reserved_at_24f[0x8]; + u8 reserved_at_250[0x8]; u8 log_pg_sz[0x8]; u8 bf[0x1]; - u8 reserved_at_260[0x1]; + u8 reserved_at_261[0x1]; u8 pad_tx_eth_packet[0x1]; - u8 reserved_at_262[0x8]; + u8 reserved_at_263[0x8]; u8 log_bf_reg_size[0x5]; - u8 reserved_at_26f[0x10]; + u8 reserved_at_270[0x10]; - u8 reserved_at_27f[0x10]; + u8 reserved_at_280[0x10]; u8 max_wqe_sz_sq[0x10]; - u8 reserved_at_29f[0x10]; + u8 reserved_at_2a0[0x10]; u8 max_wqe_sz_rq[0x10]; - u8 reserved_at_2bf[0x10]; + u8 reserved_at_2c0[0x10]; u8 max_wqe_sz_sq_dc[0x10]; - u8 reserved_at_2df[0x7]; + u8 reserved_at_2e0[0x7]; u8 max_qp_mcg[0x19]; - u8 reserved_at_2ff[0x18]; + u8 reserved_at_300[0x18]; u8 log_max_mcg[0x8]; - u8 reserved_at_31f[0x3]; + u8 reserved_at_320[0x3]; u8 log_max_transport_domain[0x5]; - u8 reserved_at_327[0x3]; + u8 reserved_at_328[0x3]; u8 log_max_pd[0x5]; - u8 reserved_at_32f[0xb]; + u8 reserved_at_330[0xb]; u8 log_max_xrcd[0x5]; - u8 reserved_at_33f[0x20]; + u8 reserved_at_340[0x20]; - u8 reserved_at_35f[0x3]; + u8 reserved_at_360[0x3]; u8 log_max_rq[0x5]; - u8 reserved_at_367[0x3]; + u8 reserved_at_368[0x3]; u8 log_max_sq[0x5]; - u8 reserved_at_36f[0x3]; + u8 reserved_at_370[0x3]; u8 log_max_tir[0x5]; - u8 reserved_at_377[0x3]; + u8 reserved_at_378[0x3]; u8 log_max_tis[0x5]; u8 basic_cyclic_rcv_wqe[0x1]; - u8 reserved_at_380[0x2]; + u8 reserved_at_381[0x2]; u8 log_max_rmp[0x5]; - u8 reserved_at_387[0x3]; + u8 reserved_at_388[0x3]; u8 log_max_rqt[0x5]; - u8 reserved_at_38f[0x3]; + u8 reserved_at_390[0x3]; u8 log_max_rqt_size[0x5]; - u8 reserved_at_397[0x3]; + u8 reserved_at_398[0x3]; u8 log_max_tis_per_sq[0x5]; - u8 reserved_at_39f[0x3]; + u8 reserved_at_3a0[0x3]; u8 log_max_stride_sz_rq[0x5]; - u8 reserved_at_3a7[0x3]; + u8 reserved_at_3a8[0x3]; u8 log_min_stride_sz_rq[0x5]; - u8 reserved_at_3af[0x3]; + u8 reserved_at_3b0[0x3]; u8 log_max_stride_sz_sq[0x5]; - u8 reserved_at_3b7[0x3]; + u8 reserved_at_3b8[0x3]; u8 log_min_stride_sz_sq[0x5]; - u8 reserved_at_3bf[0x1b]; + u8 reserved_at_3c0[0x1b]; u8 log_max_wq_sz[0x5]; u8 nic_vport_change_event[0x1]; - u8 reserved_at_3e0[0xa]; + u8 reserved_at_3e1[0xa]; u8 log_max_vlan_list[0x5]; - u8 reserved_at_3ef[0x3]; + u8 reserved_at_3f0[0x3]; u8 log_max_current_mc_list[0x5]; - u8 reserved_at_3f7[0x3]; + u8 reserved_at_3f8[0x3]; u8 log_max_current_uc_list[0x5]; - u8 reserved_at_3ff[0x80]; + u8 reserved_at_400[0x80]; - u8 reserved_at_47f[0x3]; + u8 reserved_at_480[0x3]; u8 log_max_l2_table[0x5]; - u8 reserved_at_487[0x8]; + u8 reserved_at_488[0x8]; u8 log_uar_page_sz[0x10]; - u8 reserved_at_49f[0x20]; + u8 reserved_at_4a0[0x20]; u8 device_frequency_mhz[0x20]; u8 device_frequency_khz[0x20]; - u8 reserved_at_4ff[0x5f]; + + u8 reserved_at_500[0x80]; + + u8 reserved_at_580[0x3f]; u8 cqe_zip[0x1]; u8 cqe_zip_timeout[0x10]; u8 cqe_zip_max_num[0x10]; - u8 reserved_at_57f[0x220]; + u8 reserved_at_5e0[0x220]; }; enum mlx5_flow_destination_type { -- cgit v1.2.3 From 80835cba4b857485b068e1ce83512e896e6f001e Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 13 Apr 2016 19:11:04 +0300 Subject: net/mlx5: Update mlx5_ifc hardware features Adding the needed mlx5_ifc hardware bits and structs for the following features: * Add vport to steering commands for SRIOV ACL support * Add mlcr, pcmr and mcia registers for dump module EEPROM * Add support for FCS, beacon led and disable_link bits to hca caps * Add CQE period mode bit in CQ context for CQE based CQ moderation support * Add umr SQ bit for fragmented memory registration * Add needed bits and caps for Striding RQ support In-order to avoid possible future conflicts between rdma and net-next we added all expected updates to this file for this release. If more changes will be submitted, we plan to do it only through one of the subsystems, probably net-next. All updated bits in this patch will be later used in the up-coming submissions to net-next and rdma trees. Signed-off-by: Saeed Mahameed Signed-off-by: Matan Barak Acked-by: Or Gerlitz Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 146 +++++++++++++++++++++++++++++++++++------- 1 file changed, 124 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c300e7491d80..4ce4ea422a10 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -513,7 +513,9 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 max_lso_cap[0x5]; u8 reserved_at_10[0x4]; u8 rss_ind_tbl_cap[0x4]; - u8 reserved_at_18[0x3]; + u8 reg_umr_sq[0x1]; + u8 scatter_fcs[0x1]; + u8 reserved_at_1a[0x1]; u8 tunnel_lso_const_out_ip_id[0x1]; u8 reserved_at_1c[0x2]; u8 tunnel_statless_gre[0x1]; @@ -648,7 +650,7 @@ struct mlx5_ifc_vector_calc_cap_bits { enum { MLX5_WQ_TYPE_LINKED_LIST = 0x0, MLX5_WQ_TYPE_CYCLIC = 0x1, - MLX5_WQ_TYPE_STRQ = 0x2, + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ = 0x2, }; enum { @@ -753,7 +755,11 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 early_vf_enable[0x1]; u8 reserved_at_1a9[0x2]; u8 local_ca_ack_delay[0x5]; - u8 reserved_at_1af[0x6]; + u8 reserved_at_1af[0x2]; + u8 ports_check[0x1]; + u8 reserved_at_1b2[0x1]; + u8 disable_link_up[0x1]; + u8 beacon_led[0x1]; u8 port_type[0x2]; u8 num_ports[0x8]; @@ -778,7 +784,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 cqe_version[0x4]; u8 compact_address_vector[0x1]; - u8 reserved_at_200[0x3]; + u8 striding_rq[0x1]; + u8 reserved_at_201[0x2]; u8 ipoib_basic_offloads[0x1]; u8 reserved_at_205[0xa]; u8 drain_sigerr[0x1]; @@ -807,12 +814,12 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 block_lb_mc[0x1]; u8 reserved_at_229[0x1]; u8 scqe_break_moderation[0x1]; - u8 reserved_at_22a[0x1]; + u8 cq_period_start_from_cqe[0x1]; u8 cd[0x1]; u8 reserved_at_22d[0x1]; u8 apm[0x1]; u8 vector_calc[0x1]; - u8 reserved_at_22f[0x1]; + u8 umr_ptr_rlky[0x1]; u8 imaicl[0x1]; u8 reserved_at_232[0x4]; u8 qkv[0x1]; @@ -913,10 +920,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_500[0x80]; u8 reserved_at_580[0x3f]; - u8 cqe_zip[0x1]; + u8 cqe_compression[0x1]; - u8 cqe_zip_timeout[0x10]; - u8 cqe_zip_max_num[0x10]; + u8 cqe_compression_timeout[0x10]; + u8 cqe_compression_max_num[0x10]; u8 reserved_at_5e0[0x220]; }; @@ -1000,7 +1007,13 @@ struct mlx5_ifc_wq_bits { u8 reserved_at_118[0x3]; u8 log_wq_sz[0x5]; - u8 reserved_at_120[0x4e0]; + u8 reserved_at_120[0x15]; + u8 log_wqe_num_of_strides[0x3]; + u8 two_byte_shift_en[0x1]; + u8 reserved_at_139[0x4]; + u8 log_wqe_stride_size[0x3]; + + u8 reserved_at_140[0x4c0]; struct mlx5_ifc_cmd_pas_bits pas[0]; }; @@ -2199,7 +2212,8 @@ struct mlx5_ifc_sqc_bits { u8 flush_in_error_en[0x1]; u8 reserved_at_4[0x4]; u8 state[0x4]; - u8 reserved_at_c[0x14]; + u8 reg_umr[0x1]; + u8 reserved_at_d[0x13]; u8 reserved_at_20[0x8]; u8 user_index[0x18]; @@ -2247,7 +2261,8 @@ enum { struct mlx5_ifc_rqc_bits { u8 rlky[0x1]; - u8 reserved_at_1[0x2]; + u8 reserved_at_1[0x1]; + u8 scatter_fcs[0x1]; u8 vsd[0x1]; u8 mem_rq_type[0x4]; u8 state[0x4]; @@ -2604,6 +2619,11 @@ enum { MLX5_CQC_ST_FIRED = 0xa, }; +enum { + MLX5_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, + MLX5_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, +}; + struct mlx5_ifc_cqc_bits { u8 status[0x4]; u8 reserved_at_4[0x4]; @@ -2612,8 +2632,8 @@ struct mlx5_ifc_cqc_bits { u8 reserved_at_c[0x1]; u8 scqe_break_moderation_en[0x1]; u8 oi[0x1]; - u8 reserved_at_f[0x2]; - u8 cqe_zip_en[0x1]; + u8 cq_period_mode[0x2]; + u8 cqe_comp_en[0x1]; u8 mini_cqe_res_format[0x2]; u8 st[0x4]; u8 reserved_at_18[0x8]; @@ -2987,7 +3007,11 @@ struct mlx5_ifc_set_fte_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -5181,7 +5205,11 @@ struct mlx5_ifc_destroy_flow_table_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -5208,7 +5236,11 @@ struct mlx5_ifc_destroy_flow_group_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -5349,7 +5381,11 @@ struct mlx5_ifc_delete_fte_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -5795,7 +5831,11 @@ struct mlx5_ifc_create_flow_table_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -5839,7 +5879,11 @@ struct mlx5_ifc_create_flow_group_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -6372,6 +6416,17 @@ struct mlx5_ifc_ptys_reg_bits { u8 reserved_at_1a0[0x60]; }; +struct mlx5_ifc_mlcr_reg_bits { + u8 reserved_at_0[0x8]; + u8 local_port[0x8]; + u8 reserved_at_10[0x20]; + + u8 beacon_duration[0x10]; + u8 reserved_at_40[0x10]; + + u8 beacon_remain[0x10]; +}; + struct mlx5_ifc_ptas_reg_bits { u8 reserved_at_0[0x20]; @@ -6781,6 +6836,16 @@ struct mlx5_ifc_pamp_reg_bits { u8 index_data[18][0x10]; }; +struct mlx5_ifc_pcmr_reg_bits { + u8 reserved_at_0[0x8]; + u8 local_port[0x8]; + u8 reserved_at_10[0x2e]; + u8 fcs_cap[0x1]; + u8 reserved_at_3f[0x1f]; + u8 fcs_chk[0x1]; + u8 reserved_at_5f[0x1]; +}; + struct mlx5_ifc_lane_2_module_mapping_bits { u8 reserved_at_0[0x6]; u8 rx_lane[0x2]; @@ -7117,6 +7182,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_pspa_reg_bits pspa_reg; struct mlx5_ifc_ptas_reg_bits ptas_reg; struct mlx5_ifc_ptys_reg_bits ptys_reg; + struct mlx5_ifc_mlcr_reg_bits mlcr_reg; struct mlx5_ifc_pude_reg_bits pude_reg; struct mlx5_ifc_pvlc_reg_bits pvlc_reg; struct mlx5_ifc_slrg_reg_bits slrg_reg; @@ -7150,7 +7216,11 @@ struct mlx5_ifc_set_flow_table_root_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x40]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; + + u8 reserved_at_60[0x20]; u8 table_type[0x8]; u8 reserved_at_88[0x18]; @@ -7181,7 +7251,9 @@ struct mlx5_ifc_modify_flow_table_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x20]; + u8 other_vport[0x1]; + u8 reserved_at_41[0xf]; + u8 vport_number[0x10]; u8 reserved_at_60[0x10]; u8 modify_field_select[0x10]; @@ -7247,4 +7319,34 @@ struct mlx5_ifc_qtct_reg_bits { u8 tclass[0x3]; }; +struct mlx5_ifc_mcia_reg_bits { + u8 l[0x1]; + u8 reserved_at_1[0x7]; + u8 module[0x8]; + u8 reserved_at_10[0x8]; + u8 status[0x8]; + + u8 i2c_device_address[0x8]; + u8 page_number[0x8]; + u8 device_address[0x10]; + + u8 reserved_at_40[0x10]; + u8 size[0x10]; + + u8 reserved_at_60[0x20]; + + u8 dword_0[0x20]; + u8 dword_1[0x20]; + u8 dword_2[0x20]; + u8 dword_3[0x20]; + u8 dword_4[0x20]; + u8 dword_5[0x20]; + u8 dword_6[0x20]; + u8 dword_7[0x20]; + u8 dword_8[0x20]; + u8 dword_9[0x20]; + u8 dword_10[0x20]; + u8 dword_11[0x20]; +}; + #endif /* MLX5_IFC_H */ -- cgit v1.2.3 From b3d39032d7b33029373fbce6459aff6ac316e130 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 28 Mar 2016 20:36:59 -0700 Subject: remoteproc: Add additional crash reasons The Qualcomm WCNSS can crash by watchdog or a fatal software error. Add these types to the list of remoteproc crash reasons. Signed-off-by: Bjorn Andersson Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 9c4e1384f636..1c457a8dd5a6 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -365,6 +365,8 @@ enum rproc_state { /** * enum rproc_crash_type - remote processor crash types * @RPROC_MMUFAULT: iommu fault + * @RPROC_WATCHDOG: watchdog bite + * @RPROC_FATAL_ERROR fatal error * * Each element of the enum is used as an array index. So that, the value of * the elements should be always something sane. @@ -373,6 +375,8 @@ enum rproc_state { */ enum rproc_crash_type { RPROC_MMUFAULT, + RPROC_WATCHDOG, + RPROC_FATAL_ERROR, }; /** -- cgit v1.2.3 From 6d0a07edd17cfc12fdc1f36de8072fa17cc3666f Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 12 May 2016 15:42:25 -0700 Subject: mm: thp: calculate the mapcount correctly for THP pages during WP faults This will provide fully accuracy to the mapcount calculation in the write protect faults, so page pinning will not get broken by false positive copy-on-writes. total_mapcount() isn't the right calculation needed in reuse_swap_page(), so this introduces a page_trans_huge_mapcount() that is effectively the full accurate return value for page_mapcount() if dealing with Transparent Hugepages, however we only use the page_trans_huge_mapcount() during COW faults where it strictly needed, due to its higher runtime cost. This also provide at practical zero cost the total_mapcount information which is needed to know if we can still relocate the page anon_vma to the local vma. If page_trans_huge_mapcount() returns 1 we can reuse the page no matter if it's a pte or a pmd_trans_huge triggering the fault, but we can only relocate the page anon_vma to the local vma->anon_vma if we're sure it's only this "vma" mapping the whole THP physical range. Kirill A. Shutemov discovered the problem with moving the page anon_vma to the local vma->anon_vma in a previous version of this patch and another problem in the way page_move_anon_rmap() was called. Andrew Morton discovered that CONFIG_SWAP=n wouldn't build in a previous version, because reuse_swap_page must be a macro to call page_trans_huge_mapcount from swap.h, so this uses a macro again instead of an inline function. With this change at least it's a less dangerous usage than it was before, because "page" is used only once now, while with the previous code reuse_swap_page(page++) would have called page_mapcount on page+1 and it would have increased page twice instead of just once. Dean Luick noticed an uninitialized variable that could result in a rmap inefficiency for the non-THP case in a previous version. Mike Marciniszyn said: : Our RDMA tests are seeing an issue with memory locking that bisects to : commit 61f5d698cc97 ("mm: re-enable THP") : : The test program registers two rather large MRs (512M) and RDMA : writes data to a passive peer using the first and RDMA reads it back : into the second MR and compares that data. The sizes are chosen randomly : between 0 and 1024 bytes. : : The test will get through a few (<= 4 iterations) and then gets a : compare error. : : Tracing indicates the kernel logical addresses associated with the individual : pages at registration ARE correct , the data in the "RDMA read response only" : packets ARE correct. : : The "corruption" occurs when the packet crosse two pages that are not physically : contiguous. The second page reads back as zero in the program. : : It looks like the user VA at the point of the compare error no longer points to : the same physical address as was registered. : : This patch totally resolves the issue! Link: http://lkml.kernel.org/r/1462547040-1737-2-git-send-email-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Reviewed-by: "Kirill A. Shutemov" Reviewed-by: Dean Luick Tested-by: Alex Williamson Tested-by: Mike Marciniszyn Tested-by: Josh Collier Cc: Marc Haber Cc: [4.5] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 9 +++++++++ include/linux/swap.h | 6 +++--- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 864d7221de84..8f468e0d2534 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -500,11 +500,20 @@ static inline int page_mapcount(struct page *page) #ifdef CONFIG_TRANSPARENT_HUGEPAGE int total_mapcount(struct page *page); +int page_trans_huge_mapcount(struct page *page, int *total_mapcount); #else static inline int total_mapcount(struct page *page) { return page_mapcount(page); } +static inline int page_trans_huge_mapcount(struct page *page, + int *total_mapcount) +{ + int mapcount = page_mapcount(page); + if (total_mapcount) + *total_mapcount = mapcount; + return mapcount; +} #endif static inline struct page *virt_to_head_page(const void *x) diff --git a/include/linux/swap.h b/include/linux/swap.h index 0a4cd4703f40..ad220359f1b0 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -418,7 +418,7 @@ extern sector_t swapdev_block(int, pgoff_t); extern int page_swapcount(struct page *); extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); -extern int reuse_swap_page(struct page *); +extern bool reuse_swap_page(struct page *, int *); extern int try_to_free_swap(struct page *); struct backing_dev_info; @@ -513,8 +513,8 @@ static inline int swp_swapcount(swp_entry_t entry) return 0; } -#define reuse_swap_page(page) \ - (!PageTransCompound(page) && page_mapcount(page) == 1) +#define reuse_swap_page(page, total_mapcount) \ + (page_trans_huge_mapcount(page, total_mapcount) == 1) static inline int try_to_free_swap(struct page *page) { -- cgit v1.2.3 From ae05327a00fd47c34dfe25294b359a3f3fef96e8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 May 2016 20:36:01 -0400 Subject: ext4: switch to ->iterate_shared() Note that we need relax_dir() equivalent for directories locked shared. Signed-off-by: Al Viro --- include/linux/fs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3dc0258a2b64..e87245ac6941 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3134,6 +3134,13 @@ static inline bool dir_relax(struct inode *inode) return !IS_DEADDIR(inode); } +static inline bool dir_relax_shared(struct inode *inode) +{ + inode_unlock_shared(inode); + inode_lock_shared(inode); + return !IS_DEADDIR(inode); +} + extern bool path_noexec(const struct path *path); extern void inode_nohighmem(struct inode *inode); -- cgit v1.2.3 From 2ab71a02c56f8244ac611b5c6e6603c6fe83b966 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 25 Jan 2016 09:50:29 +0100 Subject: MIPS: BCM47xx: Move SPROM driver to drivers/firmware/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Broadcom ARM home routers store SPROM content in NVRAM just like MIPS ones. To share SPROM code we need to move it out of arch/mips/ to some common place. We already have bcm47xx_nvram in firmware path and SPROM should fit there as well. This driver is responsible for parsing SoC configuration data into a struct shared between ssb and bcma buses. This was tested with BCM4706 & BCM5357C0 (BCM47XX) and BCM4708A0 (ARCH_BCM_5301X). Signed-off-by: Rafał Miłecki Cc: Hauke Mehrtens Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/12210/ Signed-off-by: Ralf Baechle --- include/linux/bcm47xx_sprom.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 include/linux/bcm47xx_sprom.h (limited to 'include/linux') diff --git a/include/linux/bcm47xx_sprom.h b/include/linux/bcm47xx_sprom.h new file mode 100644 index 000000000000..c06b47c84e1a --- /dev/null +++ b/include/linux/bcm47xx_sprom.h @@ -0,0 +1,24 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __BCM47XX_SPROM_H +#define __BCM47XX_SPROM_H + +#include +#include +#include + +#ifdef CONFIG_BCM47XX_SPROM +int bcm47xx_sprom_register_fallbacks(void); +#else +static inline int bcm47xx_sprom_register_fallbacks(void) +{ + return -ENOTSUPP; +}; +#endif + +#endif /* __BCM47XX_SPROM_H */ -- cgit v1.2.3 From 835d2b452969820fd67a755a2c01fb6e12822448 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Wed, 3 Feb 2016 03:15:28 +0000 Subject: irqchip: mips-gic: Provide VP ID accessor Provide a gic_read_local_vp_id() function to read the VCNUM field of the GICs local VP_IDENT register. This will be used by a further patch to check that the value reported by the GIC matches up with the kernels calculation. Signed-off-by: Paul Burton Acked-by: Jason Cooper Cc: Andrew Bresticker Cc: Jason Cooper Cc: Thomas Gleixner Cc: James Hogan Cc: Marc Zyngier Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/12334/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 80f89e4a29ac..81f930b0bca9 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -103,6 +103,7 @@ #define GIC_VPE_SWINT0_MAP_OFS 0x0054 #define GIC_VPE_SWINT1_MAP_OFS 0x0058 #define GIC_VPE_OTHER_ADDR_OFS 0x0080 +#define GIC_VP_IDENT_OFS 0x0088 #define GIC_VPE_WD_CONFIG0_OFS 0x0090 #define GIC_VPE_WD_COUNT0_OFS 0x0094 #define GIC_VPE_WD_INITIAL0_OFS 0x0098 @@ -211,6 +212,10 @@ #define GIC_VPE_SMASK_FDC_SHF 6 #define GIC_VPE_SMASK_FDC_MSK (MSK(1) << GIC_VPE_SMASK_FDC_SHF) +/* GIC_VP_IDENT fields */ +#define GIC_VP_IDENT_VCNUM_SHF 0 +#define GIC_VP_IDENT_VCNUM_MSK (MSK(6) << GIC_VP_IDENT_VCNUM_SHF) + /* GIC nomenclature for Core Interrupt Pins. */ #define GIC_CPU_INT0 0 /* Core Interrupt 2 */ #define GIC_CPU_INT1 1 /* . */ @@ -278,4 +283,16 @@ static inline int gic_get_usm_range(struct resource *gic_usm_res) #endif /* CONFIG_MIPS_GIC */ +/** + * gic_read_local_vp_id() - read the local VPs VCNUM + * + * Read the VCNUM of the local VP from the GIC_VP_IDENT register and + * return it to the caller. This ID should be used to refer to the VP + * via the GICs VP-other region, or when calculating an offset to a + * bit representing the VP in interrupt masks. + * + * Return: The VCNUM value for the local VP. + */ +extern unsigned gic_read_local_vp_id(void); + #endif /* __LINUX_IRQCHIP_MIPS_GIC_H */ -- cgit v1.2.3 From e55d5312444087eb6bfb34c1cd5f6e0bf626cf26 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 11 Feb 2016 13:36:54 +0100 Subject: crash_dump: Add vmcore_elf32_check_arch parse_crash_elf{32|64}_headers will check the headers via the elf_check_arch respectively vmcore_elf64_check_arch macro. The MIPS architecture implements those two macros differently. In order to make the differentiation more explicit, let's introduce an vmcore_elf32_check_arch to allow the archs to overwrite it. Signed-off-by: Daniel Wagner Suggested-by: Maciej W. Rozycki Reviewed-by: Maciej W. Rozycki Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/12535/ Signed-off-by: Ralf Baechle --- include/linux/crash_dump.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 3849fce7ecfe..3873697ba21c 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -34,9 +34,13 @@ void vmcore_cleanup(void); /* * Architecture code can redefine this if there are any special checks - * needed for 64-bit ELF vmcores. In case of 32-bit only architecture, - * this can be set to zero. + * needed for 32-bit ELF or 64-bit ELF vmcores. In case of 32-bit + * only architecture, vmcore_elf64_check_arch can be set to zero. */ +#ifndef vmcore_elf32_check_arch +#define vmcore_elf32_check_arch(x) elf_check_arch(x) +#endif + #ifndef vmcore_elf64_check_arch #define vmcore_elf64_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) #endif -- cgit v1.2.3 From ca9eb49aa9562eaadf3cea071ec7018ad6800425 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 8 Feb 2016 18:43:50 +0000 Subject: SIGNAL: Move generic copy_siginfo() to signal.h The generic copy_siginfo() is currently defined in asm-generic/siginfo.h, after including uapi/asm-generic/siginfo.h which defines the generic struct siginfo. However this makes it awkward for an architecture to use it if it has to define its own struct siginfo (e.g. MIPS and potentially IA64), since it means that asm-generic/siginfo.h can only be included after defining the arch-specific siginfo, which may be problematic if the arch-specific definition needs definitions from uapi/asm-generic/siginfo.h. It is possible to work around this by first including uapi/asm-generic/siginfo.h to get the constants before defining the arch-specific siginfo, and include asm-generic/siginfo.h after. However uapi headers can't be included by other uapi headers, so that first include has to be in an ifdef __kernel__, with the non __kernel__ case including the non-UAPI header instead. Instead of that mess, move the generic copy_siginfo() definition into linux/signal.h, which allows an arch-specific uapi/asm/siginfo.h to include asm-generic/siginfo.h and define the arch-specific siginfo, and for the generic copy_siginfo() to see that arch-specific definition. Signed-off-by: James Hogan Cc: Arnd Bergmann Cc: Ralf Baechle Cc: Petr Malat Cc: Tony Luck Cc: Fenghua Yu Cc: Christopher Ferris Cc: linux-arch@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: linux-ia64@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: # 4.0- Patchwork: https://patchwork.linux-mips.org/patch/12478/ Signed-off-by: Ralf Baechle --- include/linux/signal.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index 92557bbce7e7..d80259afb9e5 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -28,6 +28,21 @@ struct sigpending { sigset_t signal; }; +#ifndef HAVE_ARCH_COPY_SIGINFO + +#include + +static inline void copy_siginfo(struct siginfo *to, struct siginfo *from) +{ + if (from->si_code < 0) + memcpy(to, from, sizeof(*to)); + else + /* _sigchld is currently the largest know union member */ + memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld)); +} + +#endif + /* * Define some primitives to manipulate sigset_t. */ -- cgit v1.2.3 From 3491caf2755e9f312666712510d80b00c81ff247 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 13 May 2016 12:16:35 +0200 Subject: KVM: halt_polling: provide a way to qualify wakeups during poll MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some wakeups should not be considered a sucessful poll. For example on s390 I/O interrupts are usually floating, which means that _ALL_ CPUs would be considered runnable - letting all vCPUs poll all the time for transactional like workload, even if one vCPU would be enough. This can result in huge CPU usage for large guests. This patch lets architectures provide a way to qualify wakeups if they should be considered a good/bad wakeups in regard to polls. For s390 the implementation will fence of halt polling for anything but known good, single vCPU events. The s390 implementation for floating interrupts does a wakeup for one vCPU, but the interrupt will be delivered by whatever CPU checks first for a pending interrupt. We prefer the woken up CPU by marking the poll of this CPU as "good" poll. This code will also mark several other wakeup reasons like IPI or expired timers as "good". This will of course also mark some events as not sucessful. As KVM on z runs always as a 2nd level hypervisor, we prefer to not poll, unless we are really sure, though. This patch successfully limits the CPU usage for cases like uperf 1byte transactional ping pong workload or wakeup heavy workload like OLTP while still providing a proper speedup. This also introduced a new vcpu stat "halt_poll_no_tuning" that marks wakeups that are considered not good for polling. Signed-off-by: Christian Borntraeger Acked-by: Radim Krčmář (for an earlier version) Cc: David Matlack Cc: Wanpeng Li [Rename config symbol. - Paolo] Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 92a0229044fb..bbcd921d7cb0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -229,6 +229,7 @@ struct kvm_vcpu { sigset_t sigset; struct kvm_vcpu_stat stat; unsigned int halt_poll_ns; + bool valid_wakeup; #ifdef CONFIG_HAS_IOMEM int mmio_needed; @@ -1196,4 +1197,18 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ +#ifdef CONFIG_HAVE_KVM_INVALID_WAKEUPS +/* If we wakeup during the poll time, was it a sucessful poll? */ +static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu) +{ + return vcpu->valid_wakeup; +} + +#else +static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu) +{ + return true; +} +#endif /* CONFIG_HAVE_KVM_INVALID_WAKEUPS */ + #endif -- cgit v1.2.3 From d9e4084f6c9746e51a78a4d7ebf4983023289b32 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 May 2016 10:53:47 -0400 Subject: svcrdma: Generalize svc_rdma_xdr_decode_req() Clean up: Pass in just the piece of the svc_rqst that is needed here. While we're in the area, add an informative documenting comment. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 3081339968c3..d6917b896d3a 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -199,7 +199,7 @@ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct xdr_buf *rcvbuf); /* svc_rdma_marshal.c */ -extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg *, struct svc_rqst *); +extern int svc_rdma_xdr_decode_req(struct xdr_buf *); extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, struct rpcrdma_msg *, enum rpcrdma_errcode, __be32 *); -- cgit v1.2.3 From c4263233f30e72f2645ff83c9074c994f88b015a Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 23:22:50 +1000 Subject: drivers/of: Specify parent node in of_fdt_unflatten_tree() This adds one more argument to of_fdt_unflatten_tree() to specify the parent node of the FDT blob that is going to be unflattened. In the result, the function can be used to unflatten FDT blob that represents device sub-tree in PowerNV PCI hotplug driver. Cc: Jyri Sarha Signed-off-by: Gavin Shan Acked-by: Rob Herring Acked-by: Jyri Sarha Signed-off-by: Rob Herring --- include/linux/of_fdt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 2fbe8682a66f..1bffcbd56f8e 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -38,6 +38,7 @@ extern bool of_fdt_is_big_endian(const void *blob, extern int of_fdt_match(const void *blob, unsigned long node, const char *const *compat); extern void of_fdt_unflatten_tree(const unsigned long *blob, + struct device_node *dad, struct device_node **mynodes); /* TBD: Temporary export of fdt globals - remove when code fully merged */ -- cgit v1.2.3 From 83262418b0ef8bda66eca7c72d4c24ae6f7b230e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 3 May 2016 23:22:51 +1000 Subject: drivers/of: Return allocated memory from of_fdt_unflatten_tree() This returns the allocate memory chunk, storing the unflattened device tree, from of_fdt_unflatten_tree() so that memory chunk can be released on demand in PowerNV PCI hotplug driver. Signed-off-by: Gavin Shan Acked-by: Rob Herring Signed-off-by: Rob Herring --- include/linux/of_fdt.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 1bffcbd56f8e..901ec01c9fba 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -37,9 +37,9 @@ extern bool of_fdt_is_big_endian(const void *blob, unsigned long node); extern int of_fdt_match(const void *blob, unsigned long node, const char *const *compat); -extern void of_fdt_unflatten_tree(const unsigned long *blob, - struct device_node *dad, - struct device_node **mynodes); +extern void *of_fdt_unflatten_tree(const unsigned long *blob, + struct device_node *dad, + struct device_node **mynodes); /* TBD: Temporary export of fdt globals - remove when code fully merged */ extern int __initdata dt_root_addr_cells; -- cgit v1.2.3 From 9a99425f0736a416442525ac7b15903173888b86 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 25 Feb 2016 09:56:04 -0800 Subject: iscsi_ibft: Add prefix-len attr and display netmask The iBFT table only specifies a prefix length, not a netmask. And the netmask is pretty much pointless for IPv6. So introduce a new attribute 'prefix-len'. Some older user-space code might rely on the netmask attribute being present, so we should always display it. Changes from v1: - Combined two patches into one Changes from v2: - Cleaned up/corrected wording for patch description Signed-off-by: Hannes Reinecke Signed-off-by: Lee Duncan Reviewed-by: Mike Christie Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/iscsi_boot_sysfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iscsi_boot_sysfs.h b/include/linux/iscsi_boot_sysfs.h index 2a8b1659bf35..548d55395488 100644 --- a/include/linux/iscsi_boot_sysfs.h +++ b/include/linux/iscsi_boot_sysfs.h @@ -23,6 +23,7 @@ enum iscsi_boot_eth_properties_enum { ISCSI_BOOT_ETH_INDEX, ISCSI_BOOT_ETH_FLAGS, ISCSI_BOOT_ETH_IP_ADDR, + ISCSI_BOOT_ETH_PREFIX_LEN, ISCSI_BOOT_ETH_SUBNET_MASK, ISCSI_BOOT_ETH_ORIGIN, ISCSI_BOOT_ETH_GATEWAY, -- cgit v1.2.3 From b3c8eb50383178f3a4dcf1dc867001156da6473d Mon Sep 17 00:00:00 2001 From: David Bond Date: Wed, 23 Mar 2016 21:49:26 -0400 Subject: ibft: Expose iBFT acpi header via sysfs Some ethernet adapter vendors are supplying products which support optional (payed license) features. On some adapters this includes a hardware iscsi initiator. The same adapters in a normal (no extra licenses) mode of operation can be used as a software iscsi initiator. In addition, software iscsi boot initiators are becoming a standard part of many vendors uefi implementations. This is creating difficulties during early boot/install determining the proper configuration method for these adapters when they are used as a boot device. The attached patch creates sysfs entries to expose information from the acpi header of the ibft table. This information allows for a method to easily determining if an ibft table was created by a ethernet card's firmware or the system uefi/bios. In the case of a hardware initiator this information in combination with the pci vendor and device id can be used to ascertain any vendor specific behaviors that need to be accommodated. Reviewed-by: Lee Duncan Signed-off-by: David Bond Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/iscsi_boot_sysfs.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iscsi_boot_sysfs.h b/include/linux/iscsi_boot_sysfs.h index 548d55395488..10923d730486 100644 --- a/include/linux/iscsi_boot_sysfs.h +++ b/include/linux/iscsi_boot_sysfs.h @@ -64,6 +64,12 @@ enum iscsi_boot_initiator_properties_enum { ISCSI_BOOT_INI_END_MARKER, }; +enum iscsi_boot_acpitbl_properties_enum { + ISCSI_BOOT_ACPITBL_SIGNATURE, + ISCSI_BOOT_ACPITBL_OEM_ID, + ISCSI_BOOT_ACPITBL_OEM_TABLE_ID, +}; + struct attribute_group; struct iscsi_boot_kobj { @@ -127,6 +133,13 @@ iscsi_boot_create_target(struct iscsi_boot_kset *boot_kset, int index, umode_t (*is_visible) (void *data, int type), void (*release) (void *data)); +struct iscsi_boot_kobj * +iscsi_boot_create_acpitbl(struct iscsi_boot_kset *boot_kset, int index, + void *data, + ssize_t (*show)(void *data, int type, char *buf), + umode_t (*is_visible)(void *data, int type), + void (*release)(void *data)); + struct iscsi_boot_kset *iscsi_boot_create_kset(const char *set_name); struct iscsi_boot_kset *iscsi_boot_create_host_kset(unsigned int hostno); void iscsi_boot_destroy_kset(struct iscsi_boot_kset *boot_kset); -- cgit v1.2.3 From 9dc0b289c4c09bc1a92bdcc055cb37af9b72eb28 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Fri, 13 May 2016 12:55:39 +0000 Subject: net/mlx5_core: Firmware commands to support flow counters Getting packet/byte statistics on flows is done through flow counters. Implement the firmware commands to alloc, free and query flow counters. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc.h | 99 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 96 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4ce4ea422a10..614c795eadea 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -202,6 +202,9 @@ enum { MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY = 0x936, MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY = 0x937, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY = 0x938, + MLX5_CMD_OP_ALLOC_FLOW_COUNTER = 0x939, + MLX5_CMD_OP_DEALLOC_FLOW_COUNTER = 0x93a, + MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b, MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c }; @@ -265,7 +268,8 @@ struct mlx5_ifc_flow_table_fields_supported_bits { struct mlx5_ifc_flow_table_prop_layout_bits { u8 ft_support[0x1]; - u8 reserved_at_1[0x2]; + u8 reserved_at_1[0x1]; + u8 flow_counter[0x1]; u8 flow_modify_en[0x1]; u8 modify_root[0x1]; u8 identified_miss_table_mode[0x1]; @@ -941,6 +945,19 @@ struct mlx5_ifc_dest_format_struct_bits { u8 reserved_at_20[0x20]; }; +struct mlx5_ifc_flow_counter_list_bits { + u8 reserved_at_0[0x10]; + u8 flow_counter_id[0x10]; + + u8 reserved_at_20[0x20]; +}; + +union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits { + struct mlx5_ifc_dest_format_struct_bits dest_format_struct; + struct mlx5_ifc_flow_counter_list_bits flow_counter_list; + u8 reserved_at_0[0x40]; +}; + struct mlx5_ifc_fte_match_param_bits { struct mlx5_ifc_fte_match_set_lyr_2_4_bits outer_headers; @@ -2006,6 +2023,7 @@ enum { MLX5_FLOW_CONTEXT_ACTION_ALLOW = 0x1, MLX5_FLOW_CONTEXT_ACTION_DROP = 0x2, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST = 0x4, + MLX5_FLOW_CONTEXT_ACTION_COUNT = 0x8, }; struct mlx5_ifc_flow_context_bits { @@ -2022,13 +2040,16 @@ struct mlx5_ifc_flow_context_bits { u8 reserved_at_80[0x8]; u8 destination_list_size[0x18]; - u8 reserved_at_a0[0x160]; + u8 reserved_at_a0[0x8]; + u8 flow_counter_list_size[0x18]; + + u8 reserved_at_c0[0x140]; struct mlx5_ifc_fte_match_param_bits match_value; u8 reserved_at_1200[0x600]; - struct mlx5_ifc_dest_format_struct_bits destination[0]; + union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits destination[0]; }; enum { @@ -3937,6 +3958,34 @@ struct mlx5_ifc_query_flow_group_in_bits { u8 reserved_at_e0[0x120]; }; +struct mlx5_ifc_query_flow_counter_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_traffic_counter_bits flow_statistics[0]; +}; + +struct mlx5_ifc_query_flow_counter_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x80]; + + u8 clear[0x1]; + u8 reserved_at_c1[0xf]; + u8 num_of_counters[0x10]; + + u8 reserved_at_e0[0x10]; + u8 flow_counter_id[0x10]; +}; + struct mlx5_ifc_query_esw_vport_context_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -5510,6 +5559,28 @@ struct mlx5_ifc_dealloc_pd_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_dealloc_flow_counter_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_dealloc_flow_counter_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 flow_counter_id[0x10]; + + u8 reserved_at_60[0x20]; +}; + struct mlx5_ifc_create_xrc_srq_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6237,6 +6308,28 @@ struct mlx5_ifc_alloc_pd_in_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_alloc_flow_counter_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x10]; + u8 flow_counter_id[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_alloc_flow_counter_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_add_vxlan_udp_dport_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; -- cgit v1.2.3 From bd5251dbf156b6bc0661a9409d46e47160df61dd Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Fri, 13 May 2016 12:55:40 +0000 Subject: net/mlx5_core: Introduce flow steering destination of type counter When adding a flow steering rule with a counter, need to supply a destination of type MLX5_FLOW_DESTINATION_TYPE_COUNTER, with a pointer to a struct mlx5_fc. Also, MLX5_FLOW_CONTEXT_ACTION_COUNT bit should be set in the action. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx5/fs.h | 2 ++ include/linux/mlx5/mlx5_ifc.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 6467569ad76e..c8b9ede1c20a 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -73,6 +73,7 @@ struct mlx5_flow_destination { u32 tir_num; struct mlx5_flow_table *ft; u32 vport_num; + struct mlx5_fc *counter; }; }; @@ -125,4 +126,5 @@ void mlx5_del_flow_rule(struct mlx5_flow_rule *fr); int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, struct mlx5_flow_destination *dest); +struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_rule *rule); #endif diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 614c795eadea..9a05cd7e5890 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -936,6 +936,8 @@ enum mlx5_flow_destination_type { MLX5_FLOW_DESTINATION_TYPE_VPORT = 0x0, MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1, MLX5_FLOW_DESTINATION_TYPE_TIR = 0x2, + + MLX5_FLOW_DESTINATION_TYPE_COUNTER = 0x100, }; struct mlx5_ifc_dest_format_struct_bits { -- cgit v1.2.3 From 43a335e055bb7ebdc8a68ce7362ef26ef5bda92b Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Fri, 13 May 2016 12:55:41 +0000 Subject: net/mlx5_core: Flow counters infrastructure If a counter has the aging flag set when created, it is added to a list of counters that will be queried periodically from a workqueue. query result and last use timestamp are cached. add/del counter must be very efficient since thousands of such operations might be issued in a second. There is only a single reference to counters without aging, therefore no need for locks. But, counters with aging enabled are stored in a list. In order to make code as lockless as possible, all the list manipulation and access to hardware is done from a single context - the periodic counters query thread. The hardware supports multiple counters per FTE, however currently we are using one counter for each FTE. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 14 ++++++++++++++ include/linux/mlx5/fs.h | 5 +++++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9613143f0561..07b504f7eb84 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -457,6 +458,17 @@ struct mlx5_irq_info { char name[MLX5_MAX_IRQ_NAME]; }; +struct mlx5_fc_stats { + struct list_head list; + struct list_head addlist; + /* protect addlist add/splice operations */ + spinlock_t addlist_lock; + + struct workqueue_struct *wq; + struct delayed_work work; + unsigned long next_query; +}; + struct mlx5_eswitch; struct mlx5_priv { @@ -520,6 +532,8 @@ struct mlx5_priv { struct mlx5_flow_root_namespace *fdb_root_ns; struct mlx5_flow_root_namespace *esw_egress_root_ns; struct mlx5_flow_root_namespace *esw_ingress_root_ns; + + struct mlx5_fc_stats fc_stats; }; enum mlx5_device_state { diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index c8b9ede1c20a..4b7a107d9c19 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -127,4 +127,9 @@ int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, struct mlx5_flow_destination *dest); struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_rule *rule); +struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); +void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); +void mlx5_fc_query_cached(struct mlx5_fc *counter, + u64 *bytes, u64 *packets, u64 *lastuse); + #endif -- cgit v1.2.3 From c94987e40ebbae3b7b6c3ece37b6f8338830f6b1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 May 2016 19:08:27 +0200 Subject: bpf: move bpf_jit_enable declaration Move the bpf_jit_enable declaration to the filter.h file where most other core code is declared, also since we're going to add a second knob there. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 2 ++ include/linux/netdevice.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index ec1411c89105..4ff0e647598f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -496,6 +496,8 @@ void bpf_int_jit_compile(struct bpf_prog *fp); bool bpf_helper_changes_skb_data(void *func); #ifdef CONFIG_BPF_JIT +extern int bpf_jit_enable; + typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); struct bpf_binary_header * diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c2f5112f08f7..c148edfe4965 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3759,7 +3759,6 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, extern int netdev_max_backlog; extern int netdev_tstamp_prequeue; extern int weight_p; -extern int bpf_jit_enable; bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev); struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, -- cgit v1.2.3 From c237ee5eb33bf19fe0591c04ff8db19da7323a83 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 May 2016 19:08:30 +0200 Subject: bpf: add bpf_patch_insn_single helper Move the functionality to patch instructions out of the verifier code and into the core as the new bpf_patch_insn_single() helper will be needed later on for blinding as well. No changes in functionality. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 4ff0e647598f..c4aae496f376 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -495,6 +495,9 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); void bpf_int_jit_compile(struct bpf_prog *fp); bool bpf_helper_changes_skb_data(void *func); +struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, + const struct bpf_insn *patch, u32 len); + #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; -- cgit v1.2.3 From d1c55ab5e41fcd72cb0a8bef86d3f652ad9ad9f5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 May 2016 19:08:31 +0200 Subject: bpf: prepare bpf_int_jit_compile/bpf_prog_select_runtime apis Since the blinding is strictly only called from inside eBPF JITs, we need to change signatures for bpf_int_jit_compile() and bpf_prog_select_runtime() first in order to prepare that the eBPF program we're dealing with can change underneath. Hence, for call sites, we need to return the latest prog. No functional change in this patch. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index c4aae496f376..891852cf7716 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -458,7 +458,7 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) int sk_filter(struct sock *sk, struct sk_buff *skb); -int bpf_prog_select_runtime(struct bpf_prog *fp); +struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err); void bpf_prog_free(struct bpf_prog *fp); struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); @@ -492,7 +492,8 @@ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp); void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); -void bpf_int_jit_compile(struct bpf_prog *fp); + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); bool bpf_helper_changes_skb_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, -- cgit v1.2.3 From 4f3446bb809f20ad56cadf712e6006815ae7a8f9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 May 2016 19:08:32 +0200 Subject: bpf: add generic constant blinding for use in jits This work adds a generic facility for use from eBPF JIT compilers that allows for further hardening of JIT generated images through blinding constants. In response to the original work on BPF JIT spraying published by Keegan McAllister [1], most BPF JITs were changed to make images read-only and start at a randomized offset in the page, where the rest was filled with trap instructions. We have this nowadays in x86, arm, arm64 and s390 JIT compilers. Additionally, later work also made eBPF interpreter images read only for kernels supporting DEBUG_SET_MODULE_RONX, that is, x86, arm, arm64 and s390 archs as well currently. This is done by default for mentioned JITs when JITing is enabled. Furthermore, we had a generic and configurable constant blinding facility on our todo for quite some time now to further make spraying harder, and first implementation since around netconf 2016. We found that for systems where untrusted users can load cBPF/eBPF code where JIT is enabled, start offset randomization helps a bit to make jumps into crafted payload harder, but in case where larger programs that cross page boundary are injected, we again have some part of the program opcodes at a page start offset. With improved guessing and more reliable payload injection, chances can increase to jump into such payload. Elena Reshetova recently wrote a test case for it [2, 3]. Moreover, eBPF comes with 64 bit constants, which can leave some more room for payloads. Note that for all this, additional bugs in the kernel are still required to make the jump (and of course to guess right, to not jump into a trap) and naturally the JIT must be enabled, which is disabled by default. For helping mitigation, the general idea is to provide an option bpf_jit_harden that admins can tweak along with bpf_jit_enable, so that for cases where JIT should be enabled for performance reasons, the generated image can be further hardened with blinding constants for unpriviledged users (bpf_jit_harden == 1), with trading off performance for these, but not for privileged ones. We also added the option of blinding for all users (bpf_jit_harden == 2), which is quite helpful for testing f.e. with test_bpf.ko. There are no further e.g. hardening levels of bpf_jit_harden switch intended, rationale is to have it dead simple to use as on/off. Since this functionality would need to be duplicated over and over for JIT compilers to use, which are already complex enough, we provide a generic eBPF byte-code level based blinding implementation, which is then just transparently JITed. JIT compilers need to make only a few changes to integrate this facility and can be migrated one by one. This option is for eBPF JITs and will be used in x86, arm64, s390 without too much effort, and soon ppc64 JITs, thus that native eBPF can be blinded as well as cBPF to eBPF migrations, so that both can be covered with a single implementation. The rule for JITs is that bpf_jit_blind_constants() must be called from bpf_int_jit_compile(), and in case blinding is disabled, we follow normally with JITing the passed program. In case blinding is enabled and we fail during the process of blinding itself, we must return with the interpreter. Similarly, in case the JITing process after the blinding failed, we return normally to the interpreter with the non-blinded code. Meaning, interpreter doesn't change in any way and operates on eBPF code as usual. For doing this pre-JIT blinding step, we need to make use of a helper/auxiliary register, here BPF_REG_AX. This is strictly internal to the JIT and not in any way part of the eBPF architecture. Just like in the same way as JITs internally make use of some helper registers when emitting code, only that here the helper register is one abstraction level higher in eBPF bytecode, but nevertheless in JIT phase. That helper register is needed since f.e. manually written program can issue loads to all registers of eBPF architecture. The core concept with the additional register is: blind out all 32 and 64 bit constants by converting BPF_K based instructions into a small sequence from K_VAL into ((RND ^ K_VAL) ^ RND). Therefore, this is transformed into: BPF_REG_AX := (RND ^ K_VAL), BPF_REG_AX ^= RND, and REG BPF_REG_AX, so actual operation on the target register is translated from BPF_K into BPF_X one that is operating on BPF_REG_AX's content. During rewriting phase when blinding, RND is newly generated via prandom_u32() for each processed instruction. 64 bit loads are split into two 32 bit loads to make translation and patching not too complex. Only basic thing required by JITs is to call the helper bpf_jit_blind_constants()/bpf_jit_prog_release_other() pair, and to map BPF_REG_AX into an unused register. Small bpf_jit_disasm extract from [2] when applied to x86 JIT: echo 0 > /proc/sys/net/core/bpf_jit_harden ffffffffa034f5e9 + : [...] 39: mov $0xa8909090,%eax 3e: mov $0xa8909090,%eax 43: mov $0xa8ff3148,%eax 48: mov $0xa89081b4,%eax 4d: mov $0xa8900bb0,%eax 52: mov $0xa810e0c1,%eax 57: mov $0xa8908eb4,%eax 5c: mov $0xa89020b0,%eax [...] echo 1 > /proc/sys/net/core/bpf_jit_harden ffffffffa034f1e5 + : [...] 39: mov $0xe1192563,%r10d 3f: xor $0x4989b5f3,%r10d 46: mov %r10d,%eax 49: mov $0xb8296d93,%r10d 4f: xor $0x10b9fd03,%r10d 56: mov %r10d,%eax 59: mov $0x8c381146,%r10d 5f: xor $0x24c7200e,%r10d 66: mov %r10d,%eax 69: mov $0xeb2a830e,%r10d 6f: xor $0x43ba02ba,%r10d 76: mov %r10d,%eax 79: mov $0xd9730af,%r10d 7f: xor $0xa5073b1f,%r10d 86: mov %r10d,%eax 89: mov $0x9a45662b,%r10d 8f: xor $0x325586ea,%r10d 96: mov %r10d,%eax [...] As can be seen, original constants that carry payload are hidden when enabled, actual operations are transformed from constant-based to register-based ones, making jumps into constants ineffective. Above extract/example uses single BPF load instruction over and over, but of course all instructions with constants are blinded. Performance wise, JIT with blinding performs a bit slower than just JIT and faster than interpreter case. This is expected, since we still get all the performance benefits from JITing and in normal use-cases not every single instruction needs to be blinded. Summing up all 296 test cases averaged over multiple runs from test_bpf.ko suite, interpreter was 55% slower than JIT only and JIT with blinding was 8% slower than JIT only. Since there are also some extremes in the test suite, I expect for ordinary workloads that the performance for the JIT with blinding case is even closer to JIT only case, f.e. nmap test case from suite has averaged timings in ns 29 (JIT), 35 (+ blinding), and 151 (interpreter). BPF test suite, seccomp test suite, eBPF sample code and various bigger networking eBPF programs have been tested with this and were running fine. For testing purposes, I also adapted interpreter and redirected blinded eBPF image to interpreter and also here all tests pass. [1] http://mainisusuallyafunction.blogspot.com/2012/11/attacking-hardened-linux-systems-with.html [2] https://github.com/01org/jit-spray-poc-for-ksp/ [3] http://www.openwall.com/lists/kernel-hardening/2016/05/03/5 Signed-off-by: Daniel Borkmann Reviewed-by: Elena Reshetova Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 891852cf7716..6fc31ef1da2d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -13,6 +13,8 @@ #include #include #include +#include + #include #include @@ -42,6 +44,15 @@ struct bpf_prog_aux; #define BPF_REG_X BPF_REG_7 #define BPF_REG_TMP BPF_REG_8 +/* Kernel hidden auxiliary/helper register for hardening step. + * Only used by eBPF JITs. It's nothing more than a temporary + * register that JITs use internally, only that here it's part + * of eBPF instructions that have been rewritten for blinding + * constants. See JIT pre-step in bpf_jit_blind_constants(). + */ +#define BPF_REG_AX MAX_BPF_REG +#define MAX_BPF_JIT_REG (MAX_BPF_REG + 1) + /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 @@ -501,6 +512,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; +extern int bpf_jit_harden; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); @@ -513,6 +525,9 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr); void bpf_jit_compile(struct bpf_prog *fp); void bpf_jit_free(struct bpf_prog *fp); +struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp); +void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other); + static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, u32 pass, void *image) { @@ -523,6 +538,33 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, 16, 1, image, proglen, false); } + +static inline bool bpf_jit_is_ebpf(void) +{ +# ifdef CONFIG_HAVE_EBPF_JIT + return true; +# else + return false; +# endif +} + +static inline bool bpf_jit_blinding_enabled(void) +{ + /* These are the prerequisites, should someone ever have the + * idea to call blinding outside of them, we make sure to + * bail out. + */ + if (!bpf_jit_is_ebpf()) + return false; + if (!bpf_jit_enable) + return false; + if (!bpf_jit_harden) + return false; + if (bpf_jit_harden == 1 && capable(CAP_SYS_ADMIN)) + return false; + + return true; +} #else static inline void bpf_jit_compile(struct bpf_prog *fp) { -- cgit v1.2.3 From 92efb1bd9bcbdf83cc0e6cfead752d0c82f63677 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 16 May 2016 15:12:02 -0500 Subject: PCI: Identify Enhanced Allocation (EA) BAR Equivalent resources in sysfs Resource flags are exposed to userspace via the sysfs "resource" file. lspci reads the sysfs file to determine resource properties. Add a "BAR Equivalent Indicator" flag so lspci can distinguish between [virtual] and [enhanced] resources. Signed-off-by: Alex Williamson Signed-off-by: Sean O. Stalley Signed-off-by: Bjorn Helgaas --- include/linux/ioport.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 0b65543dc6cf..6230064d7f95 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -26,6 +26,9 @@ struct resource { /* * IO resources have these defined flags. + * + * PCI devices expose these flags to userspace in the "resource" sysfs file, + * so don't move them. */ #define IORESOURCE_BITS 0x000000ff /* Bus-specific bits */ @@ -110,6 +113,7 @@ struct resource { /* PCI control bits. Shares IORESOURCE_BITS with above PCI ROM. */ #define IORESOURCE_PCI_FIXED (1<<4) /* Do not move resource */ +#define IORESOURCE_PCI_EA_BEI (1<<5) /* BAR Equivalent Indicator */ /* * I/O Resource Descriptors -- cgit v1.2.3 From e4b2749158631e6d74bf14d2ef3804d780e2f770 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 11 May 2016 11:58:47 +0200 Subject: DAX: move RADIX_DAX_ definitions to dax.c These don't belong in radix-tree.c any more than PAGECACHE_TAG_* do. Let's try to maintain the idea that radix-tree simply implements an abstract data type. Acked-by: Ross Zwisler Reviewed-by: Matthew Wilcox Signed-off-by: NeilBrown Signed-off-by: Jan Kara Signed-off-by: Vishal Verma --- include/linux/radix-tree.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 51a97ac8bfbf..d08d6ec3bf53 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -52,15 +52,6 @@ #define RADIX_TREE_EXCEPTIONAL_ENTRY 2 #define RADIX_TREE_EXCEPTIONAL_SHIFT 2 -#define RADIX_DAX_MASK 0xf -#define RADIX_DAX_SHIFT 4 -#define RADIX_DAX_PTE (0x4 | RADIX_TREE_EXCEPTIONAL_ENTRY) -#define RADIX_DAX_PMD (0x8 | RADIX_TREE_EXCEPTIONAL_ENTRY) -#define RADIX_DAX_TYPE(entry) ((unsigned long)entry & RADIX_DAX_MASK) -#define RADIX_DAX_SECTOR(entry) (((unsigned long)entry >> RADIX_DAX_SHIFT)) -#define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)sector << \ - RADIX_DAX_SHIFT | (pmd ? RADIX_DAX_PMD : RADIX_DAX_PTE))) - static inline int radix_tree_is_indirect_ptr(void *ptr) { return (int)((unsigned long)ptr & RADIX_TREE_INDIRECT_PTR); -- cgit v1.2.3 From 02fbd139759feb1f331cebd858523b5d774082e6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 11 May 2016 11:58:48 +0200 Subject: dax: Remove complete_unwritten argument Fault handlers currently take complete_unwritten argument to convert unwritten extents after PTEs are updated. However no filesystem uses this anymore as the code is racy. Remove the unused argument. Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Vishal Verma --- include/linux/dax.h | 17 +++++++---------- include/linux/fs.h | 1 - 2 files changed, 7 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 636dd59ab505..7c45ac7ea1d1 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -10,10 +10,8 @@ ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); -int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, - dax_iodone_t); -int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, - dax_iodone_t); +int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); +int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); #ifdef CONFIG_FS_DAX struct page *read_dax_sector(struct block_device *bdev, sector_t n); @@ -27,21 +25,20 @@ static inline struct page *read_dax_sector(struct block_device *bdev, #ifdef CONFIG_TRANSPARENT_HUGEPAGE int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *, - unsigned int flags, get_block_t, dax_iodone_t); + unsigned int flags, get_block_t); int __dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *, - unsigned int flags, get_block_t, dax_iodone_t); + unsigned int flags, get_block_t); #else static inline int dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd, unsigned int flags, get_block_t gb, - dax_iodone_t di) + pmd_t *pmd, unsigned int flags, get_block_t gb) { return VM_FAULT_FALLBACK; } #define __dax_pmd_fault dax_pmd_fault #endif int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *); -#define dax_mkwrite(vma, vmf, gb, iod) dax_fault(vma, vmf, gb, iod) -#define __dax_mkwrite(vma, vmf, gb, iod) __dax_fault(vma, vmf, gb, iod) +#define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb) +#define __dax_mkwrite(vma, vmf, gb) __dax_fault(vma, vmf, gb) static inline bool vma_is_dax(struct vm_area_struct *vma) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 70e61b58baaf..9f2813090d1b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -74,7 +74,6 @@ typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, ssize_t bytes, void *private); -typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); #define MAY_EXEC 0x00000001 #define MAY_WRITE 0x00000002 -- cgit v1.2.3 From cfbcf468454ab4b20f0b4b62da51920b99fdb19e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 28 Apr 2016 12:30:53 -0300 Subject: perf core: Pass max stack as a perf_callchain_entry context This makes perf_callchain_{user,kernel}() receive the max stack as context for the perf_callchain_entry, instead of accessing the global sysctl_perf_event_max_stack. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: David Ahern Cc: Frederic Weisbecker Cc: He Kuang Cc: Jiri Olsa Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Wang Nan Cc: Zefan Li Link: http://lkml.kernel.org/n/tip-kolmn1yo40p7jhswxwrc7rrd@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9e1c3ada91c4..dbd18246b36e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -61,6 +61,11 @@ struct perf_callchain_entry { __u64 ip[0]; /* /proc/sys/kernel/perf_event_max_stack */ }; +struct perf_callchain_entry_ctx { + struct perf_callchain_entry *entry; + u32 max_stack; +}; + struct perf_raw_record { u32 size; void *data; @@ -1063,19 +1068,20 @@ extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); -extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs); -extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs); +extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); +extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); extern struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, - bool crosstask, bool add_mark); + u32 max_stack, bool crosstask, bool add_mark); extern int get_callchain_buffers(void); extern void put_callchain_buffers(void); extern int sysctl_perf_event_max_stack; -static inline int perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) +static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip) { - if (entry->nr < sysctl_perf_event_max_stack) { + struct perf_callchain_entry *entry = ctx->entry; + if (entry->nr < ctx->max_stack) { entry->ip[entry->nr++] = ip; return 0; } else { -- cgit v1.2.3 From 3b1fff08038bd0792b1aa1e9703b2dd0512a3fd0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 10 May 2016 18:08:32 -0300 Subject: perf core: Add a 'nr' field to perf_event_callchain_context We will use it to count how many addresses are in the entry->ip[] array, excluding PERF_CONTEXT_{KERNEL,USER,etc} entries, so that we can really return the number of entries specified by the user via the relevant sysctl, kernel.perf_event_max_contexts, or via the per event perf_event_attr.sample_max_stack knob. This way we keep the perf_sample->ip_callchain->nr meaning, that is the number of entries, be it real addresses or PERF_CONTEXT_ entries, while honouring the max_stack knobs, i.e. the end result will be max_stack entries if we have at least that many entries in a given stack trace. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-s8teto51tdqvlfhefndtat9r@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index dbd18246b36e..3803bb1a862b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -64,6 +64,7 @@ struct perf_callchain_entry { struct perf_callchain_entry_ctx { struct perf_callchain_entry *entry; u32 max_stack; + u32 nr; }; struct perf_raw_record { @@ -1080,9 +1081,10 @@ extern int sysctl_perf_event_max_stack; static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip) { - struct perf_callchain_entry *entry = ctx->entry; - if (entry->nr < ctx->max_stack) { + if (ctx->nr < ctx->max_stack) { + struct perf_callchain_entry *entry = ctx->entry; entry->ip[entry->nr++] = ip; + ++ctx->nr; return 0; } else { return -1; /* no more room, stop walking the stack */ -- cgit v1.2.3 From 3e4de4ec4cfea40994b47a79767610153edbf45b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 May 2016 13:01:50 -0300 Subject: perf core: Add perf_callchain_store_context() helper We need have different helpers to account how many contexts we have in the sample and for real addresses, so do it now as a prep patch, to ease review. Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-q964tnyuqrxw5gld18vizs3c@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3803bb1a862b..2024b14cc2b1 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1079,6 +1079,8 @@ extern void put_callchain_buffers(void); extern int sysctl_perf_event_max_stack; +#define perf_callchain_store_context(ctx, context) perf_callchain_store(ctx, context) + static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip) { if (ctx->nr < ctx->max_stack) { -- cgit v1.2.3 From c85b03349640b34f3545503c8429fc43005e9a92 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 May 2016 13:06:21 -0300 Subject: perf core: Separate accounting of contexts and real addresses in a stack trace The perf_sample->ip_callchain->nr value includes all the entries in the ip_callchain->ip[] array, real addresses and PERF_CONTEXT_{KERNEL,USER,etc}, while what the user expects is that what is in the kernel.perf_event_max_stack sysctl or in the upcoming per event perf_event_attr.sample_max_stack knob be honoured in terms of IP addresses in the stack trace. So allocate a bunch of extra entries for contexts, and do the accounting via perf_callchain_entry_ctx struct members. A new sysctl, kernel.perf_event_max_contexts_per_stack is also introduced for investigating possible bugs in the callchain implementation by some arch. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: David Ahern Cc: Frederic Weisbecker Cc: He Kuang Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Wang Nan Cc: Zefan Li Link: http://lkml.kernel.org/n/tip-3b4wnqk340c4sg4gwkfdi9yk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2024b14cc2b1..6b87be908790 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -65,6 +65,8 @@ struct perf_callchain_entry_ctx { struct perf_callchain_entry *entry; u32 max_stack; u32 nr; + short contexts; + bool contexts_maxed; }; struct perf_raw_record { @@ -1078,12 +1080,24 @@ extern int get_callchain_buffers(void); extern void put_callchain_buffers(void); extern int sysctl_perf_event_max_stack; +extern int sysctl_perf_event_max_contexts_per_stack; -#define perf_callchain_store_context(ctx, context) perf_callchain_store(ctx, context) +static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx *ctx, u64 ip) +{ + if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) { + struct perf_callchain_entry *entry = ctx->entry; + entry->ip[entry->nr++] = ip; + ++ctx->contexts; + return 0; + } else { + ctx->contexts_maxed = true; + return -1; /* no more room, stop walking the stack */ + } +} static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip) { - if (ctx->nr < ctx->max_stack) { + if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) { struct perf_callchain_entry *entry = ctx->entry; entry->ip[entry->nr++] = ip; ++ctx->nr; -- cgit v1.2.3 From 2af3a8159cd204fc8437ed2f75863f0fb930f0d0 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 10 May 2016 10:23:52 -0600 Subject: block: Add vfs_msg() interface In preparation of moving DAX capability checks to the block layer from filesystem code, add a VFS message interface that aligns with filesystem's message format. For instance, a vfs_msg() message followed by XFS messages in case of a dax mount error may look like: VFS (pmem0p1): error: unaligned partition for dax XFS (pmem0p1): DAX unsupported by block device. Turning off DAX. XFS (pmem0p1): Mounting V5 Filesystem : vfs_msg() is largely based on ext4_msg(). Signed-off-by: Toshi Kani Reviewed-by: Christoph Hellwig Cc: Alexander Viro Cc: Jens Axboe Cc: "Theodore Ts'o" Cc: Andreas Dilger Cc: Jan Kara Cc: Dave Chinner Cc: Dan Williams Cc: Ross Zwisler Cc: Christoph Hellwig Cc: Boaz Harrosh Signed-off-by: Vishal Verma --- include/linux/blkdev.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 669e419d6234..78c48ab22f46 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -767,6 +767,17 @@ static inline void rq_flush_dcache_pages(struct request *rq) } #endif +#ifdef CONFIG_PRINTK +#define vfs_msg(sb, level, fmt, ...) \ + __vfs_msg(sb, level, fmt, ##__VA_ARGS__) +#else +#define vfs_msg(sb, level, fmt, ...) \ +do { \ + no_printk(fmt, ##__VA_ARGS__); \ + __vfs_msg(sb, "", " "); \ +} while (0) +#endif + extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); extern blk_qc_t generic_make_request(struct bio *bio); -- cgit v1.2.3 From 2d96afc8f70ef86c66a0b5d80c24a27d6dd13df3 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 10 May 2016 10:23:53 -0600 Subject: block: Add bdev_dax_supported() for dax mount checks DAX imposes additional requirements to a device. Add bdev_dax_supported() which performs all the precondition checks necessary for filesystem to mount the device with dax option. Also add a new check to verify if a partition is aligned by 4KB. When a partition is unaligned, any dax read/write access fails, except for metadata update. Signed-off-by: Toshi Kani Reviewed-by: Christoph Hellwig Cc: Alexander Viro Cc: Jens Axboe Cc: "Theodore Ts'o" Cc: Andreas Dilger Cc: Jan Kara Cc: Dave Chinner Cc: Dan Williams Cc: Ross Zwisler Cc: Christoph Hellwig Cc: Boaz Harrosh Signed-off-by: Vishal Verma --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 78c48ab22f46..71231a55debd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1688,6 +1688,7 @@ extern int bdev_read_page(struct block_device *, sector_t, struct page *); extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); extern long bdev_direct_access(struct block_device *, struct blk_dax_ctl *); +extern int bdev_dax_supported(struct super_block *, int); #else /* CONFIG_BLOCK */ struct block_device; -- cgit v1.2.3 From a8078b1fc616da6112eb95f0063cd34531d4ccf0 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 10 May 2016 10:23:57 -0600 Subject: block: Update blkdev_dax_capable() for consistency blkdev_dax_capable() is similar to bdev_dax_supported(), but needs to remain as a separate interface for checking dax capability of a raw block device. Rename and relocate blkdev_dax_capable() to keep them maintained consistently, and call bdev_direct_access() for the dax capability check. There is no change in the behavior. Link: https://lkml.org/lkml/2016/5/9/950 Signed-off-by: Toshi Kani Reviewed-by: Jan Kara Cc: Alexander Viro Cc: Jens Axboe Cc: Andreas Dilger Cc: Jan Kara Cc: Dave Chinner Cc: Dan Williams Cc: Ross Zwisler Cc: Christoph Hellwig Cc: Boaz Harrosh Signed-off-by: Vishal Verma --- include/linux/blkdev.h | 1 + include/linux/fs.h | 8 -------- 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 71231a55debd..27cbefe8c985 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1689,6 +1689,7 @@ extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); extern long bdev_direct_access(struct block_device *, struct blk_dax_ctl *); extern int bdev_dax_supported(struct super_block *, int); +extern bool bdev_dax_capable(struct block_device *); #else /* CONFIG_BLOCK */ struct block_device; diff --git a/include/linux/fs.h b/include/linux/fs.h index 9f2813090d1b..17f934fcf564 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2319,14 +2319,6 @@ extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); -#ifdef CONFIG_FS_DAX -extern bool blkdev_dax_capable(struct block_device *bdev); -#else -static inline bool blkdev_dax_capable(struct block_device *bdev) -{ - return false; -} -#endif extern struct super_block *blockdev_superblock; -- cgit v1.2.3 From 459a25afe97cb3d7f978b90c881f4d7aac8fb755 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Wed, 30 Mar 2016 22:03:27 +0200 Subject: pwm: Get rid of pwm->lock PWM devices are not protected against concurrent accesses. The lock in struct pwm_device might let PWM users think it is, but it's actually only protecting the enabled state. Removing this lock should be fine as long as all PWM users are aware that accesses to the PWM device have to be serialized, which seems to be the case for all of them except the sysfs interface. Patch the sysfs code by adding a lock to the pwm_export struct and making sure it's taken for all relevant accesses to the exported PWM device. Signed-off-by: Boris Brezillon Signed-off-by: Thierry Reding --- include/linux/pwm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index b78d27c42629..d2e7430ccedb 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -106,7 +106,6 @@ enum { * @pwm: global index of the PWM device * @chip: PWM chip providing this PWM device * @chip_data: chip-private data associated with the PWM device - * @lock: used to serialize accesses to the PWM device where necessary * @period: period of the PWM signal (in nanoseconds) * @duty_cycle: duty cycle of the PWM signal (in nanoseconds) * @polarity: polarity of the PWM signal @@ -119,7 +118,6 @@ struct pwm_device { unsigned int pwm; struct pwm_chip *chip; void *chip_data; - struct mutex lock; unsigned int period; unsigned int duty_cycle; -- cgit v1.2.3 From a8c3862551e063344f80c3e05d595f9d8836f355 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 14 Apr 2016 21:17:37 +0200 Subject: pwm: Keep PWM state in sync with hardware state Before the introduction of pwm_args, the core was resetting the PWM period and polarity states to the reference values (those provided through the DT, a PWM lookup table or hardcoded in the driver). Now that all PWM users are correctly using pwm_args to configure their PWM device, we can safely remove the pwm_apply_args() call in pwm_get() and of_pwm_get(). We can also get rid of the pwm_set_period() call in pwm_apply_args(), because PWM users are now directly using pargs->period instead of pwm_get_period(). By doing that we avoid messing with the current PWM period. The only remaining bit in pwm_apply_args() is the initial polarity setting, and it should go away when all PWM users have been patched to use the atomic API (with this API the polarity will be set along with other PWM arguments when configuring the PWM). Signed-off-by: Boris Brezillon Signed-off-by: Thierry Reding --- include/linux/pwm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index d2e7430ccedb..7caf549f720e 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -171,7 +171,6 @@ static inline void pwm_get_args(const struct pwm_device *pwm, static inline void pwm_apply_args(struct pwm_device *pwm) { - pwm_set_period(pwm, pwm->args.period); pwm_set_polarity(pwm, pwm->args.polarity); } -- cgit v1.2.3 From 43a276b003ed2e03de9d94b02a1ba49c1849b931 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 14 Apr 2016 21:17:38 +0200 Subject: pwm: Introduce the pwm_state concept The PWM state, represented by its period, duty_cycle and polarity is currently directly stored in the PWM device. Declare a pwm_state structure embedding those field so that we can later use this struct to atomically update all the PWM parameters at once. All pwm_get_xxx() helpers are now implemented as wrappers around pwm_get_state(). Signed-off-by: Boris Brezillon Signed-off-by: Thierry Reding --- include/linux/pwm.h | 54 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 7caf549f720e..51d4005418f9 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -98,6 +98,18 @@ enum { PWMF_EXPORTED = 1 << 2, }; +/* + * struct pwm_state - state of a PWM channel + * @period: PWM period (in nanoseconds) + * @duty_cycle: PWM duty cycle (in nanoseconds) + * @polarity: PWM polarity + */ +struct pwm_state { + unsigned int period; + unsigned int duty_cycle; + enum pwm_polarity polarity; +}; + /** * struct pwm_device - PWM channel object * @label: name of the PWM device @@ -106,10 +118,8 @@ enum { * @pwm: global index of the PWM device * @chip: PWM chip providing this PWM device * @chip_data: chip-private data associated with the PWM device - * @period: period of the PWM signal (in nanoseconds) - * @duty_cycle: duty cycle of the PWM signal (in nanoseconds) - * @polarity: polarity of the PWM signal * @args: PWM arguments + * @state: curent PWM channel state */ struct pwm_device { const char *label; @@ -119,13 +129,21 @@ struct pwm_device { struct pwm_chip *chip; void *chip_data; - unsigned int period; - unsigned int duty_cycle; - enum pwm_polarity polarity; - struct pwm_args args; + struct pwm_state state; }; +/** + * pwm_get_state() - retrieve the current PWM state + * @pwm: PWM device + * @state: state to fill with the current PWM state + */ +static inline void pwm_get_state(const struct pwm_device *pwm, + struct pwm_state *state) +{ + *state = pwm->state; +} + static inline bool pwm_is_enabled(const struct pwm_device *pwm) { return test_bit(PWMF_ENABLED, &pwm->flags); @@ -134,23 +152,31 @@ static inline bool pwm_is_enabled(const struct pwm_device *pwm) static inline void pwm_set_period(struct pwm_device *pwm, unsigned int period) { if (pwm) - pwm->period = period; + pwm->state.period = period; } static inline unsigned int pwm_get_period(const struct pwm_device *pwm) { - return pwm ? pwm->period : 0; + struct pwm_state state; + + pwm_get_state(pwm, &state); + + return state.period; } static inline void pwm_set_duty_cycle(struct pwm_device *pwm, unsigned int duty) { if (pwm) - pwm->duty_cycle = duty; + pwm->state.duty_cycle = duty; } static inline unsigned int pwm_get_duty_cycle(const struct pwm_device *pwm) { - return pwm ? pwm->duty_cycle : 0; + struct pwm_state state; + + pwm_get_state(pwm, &state); + + return state.duty_cycle; } /* @@ -160,7 +186,11 @@ int pwm_set_polarity(struct pwm_device *pwm, enum pwm_polarity polarity); static inline enum pwm_polarity pwm_get_polarity(const struct pwm_device *pwm) { - return pwm ? pwm->polarity : PWM_POLARITY_NORMAL; + struct pwm_state state; + + pwm_get_state(pwm, &state); + + return state.polarity; } static inline void pwm_get_args(const struct pwm_device *pwm, -- cgit v1.2.3 From 09a7e4a3d9fcb95ade2cb02167e85fbeb8315ce0 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 14 Apr 2016 21:17:39 +0200 Subject: pwm: Move the enabled/disabled info into pwm_state Prepare the transition to PWM atomic update by moving the enabled and disabled state into the pwm_state struct. This way we can easily update the whole PWM state by copying the new state in the ->state field. Signed-off-by: Boris Brezillon Signed-off-by: Thierry Reding --- include/linux/pwm.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 51d4005418f9..150563a806d6 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -94,8 +94,7 @@ struct pwm_args { enum { PWMF_REQUESTED = 1 << 0, - PWMF_ENABLED = 1 << 1, - PWMF_EXPORTED = 1 << 2, + PWMF_EXPORTED = 1 << 1, }; /* @@ -103,11 +102,13 @@ enum { * @period: PWM period (in nanoseconds) * @duty_cycle: PWM duty cycle (in nanoseconds) * @polarity: PWM polarity + * @enabled: PWM enabled status */ struct pwm_state { unsigned int period; unsigned int duty_cycle; enum pwm_polarity polarity; + bool enabled; }; /** @@ -146,7 +147,11 @@ static inline void pwm_get_state(const struct pwm_device *pwm, static inline bool pwm_is_enabled(const struct pwm_device *pwm) { - return test_bit(PWMF_ENABLED, &pwm->flags); + struct pwm_state state; + + pwm_get_state(pwm, &state); + + return state.enabled; } static inline void pwm_set_period(struct pwm_device *pwm, unsigned int period) -- cgit v1.2.3 From 15fa8a43c147213a9563903c87b29671035eb6e8 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 14 Apr 2016 21:17:40 +0200 Subject: pwm: Add hardware readout infrastructure Add a ->get_state() function to the pwm_ops struct to let PWM drivers initialize the PWM state attached to a PWM device. Signed-off-by: Boris Brezillon Signed-off-by: Thierry Reding --- include/linux/pwm.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 150563a806d6..33f8decd9f38 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -206,6 +206,29 @@ static inline void pwm_get_args(const struct pwm_device *pwm, static inline void pwm_apply_args(struct pwm_device *pwm) { + /* + * PWM users calling pwm_apply_args() expect to have a fresh config + * where the polarity and period are set according to pwm_args info. + * The problem is, polarity can only be changed when the PWM is + * disabled. + * + * PWM drivers supporting hardware readout may declare the PWM device + * as enabled, and prevent polarity setting, which changes from the + * existing behavior, where all PWM devices are declared as disabled + * at startup (even if they are actually enabled), thus authorizing + * polarity setting. + * + * Instead of setting ->enabled to false, we call pwm_disable() + * before pwm_set_polarity() to ensure that everything is configured + * as expected, and the PWM is really disabled when the user request + * it. + * + * Note that PWM users requiring a smooth handover between the + * bootloader and the kernel (like critical regulators controlled by + * PWM devices) will have to switch to the atomic API and avoid calling + * pwm_apply_args(). + */ + pwm_disable(pwm); pwm_set_polarity(pwm, pwm->args.polarity); } @@ -217,6 +240,9 @@ static inline void pwm_apply_args(struct pwm_device *pwm) * @set_polarity: configure the polarity of this PWM * @enable: enable PWM output toggling * @disable: disable PWM output toggling + * @get_state: get the current PWM state. This function is only + * called once per PWM device when the PWM chip is + * registered. * @dbg_show: optional routine to show contents in debugfs * @owner: helps prevent removal of modules exporting active PWMs */ @@ -229,6 +255,8 @@ struct pwm_ops { enum pwm_polarity polarity); int (*enable)(struct pwm_chip *chip, struct pwm_device *pwm); void (*disable)(struct pwm_chip *chip, struct pwm_device *pwm); + void (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state); #ifdef CONFIG_DEBUG_FS void (*dbg_show)(struct pwm_chip *chip, struct seq_file *s); #endif -- cgit v1.2.3 From 5ec803edcb703fe379836f13560b79dfac79b01d Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 14 Apr 2016 21:17:41 +0200 Subject: pwm: Add core infrastructure to allow atomic updates Add an ->apply() method to the pwm_ops struct to allow PWM drivers to implement atomic updates. This method is preferred over the ->enable(), ->disable() and ->config() methods if available. Add the pwm_apply_state() function to the PWM user API. Note that the pwm_apply_state() does not guarantee the atomicity of the update operation, it all depends on the availability and implementation of the ->apply() method. pwm_enable/disable/set_polarity/config() are now implemented as wrappers around the pwm_apply_state() function. pwm_adjust_config() is allowing smooth handover between the bootloader and the kernel. This function tries to adapt the current PWM state to the PWM arguments coming from a PWM lookup table or a DT definition without changing the duty_cycle/period proportion. Signed-off-by: Boris Brezillon [thierry.reding@gmail.com: fix a couple of typos] Signed-off-by: Thierry Reding --- include/linux/pwm.h | 269 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 184 insertions(+), 85 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 33f8decd9f38..17018f3c066e 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -5,59 +5,7 @@ #include #include -struct pwm_device; struct seq_file; - -#if IS_ENABLED(CONFIG_PWM) -/* - * pwm_request - request a PWM device - */ -struct pwm_device *pwm_request(int pwm_id, const char *label); - -/* - * pwm_free - free a PWM device - */ -void pwm_free(struct pwm_device *pwm); - -/* - * pwm_config - change a PWM device configuration - */ -int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns); - -/* - * pwm_enable - start a PWM output toggling - */ -int pwm_enable(struct pwm_device *pwm); - -/* - * pwm_disable - stop a PWM output toggling - */ -void pwm_disable(struct pwm_device *pwm); -#else -static inline struct pwm_device *pwm_request(int pwm_id, const char *label) -{ - return ERR_PTR(-ENODEV); -} - -static inline void pwm_free(struct pwm_device *pwm) -{ -} - -static inline int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns) -{ - return -EINVAL; -} - -static inline int pwm_enable(struct pwm_device *pwm) -{ - return -EINVAL; -} - -static inline void pwm_disable(struct pwm_device *pwm) -{ -} -#endif - struct pwm_chip; /** @@ -184,11 +132,6 @@ static inline unsigned int pwm_get_duty_cycle(const struct pwm_device *pwm) return state.duty_cycle; } -/* - * pwm_set_polarity - configure the polarity of a PWM signal - */ -int pwm_set_polarity(struct pwm_device *pwm, enum pwm_polarity polarity); - static inline enum pwm_polarity pwm_get_polarity(const struct pwm_device *pwm) { struct pwm_state state; @@ -204,34 +147,6 @@ static inline void pwm_get_args(const struct pwm_device *pwm, *args = pwm->args; } -static inline void pwm_apply_args(struct pwm_device *pwm) -{ - /* - * PWM users calling pwm_apply_args() expect to have a fresh config - * where the polarity and period are set according to pwm_args info. - * The problem is, polarity can only be changed when the PWM is - * disabled. - * - * PWM drivers supporting hardware readout may declare the PWM device - * as enabled, and prevent polarity setting, which changes from the - * existing behavior, where all PWM devices are declared as disabled - * at startup (even if they are actually enabled), thus authorizing - * polarity setting. - * - * Instead of setting ->enabled to false, we call pwm_disable() - * before pwm_set_polarity() to ensure that everything is configured - * as expected, and the PWM is really disabled when the user request - * it. - * - * Note that PWM users requiring a smooth handover between the - * bootloader and the kernel (like critical regulators controlled by - * PWM devices) will have to switch to the atomic API and avoid calling - * pwm_apply_args(). - */ - pwm_disable(pwm); - pwm_set_polarity(pwm, pwm->args.polarity); -} - /** * struct pwm_ops - PWM controller operations * @request: optional hook for requesting a PWM @@ -240,6 +155,10 @@ static inline void pwm_apply_args(struct pwm_device *pwm) * @set_polarity: configure the polarity of this PWM * @enable: enable PWM output toggling * @disable: disable PWM output toggling + * @apply: atomically apply a new PWM config. The state argument + * should be adjusted with the real hardware config (if the + * approximate the period or duty_cycle value, state should + * reflect it) * @get_state: get the current PWM state. This function is only * called once per PWM device when the PWM chip is * registered. @@ -255,6 +174,8 @@ struct pwm_ops { enum pwm_polarity polarity); int (*enable)(struct pwm_chip *chip, struct pwm_device *pwm); void (*disable)(struct pwm_chip *chip, struct pwm_device *pwm); + int (*apply)(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state); void (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state); #ifdef CONFIG_DEBUG_FS @@ -292,6 +213,115 @@ struct pwm_chip { }; #if IS_ENABLED(CONFIG_PWM) +/* PWM user APIs */ +struct pwm_device *pwm_request(int pwm_id, const char *label); +void pwm_free(struct pwm_device *pwm); +int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state); +int pwm_adjust_config(struct pwm_device *pwm); + +/** + * pwm_config() - change a PWM device configuration + * @pwm: PWM device + * @duty_ns: "on" time (in nanoseconds) + * @period_ns: duration (in nanoseconds) of one cycle + * + * Returns: 0 on success or a negative error code on failure. + */ +static inline int pwm_config(struct pwm_device *pwm, int duty_ns, + int period_ns) +{ + struct pwm_state state; + + if (!pwm) + return -EINVAL; + + pwm_get_state(pwm, &state); + if (state.duty_cycle == duty_ns && state.period == period_ns) + return 0; + + state.duty_cycle = duty_ns; + state.period = period_ns; + return pwm_apply_state(pwm, &state); +} + +/** + * pwm_set_polarity() - configure the polarity of a PWM signal + * @pwm: PWM device + * @polarity: new polarity of the PWM signal + * + * Note that the polarity cannot be configured while the PWM device is + * enabled. + * + * Returns: 0 on success or a negative error code on failure. + */ +static inline int pwm_set_polarity(struct pwm_device *pwm, + enum pwm_polarity polarity) +{ + struct pwm_state state; + + if (!pwm) + return -EINVAL; + + pwm_get_state(pwm, &state); + if (state.polarity == polarity) + return 0; + + /* + * Changing the polarity of a running PWM without adjusting the + * dutycycle/period value is a bit risky (can introduce glitches). + * Return -EBUSY in this case. + * Note that this is allowed when using pwm_apply_state() because + * the user specifies all the parameters. + */ + if (state.enabled) + return -EBUSY; + + state.polarity = polarity; + return pwm_apply_state(pwm, &state); +} + +/** + * pwm_enable() - start a PWM output toggling + * @pwm: PWM device + * + * Returns: 0 on success or a negative error code on failure. + */ +static inline int pwm_enable(struct pwm_device *pwm) +{ + struct pwm_state state; + + if (!pwm) + return -EINVAL; + + pwm_get_state(pwm, &state); + if (state.enabled) + return 0; + + state.enabled = true; + return pwm_apply_state(pwm, &state); +} + +/** + * pwm_disable() - stop a PWM output toggling + * @pwm: PWM device + */ +static inline void pwm_disable(struct pwm_device *pwm) +{ + struct pwm_state state; + + if (!pwm) + return; + + pwm_get_state(pwm, &state); + if (!state.enabled) + return; + + state.enabled = false; + pwm_apply_state(pwm, &state); +} + + +/* PWM provider APIs */ int pwm_set_chip_data(struct pwm_device *pwm, void *data); void *pwm_get_chip_data(struct pwm_device *pwm); @@ -317,6 +347,47 @@ void devm_pwm_put(struct device *dev, struct pwm_device *pwm); bool pwm_can_sleep(struct pwm_device *pwm); #else +static inline struct pwm_device *pwm_request(int pwm_id, const char *label) +{ + return ERR_PTR(-ENODEV); +} + +static inline void pwm_free(struct pwm_device *pwm) +{ +} + +static inline int pwm_apply_state(struct pwm_device *pwm, + const struct pwm_state *state) +{ + return -ENOTSUPP; +} + +static inline int pwm_adjust_config(struct pwm_device *pwm) +{ + return -ENOTSUPP; +} + +static inline int pwm_config(struct pwm_device *pwm, int duty_ns, + int period_ns) +{ + return -EINVAL; +} + +static inline int pwm_set_polarity(struct pwm_device *pwm, + enum pwm_polarity polarity) +{ + return -ENOTSUPP; +} + +static inline int pwm_enable(struct pwm_device *pwm) +{ + return -EINVAL; +} + +static inline void pwm_disable(struct pwm_device *pwm) +{ +} + static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data) { return -EINVAL; @@ -388,6 +459,34 @@ static inline bool pwm_can_sleep(struct pwm_device *pwm) } #endif +static inline void pwm_apply_args(struct pwm_device *pwm) +{ + /* + * PWM users calling pwm_apply_args() expect to have a fresh config + * where the polarity and period are set according to pwm_args info. + * The problem is, polarity can only be changed when the PWM is + * disabled. + * + * PWM drivers supporting hardware readout may declare the PWM device + * as enabled, and prevent polarity setting, which changes from the + * existing behavior, where all PWM devices are declared as disabled + * at startup (even if they are actually enabled), thus authorizing + * polarity setting. + * + * Instead of setting ->enabled to false, we call pwm_disable() + * before pwm_set_polarity() to ensure that everything is configured + * as expected, and the PWM is really disabled when the user request + * it. + * + * Note that PWM users requiring a smooth handover between the + * bootloader and the kernel (like critical regulators controlled by + * PWM devices) will have to switch to the atomic API and avoid calling + * pwm_apply_args(). + */ + pwm_disable(pwm); + pwm_set_polarity(pwm, pwm->args.polarity); +} + struct pwm_lookup { struct list_head list; const char *provider; -- cgit v1.2.3 From c35095215a80b13af2f521e1ab1c727a5224e53b Mon Sep 17 00:00:00 2001 From: Wei Ni Date: Tue, 29 Mar 2016 18:29:17 +0800 Subject: thermal: of-thermal: allow setting trip_temp on hardware In current of-thermal, the .set_trip_temp only support to set trip_temp for SW. But some sensors support to set trip_temp on hardware, so that can trigger interrupt, shutdown or any other events. This patch adds .set_trip_temp() callback in thermal_zone_of_device_ops{}, so that the sensor device can use it to set trip_temp on hardware. Signed-off-by: Wei Ni Signed-off-by: Eduardo Valentin --- include/linux/thermal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 1b8a5a7876ce..e45abe7db9a6 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -340,6 +340,7 @@ struct thermal_zone_of_device_ops { int (*get_temp)(void *, int *); int (*get_trend)(void *, long *); int (*set_emul_temp)(void *, int); + int (*set_trip_temp)(void *, int, int); }; /** -- cgit v1.2.3 From 7ff2760999a86e4d2b1af93dcf0f0d336c309571 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 16 May 2016 15:35:24 +0300 Subject: mmc: core: Add a facility to "pause" re-tuning Re-tuning is not possible when switched to the RPMB partition. However re-tuning should not be needed if re-tuning is done immediately before switching, a small set of operations is done, and then we immediately switch back to the main partition. To ensure that re-tuning can't be done for a short while, add a facility to "pause" re-tuning. The existing facility to hold / release re-tuning is used but it also flags re-tuning as needed to cause re-tuning before the next command (which will be the switch to RPMB). We also need to "unpause" in the recovery path, which is catered for by adding it to mmc_retune_disable(). Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 85800b48241f..45cde8cd39f2 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -329,6 +329,7 @@ struct mmc_host { unsigned int can_retune:1; /* re-tuning can be used */ unsigned int doing_retune:1; /* re-tuning in progress */ unsigned int retune_now:1; /* do re-tuning at next req */ + unsigned int retune_paused:1; /* re-tuning is temporarily disabled */ int rescan_disable; /* disable card detection */ int rescan_entered; /* used with nonremovable devices */ @@ -526,4 +527,7 @@ static inline void mmc_retune_recheck(struct mmc_host *host) host->retune_now = 1; } +void mmc_retune_pause(struct mmc_host *host); +void mmc_retune_unpause(struct mmc_host *host); + #endif /* LINUX_MMC_HOST_H */ -- cgit v1.2.3 From 39651abd28146fff2bfac63d68a7a56250a4aead Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Tue, 17 May 2016 06:44:26 -0400 Subject: qed: add support for dcbx. This patch adds the necessary driver support for Management Firmware to configure the device/firmware with the dcbx results. Management Firmware is responsible for communicating the DCBX and driving the negotiation, but the driver has responsibility of receiving async notification and configuring the results in hw/fw. This patch also adds the dcbx support for future protocols (e.g., FCoE) as preparation to their imminent submission. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 0fd8f247e65f..4c29439f54bf 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -25,6 +25,15 @@ #include #include +enum dcbx_protocol_type { + DCBX_PROTOCOL_ISCSI, + DCBX_PROTOCOL_FCOE, + DCBX_PROTOCOL_ROCE, + DCBX_PROTOCOL_ROCE_V2, + DCBX_PROTOCOL_ETH, + DCBX_MAX_PROTOCOL_TYPE +}; + enum qed_led_mode { QED_LED_MODE_OFF, QED_LED_MODE_ON, -- cgit v1.2.3 From ec4c436652cfdf5dcad60f553020ee99596b937b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 17 May 2016 18:06:18 +0100 Subject: regulator: Silence build warnings from regulator_can_change_voltage() Cut down on noise for mainstream users of the API and people doing build testing by dropping the deprecated flag from regulator_can_change_voltage() as it triggers even on the EXPORT_SYMBOL_GPL() which affects all builds rather than just the remaining drivers with calls to it (for which fixes are currently pending). The function remains deprecated and is expected to be removed entirely in v4.8. Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 80dc4e51d14a..48603506f8de 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -224,7 +224,7 @@ int regulator_bulk_force_disable(int num_consumers, void regulator_bulk_free(int num_consumers, struct regulator_bulk_data *consumers); -int __deprecated regulator_can_change_voltage(struct regulator *regulator); +int regulator_can_change_voltage(struct regulator *regulator); int regulator_count_voltages(struct regulator *regulator); int regulator_list_voltage(struct regulator *regulator, unsigned selector); int regulator_is_supported_voltage(struct regulator *regulator, @@ -436,7 +436,7 @@ static inline void regulator_bulk_free(int num_consumers, { } -static inline int __deprecated regulator_can_change_voltage(struct regulator *regulator) +static inline int regulator_can_change_voltage(struct regulator *regulator) { return 0; } -- cgit v1.2.3 From 2e72448b07dc3ff1b7593e9bfff91db182262857 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 21 May 2013 16:53:03 -0400 Subject: NFS: Add COPY nfs operation This adds the copy_range file_ops function pointer used by the sys_copy_range() function call. This patch only implements sync copies, so if an async copy happens we decode the stateid and ignore it. Signed-off-by: Anna Schumaker --- include/linux/nfs4.h | 1 + include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 26 ++++++++++++++++++++++++++ 3 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 011433478a14..722509482e1a 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -504,6 +504,7 @@ enum { NFSPROC4_CLNT_DEALLOCATE, NFSPROC4_CLNT_LAYOUTSTATS, NFSPROC4_CLNT_CLONE, + NFSPROC4_CLNT_COPY, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 7fcc13c8cf1f..14a762d2734d 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -246,5 +246,6 @@ struct nfs_server { #define NFS_CAP_DEALLOCATE (1U << 21) #define NFS_CAP_LAYOUTSTATS (1U << 22) #define NFS_CAP_CLONE (1U << 23) +#define NFS_CAP_COPY (1U << 24) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index cb9982d8f38f..e70ed54dad94 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1343,6 +1343,32 @@ struct nfs42_falloc_res { const struct nfs_server *falloc_server; }; +struct nfs42_copy_args { + struct nfs4_sequence_args seq_args; + + struct nfs_fh *src_fh; + nfs4_stateid src_stateid; + u64 src_pos; + + struct nfs_fh *dst_fh; + nfs4_stateid dst_stateid; + u64 dst_pos; + + u64 count; +}; + +struct nfs42_write_res { + u64 count; + struct nfs_writeverf verifier; +}; + +struct nfs42_copy_res { + struct nfs4_sequence_res seq_res; + struct nfs42_write_res write_res; + bool consecutive; + bool synchronous; +}; + struct nfs42_seek_args { struct nfs4_sequence_args seq_args; -- cgit v1.2.3 From 4b9c7f9db9a003f5c342184dc4401c1b7f2efb39 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 May 2016 14:40:31 -0400 Subject: sunrpc: Update RPCBIND_MAXNETIDLEN Commit 176e21ee2ec8 ("SUNRPC: Support for RPC over AF_LOCAL transports") added a 5-character netid, but did not bump RPCBIND_MAXNETIDLEN from 4 to 5. Fixes: 176e21ee2ec8 ("SUNRPC: Support for RPC over AF_LOCAL ...") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/msg_prot.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 807371357160..59cbf16eaeb5 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -158,9 +158,9 @@ typedef __be32 rpc_fraghdr; /* * Note that RFC 1833 does not put any size restrictions on the - * netid string, but all currently defined netid's fit in 4 bytes. + * netid string, but all currently defined netid's fit in 5 bytes. */ -#define RPCBIND_MAXNETIDLEN (4u) +#define RPCBIND_MAXNETIDLEN (5u) /* * Universal addresses are introduced in RFC 1833 and further spelled -- cgit v1.2.3 From 6b26cc8c8ead3636a18bfd9489984983f4ddd6f4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 May 2016 14:40:40 -0400 Subject: sunrpc: Advertise maximum backchannel payload size RPC-over-RDMA transports have a limit on how large a backward direction (backchannel) RPC message can be. Ensure that the NFSv4.x CREATE_SESSION operation advertises this limit to servers. Signed-off-by: Chuck Lever Tested-by: Steve Wise Signed-off-by: Anna Schumaker --- include/linux/sunrpc/clnt.h | 1 + include/linux/sunrpc/xprt.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 9a7ddbaf116e..19c659d1c0f8 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -176,6 +176,7 @@ void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); int rpc_protocol(struct rpc_clnt *); struct net * rpc_net_ns(struct rpc_clnt *); size_t rpc_max_payload(struct rpc_clnt *); +size_t rpc_max_bc_payload(struct rpc_clnt *); unsigned long rpc_get_timeout(struct rpc_clnt *clnt); void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index fb0d212e0d3a..5aa3834619a8 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -142,6 +142,7 @@ struct rpc_xprt_ops { int (*bc_setup)(struct rpc_xprt *xprt, unsigned int min_reqs); int (*bc_up)(struct svc_serv *serv, struct net *net); + size_t (*bc_maxpayload)(struct rpc_xprt *xprt); void (*bc_free_rqst)(struct rpc_rqst *rqst); void (*bc_destroy)(struct rpc_xprt *xprt, unsigned int max_reqs); -- cgit v1.2.3 From 29c554227aeec48cde5c22f911e51763f096e125 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 May 2016 14:40:48 -0400 Subject: xprtrdma: Bound the inline threshold values Currently the sysctls that allow setting the inline threshold allow any value to be set. Small values only make the transport run slower. The default 1KB setting is as low as is reasonable. And the logic that decides how to divide a Send buffer between RPC-over-RDMA header and RPC message assumes (but does not check) that the lower bound is not crazy (say, 57 bytes). Send and receive buffers share a page with some control information. Values larger than about 3KB can't be supported, currently. Signed-off-by: Chuck Lever Tested-by: Steve Wise Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprtrdma.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 767190b01363..39267dc3486a 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -52,7 +52,9 @@ #define RPCRDMA_DEF_SLOT_TABLE (128U) #define RPCRDMA_MAX_SLOT_TABLE (256U) -#define RPCRDMA_DEF_INLINE (1024) /* default inline max */ +#define RPCRDMA_MIN_INLINE (1024) /* min inline thresh */ +#define RPCRDMA_DEF_INLINE (1024) /* default inline thresh */ +#define RPCRDMA_MAX_INLINE (3068) /* max inline thresh */ /* Memory registration strategies, by number. * This is part of a kernel / user space API. Do not remove. */ -- cgit v1.2.3 From 9a8f6b5ea275ff01fc8ef3b8630a3d4ed6b0a362 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 16 May 2016 17:42:42 -0400 Subject: SUNRPC: Ensure get_rpccred() and put_rpccred() can take NULL arguments Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/auth.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 6f36b2bf3e05..899791573a40 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -202,7 +202,8 @@ char * rpcauth_stringify_acceptor(struct rpc_cred *); static inline struct rpc_cred * get_rpccred(struct rpc_cred *cred) { - atomic_inc(&cred->cr_count); + if (cred != NULL) + atomic_inc(&cred->cr_count); return cred; } -- cgit v1.2.3 From 93b717fd81bf6b9a73c3702e9b079b4de8148b34 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 16 May 2016 17:42:43 -0400 Subject: NFSv4: Label stateids with the type In order to more easily distinguish what kind of stateid we are dealing with, introduce a type that can be used to label the stateid structure. The label will be useful both for debugging, but also when dealing with operations like SETATTR, READ and WRITE that can take several different types of stateid as arguments. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs4.h | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 722509482e1a..e1692c96cbc8 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -50,12 +50,27 @@ struct nfs4_label { typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier; -struct nfs_stateid4 { - __be32 seqid; - char other[NFS4_STATEID_OTHER_SIZE]; -} __attribute__ ((packed)); +struct nfs4_stateid_struct { + union { + char data[NFS4_STATEID_SIZE]; + struct { + __be32 seqid; + char other[NFS4_STATEID_OTHER_SIZE]; + } __attribute__ ((packed)); + }; + + enum { + NFS4_INVALID_STATEID_TYPE = 0, + NFS4_SPECIAL_STATEID_TYPE, + NFS4_OPEN_STATEID_TYPE, + NFS4_LOCK_STATEID_TYPE, + NFS4_DELEGATION_STATEID_TYPE, + NFS4_LAYOUT_STATEID_TYPE, + NFS4_PNFS_DS_STATEID_TYPE, + } type; +}; -typedef struct nfs_stateid4 nfs4_stateid; +typedef struct nfs4_stateid_struct nfs4_stateid; enum nfs_opnum4 { OP_ACCESS = 3, -- cgit v1.2.3 From 183d9e7b112aaed0d19c16ffcf0f8c3a86dc71e0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 17 May 2016 12:28:47 -0400 Subject: pnfs: rework LAYOUTGET retry handling There are several problems in the way a stateid is selected for a LAYOUTGET operation: We pick a stateid to use in the RPC prepare op, but that makes it difficult to serialize LAYOUTGETs that use the open stateid. That serialization is done in pnfs_update_layout, which occurs well before the rpc_prepare operation. Between those two events, the i_lock is dropped and reacquired. pnfs_update_layout can find that the list has lsegs in it and not do any serialization, but then later pnfs_choose_layoutget_stateid ends up choosing the open stateid. This patch changes the client to select the stateid to use in the LAYOUTGET earlier, when we're searching for a usable layout segment. This way we can do it all while holding the i_lock the first time, and ensure that we serialize any LAYOUTGET call that uses a non-layout stateid. This also means a rework of how LAYOUTGET replies are handled, as we must now get the latest stateid if we want to retransmit in response to a retryable error. Most of those errors boil down to the fact that the layout state has changed in some fashion. Thus, what we really want to do is to re-search for a layout when it fails with a retryable error, so that we can avoid reissuing the RPC at all if possible. While the LAYOUTGET RPC is async, the initiating thread always waits for it to complete, so it's effectively synchronous anyway. Currently, when we need to retry a LAYOUTGET because of an error, we drive that retry via the rpc state machine. This means that once the call has been submitted, it runs until it completes. So, we must move the error handling for this RPC out of the rpc_call_done operation and into the caller. In order to handle errors like NFS4ERR_DELAY properly, we must also pass a pointer to the sliding timeout, which is now moved to the stack in pnfs_update_layout. The complicating errors are -NFS4ERR_RECALLCONFLICT and -NFS4ERR_LAYOUTTRYLATER, as those involve a timeout after which we give up and return NULL back to the caller. So, there is some special handling for those errors to ensure that the layers driving the retries can handle that appropriately. Signed-off-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/errno.h | 1 + include/linux/nfs4.h | 2 ++ include/linux/nfs_xdr.h | 2 -- 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/errno.h b/include/linux/errno.h index 89627b9187f9..7ce9fb1b7d28 100644 --- a/include/linux/errno.h +++ b/include/linux/errno.h @@ -28,5 +28,6 @@ #define EBADTYPE 527 /* Type not supported by server */ #define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */ #define EIOCBQUEUED 529 /* iocb queued, will get completion event */ +#define ERECALLCONFLICT 530 /* conflict with recalled state */ #endif diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index e1692c96cbc8..bfed6b367350 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -637,7 +637,9 @@ enum pnfs_update_layout_reason { PNFS_UPDATE_LAYOUT_IO_TEST_FAIL, PNFS_UPDATE_LAYOUT_FOUND_CACHED, PNFS_UPDATE_LAYOUT_RETURN, + PNFS_UPDATE_LAYOUT_RETRY, PNFS_UPDATE_LAYOUT_BLOCKED, + PNFS_UPDATE_LAYOUT_INVALID_OPEN, PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e70ed54dad94..ccb2928a0e64 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -233,7 +233,6 @@ struct nfs4_layoutget_args { struct inode *inode; struct nfs_open_context *ctx; nfs4_stateid stateid; - unsigned long timestamp; struct nfs4_layoutdriver_data layout; }; @@ -251,7 +250,6 @@ struct nfs4_layoutget { struct nfs4_layoutget_res res; struct rpc_cred *cred; gfp_t gfp_flags; - long timeout; }; struct nfs4_getdeviceinfo_args { -- cgit v1.2.3 From b52207ef4ea56f8c22288ec3387399aac72c26cf Mon Sep 17 00:00:00 2001 From: Chen Feng Date: Sun, 14 Feb 2016 14:29:21 +0800 Subject: mfd: hi655x: Add MFD driver for hi655x Add PMIC MFD driver to support hisilicon hi665x. Signed-off-by: Chen Feng Signed-off-by: Fei Wang Signed-off-by: Xinwei Kong Reviewed-by: Haojian Zhuang Signed-off-by: Lee Jones --- include/linux/mfd/hi655x-pmic.h | 55 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 include/linux/mfd/hi655x-pmic.h (limited to 'include/linux') diff --git a/include/linux/mfd/hi655x-pmic.h b/include/linux/mfd/hi655x-pmic.h new file mode 100644 index 000000000000..dbbe9a644622 --- /dev/null +++ b/include/linux/mfd/hi655x-pmic.h @@ -0,0 +1,55 @@ +/* + * Device driver for regulators in hi655x IC + * + * Copyright (c) 2016 Hisilicon. + * + * Authors: + * Chen Feng + * Fei Wang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __HI655X_PMIC_H +#define __HI655X_PMIC_H + +/* Hi655x registers are mapped to memory bus in 4 bytes stride */ +#define HI655X_STRIDE 4 +#define HI655X_BUS_ADDR(x) ((x) << 2) + +#define HI655X_BITS 8 + +#define HI655X_NR_IRQ 32 + +#define HI655X_IRQ_STAT_BASE (0x003 << 2) +#define HI655X_IRQ_MASK_BASE (0x007 << 2) +#define HI655X_ANA_IRQM_BASE (0x1b5 << 2) +#define HI655X_IRQ_ARRAY 4 +#define HI655X_IRQ_MASK 0xFF +#define HI655X_IRQ_CLR 0xFF +#define HI655X_VER_REG 0x00 + +#define PMU_VER_START 0x10 +#define PMU_VER_END 0x38 + +#define RESERVE_INT BIT(7) +#define PWRON_D20R_INT BIT(6) +#define PWRON_D20F_INT BIT(5) +#define PWRON_D4SR_INT BIT(4) +#define VSYS_6P0_D200UR_INT BIT(3) +#define VSYS_UV_D3R_INT BIT(2) +#define VSYS_2P5_R_INT BIT(1) +#define OTMP_D1R_INT BIT(0) + +struct hi655x_pmic { + struct resource *res; + struct device *dev; + struct regmap *regmap; + int gpio; + unsigned int ver; + struct regmap_irq_chip_data *irq_data; +}; + +#endif -- cgit v1.2.3 From 94c6825e0ff75829207af6246782811b7c7af2c0 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Sun, 17 Apr 2016 17:08:40 +0300 Subject: net/mlx5_core: Use tasklet for user-space CQ completion events Previously, we've fired all our completion callbacks straight from our ISR. Some of those callbacks were lightweight (for example, mlx5 Ethernet napi callbacks), but some of them did more work (for example, the user-space RDMA stack uverbs' completion handler). Besides that, doing more than the minimal work in ISR is generally considered wrong, it could even lead to a hard lockup of the system. Since when a lot of completion events are generated by the hardware, the loop over those events could be so long, that we'll get into a hard lockup by the system watchdog. In order to avoid that, add a new way of invoking completion events callbacks. In the interrupt itself, we add the CQs which receive completion event to a per-EQ list and schedule a tasklet. In the tasklet context we loop over all the CQs in the list and invoke the user callback. Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- include/linux/mlx5/cq.h | 5 +++++ include/linux/mlx5/driver.h | 10 ++++++++++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index b2c9fada8eac..2be976dd4966 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -53,6 +53,11 @@ struct mlx5_core_cq { unsigned arm_sn; struct mlx5_rsc_debug *dbg; int pid; + struct { + struct list_head list; + void (*comp)(struct mlx5_core_cq *); + void *priv; + } tasklet_ctx; }; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 369c837d40f5..5a41f9003941 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -304,6 +305,14 @@ struct mlx5_buf { u8 page_shift; }; +struct mlx5_eq_tasklet { + struct list_head list; + struct list_head process_list; + struct tasklet_struct task; + /* lock on completion tasklet list */ + spinlock_t lock; +}; + struct mlx5_eq { struct mlx5_core_dev *dev; __be32 __iomem *doorbell; @@ -317,6 +326,7 @@ struct mlx5_eq { struct list_head list; int index; struct mlx5_rsc_debug *dbg; + struct mlx5_eq_tasklet tasklet_ctx; }; struct mlx5_core_psv { -- cgit v1.2.3 From dd1a4cc1fbdf516bb38ca31b65c76e720d414d0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 4 May 2016 14:09:44 -0500 Subject: KVM: split kvm_vcpu_wake_up from kvm_vcpu_kick MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AVIC has a use for kvm_vcpu_wake_up. Signed-off-by: Radim Krčmář Tested-by: Suravee Suthikulpanit Reviewed-by: Paolo Bonzini Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bbcd921d7cb0..b1fa8f11c95b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -657,6 +657,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); void kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); +void kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 45c04704e467fffe3525205454d9627325dae308 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 18 May 2016 16:19:01 +0300 Subject: ASoC: twl6040: Disconnect AUX output pads on digital mute Disconnect also the path to AUXL from the HF path during digital_mute to avoid pop noise leakage to Line-out pads. Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown --- include/linux/mfd/twl6040.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/twl6040.h b/include/linux/mfd/twl6040.h index 8f9fc3d26e6d..8e95cd87cd74 100644 --- a/include/linux/mfd/twl6040.h +++ b/include/linux/mfd/twl6040.h @@ -134,6 +134,7 @@ #define TWL6040_HFDACENA (1 << 0) #define TWL6040_HFPGAENA (1 << 1) #define TWL6040_HFDRVENA (1 << 4) +#define TWL6040_HFSWENA (1 << 6) /* VIBCTLL/R (0x18/0x1A) fields */ -- cgit v1.2.3 From 0a70bd43053331d99881211e1d09f32de531432f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 24 Feb 2016 14:02:11 -0800 Subject: dax: enable dax in the presence of known media errors (badblocks) 1/ If a mapping overlaps a bad sector fail the request. 2/ Do not opportunistically report more dax-capable capacity than is requested when errors present. Reviewed-by: Jeff Moyer Reviewed-by: Christoph Hellwig Signed-off-by: Dan Williams [vishal: fix a conflict with system RAM collision patches] [vishal: add a 'size' parameter to ->direct_access] [vishal: fix a conflict with DAX alignment check patches] Signed-off-by: Vishal Verma --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 27cbefe8c985..cf7c13c2c38d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1668,7 +1668,7 @@ struct block_device_operations { int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); long (*direct_access)(struct block_device *, sector_t, void __pmem **, - pfn_t *); + pfn_t *, long); unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); /* ->media_changed() is DEPRECATED, use ->check_events() instead */ -- cgit v1.2.3 From 3dc29161070ab14d065554c0ad58988ab77a7bfd Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 15 Mar 2016 11:20:41 -0600 Subject: dax: use sb_issue_zerout instead of calling dax_clear_sectors dax_clear_sectors() cannot handle poisoned blocks. These must be zeroed using the BIO interface instead. Convert ext2 and XFS to use only sb_issue_zerout(). Reviewed-by: Jeff Moyer Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Matthew Wilcox [vishal: Also remove the dax_clear_sectors function entirely] Signed-off-by: Vishal Verma --- include/linux/dax.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 7c45ac7ea1d1..7f853ffaa987 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -7,7 +7,6 @@ ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, get_block_t, dio_iodone_t, int flags); -int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); -- cgit v1.2.3 From 679c8bd3b29428e736eabb7fc66a978f312f0c86 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 May 2016 10:47:04 +0200 Subject: dax: export a low-level __dax_zero_page_range helper This allows XFS to perform zeroing using the iomap infrastructure and avoid buffer heads. Reviewed-by: Jan Kara Signed-off-by: Christoph Hellwig [vishal: fix conflicts with dax-error-handling] Signed-off-by: Vishal Verma --- include/linux/dax.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 7f853ffaa987..90fbc99e5313 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -14,12 +14,19 @@ int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); #ifdef CONFIG_FS_DAX struct page *read_dax_sector(struct block_device *bdev, sector_t n); +int __dax_zero_page_range(struct block_device *bdev, sector_t sector, + unsigned int offset, unsigned int length); #else static inline struct page *read_dax_sector(struct block_device *bdev, sector_t n) { return ERR_PTR(-ENXIO); } +static inline int __dax_zero_page_range(struct block_device *bdev, + sector_t sector, unsigned int offset, unsigned int length) +{ + return -ENXIO; +} #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE -- cgit v1.2.3 From 348e967ab07c96a9e7a6a194812254a8df2045c0 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 12 May 2016 18:29:15 +0200 Subject: dax: Make huge page handling depend of CONFIG_BROKEN Currently the handling of huge pages for DAX is racy. For example the following can happen: CPU0 (THP write fault) CPU1 (normal read fault) __dax_pmd_fault() __dax_fault() get_block(inode, block, &bh, 0) -> not mapped get_block(inode, block, &bh, 0) -> not mapped if (!buffer_mapped(&bh) && write) get_block(inode, block, &bh, 1) -> allocates blocks truncate_pagecache_range(inode, lstart, lend); dax_load_hole(); This results in data corruption since process on CPU1 won't see changes into the file done by CPU0. The race can happen even if two normal faults race however with THP the situation is even worse because the two faults don't operate on the same entries in the radix tree and we want to use these entries for serialization. So make THP support in DAX code depend on CONFIG_BROKEN for now. Signed-off-by: Jan Kara Signed-off-by: Ross Zwisler --- include/linux/dax.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 90fbc99e5313..72dc81de3ddb 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -29,7 +29,7 @@ static inline int __dax_zero_page_range(struct block_device *bdev, } #endif -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *, unsigned int flags, get_block_t); int __dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *, -- cgit v1.2.3 From e804315dd0f574b56155c5a2406ab5e0318104f7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 12 May 2016 18:29:16 +0200 Subject: dax: Define DAX lock bit for radix tree exceptional entry We will use lowest available bit in the radix tree exceptional entry for locking of the entry. Define it. Also clean up definitions of DAX entry type bits in DAX exceptional entries to use defined constants instead of hardcoding numbers and cleanup checking of these bits to not rely on how other bits in the entry are set. Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Ross Zwisler --- include/linux/dax.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 72dc81de3ddb..70600b63083f 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -5,6 +5,9 @@ #include #include +/* We use lowest available exceptional entry bit for locking */ +#define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT) + ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, get_block_t, dio_iodone_t, int flags); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); -- cgit v1.2.3 From 4f622938a5e2b7f1374ffb1e5fc212744898f513 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 12 May 2016 18:29:17 +0200 Subject: dax: Allow DAX code to replace exceptional entries Currently we forbid page_cache_tree_insert() to replace exceptional radix tree entries for DAX inodes. However to make DAX faults race free we will lock radix tree entries and when hole is created, we need to replace such locked radix tree entry with a hole page. So modify page_cache_tree_insert() to allow that. Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Ross Zwisler --- include/linux/dax.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 70600b63083f..aa148937bb3f 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -3,6 +3,7 @@ #include #include +#include #include /* We use lowest available exceptional entry bit for locking */ -- cgit v1.2.3 From ac401cc782429cc8560ce4840b1405d603740917 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 12 May 2016 18:29:18 +0200 Subject: dax: New fault locking Currently DAX page fault locking is racy. CPU0 (write fault) CPU1 (read fault) __dax_fault() __dax_fault() get_block(inode, block, &bh, 0) -> not mapped get_block(inode, block, &bh, 0) -> not mapped if (!buffer_mapped(&bh)) if (vmf->flags & FAULT_FLAG_WRITE) get_block(inode, block, &bh, 1) -> allocates blocks if (page) -> no if (!buffer_mapped(&bh)) if (vmf->flags & FAULT_FLAG_WRITE) { } else { dax_load_hole(); } dax_insert_mapping() And we are in a situation where we fail in dax_radix_entry() with -EIO. Another problem with the current DAX page fault locking is that there is no race-free way to clear dirty tag in the radix tree. We can always end up with clean radix tree and dirty data in CPU cache. We fix the first problem by introducing locking of exceptional radix tree entries in DAX mappings acting very similarly to page lock and thus synchronizing properly faults against the same mapping index. The same lock can later be used to avoid races when clearing radix tree dirty tag. Reviewed-by: NeilBrown Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Ross Zwisler --- include/linux/dax.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index aa148937bb3f..756625c6d0dd 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -15,6 +15,9 @@ int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); +int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); +void dax_wake_mapping_entry_waiter(struct address_space *mapping, + pgoff_t index, bool wake_all); #ifdef CONFIG_FS_DAX struct page *read_dax_sector(struct block_device *bdev, sector_t n); -- cgit v1.2.3 From bc2466e4257369d0ebee2b6265070d323343fa72 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 12 May 2016 18:29:19 +0200 Subject: dax: Use radix tree entry lock to protect cow faults When doing cow faults, we cannot directly fill in PTE as we do for other faults as we rely on generic code to do proper accounting of the cowed page. We also have no page to lock to protect against races with truncate as other faults have and we need the protection to extend until the moment generic code inserts cowed page into PTE thus at that point we have no protection of fs-specific i_mmap_sem. So far we relied on using i_mmap_lock for the protection however that is completely special to cow faults. To make fault locking more uniform use DAX entry lock instead. Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Ross Zwisler --- include/linux/dax.h | 7 +++++++ include/linux/mm.h | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 756625c6d0dd..7bf12277c006 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -21,6 +21,7 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping, #ifdef CONFIG_FS_DAX struct page *read_dax_sector(struct block_device *bdev, sector_t n); +void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index); int __dax_zero_page_range(struct block_device *bdev, sector_t sector, unsigned int offset, unsigned int length); #else @@ -29,6 +30,12 @@ static inline struct page *read_dax_sector(struct block_device *bdev, { return ERR_PTR(-ENXIO); } +/* Shouldn't ever be called when dax is disabled. */ +static inline void dax_unlock_mapping_entry(struct address_space *mapping, + pgoff_t index) +{ + BUG(); +} static inline int __dax_zero_page_range(struct block_device *bdev, sector_t sector, unsigned int offset, unsigned int length) { diff --git a/include/linux/mm.h b/include/linux/mm.h index a55e5be0894f..0ef9dc720ec3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -299,6 +299,12 @@ struct vm_fault { * is set (which is also implied by * VM_FAULT_ERROR). */ + void *entry; /* ->fault handler can alternatively + * return locked DAX entry. In that + * case handler should return + * VM_FAULT_DAX_LOCKED and fill in + * entry here. + */ /* for ->map_pages() only */ pgoff_t max_pgoff; /* map pages for offset from pgoff till * max_pgoff inclusive */ @@ -1084,6 +1090,7 @@ static inline void clear_page_pfmemalloc(struct page *page) #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ #define VM_FAULT_RETRY 0x0400 /* ->fault blocked, must retry */ #define VM_FAULT_FALLBACK 0x0800 /* huge page fault failed, fall back to small */ +#define VM_FAULT_DAX_LOCKED 0x1000 /* ->fault has locked DAX entry */ #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */ -- cgit v1.2.3 From 35e481761cdc688dbee0ef552a13f49af8eba6cc Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 19 May 2016 17:08:59 -0700 Subject: fsnotify: avoid spurious EMFILE errors from inotify_init() Inotify instance is destroyed when all references to it are dropped. That not only means that the corresponding file descriptor needs to be closed but also that all corresponding instance marks are freed (as each mark holds a reference to the inotify instance). However marks are freed only after SRCU period ends which can take some time and thus if user rapidly creates and frees inotify instances, number of existing inotify instances can exceed max_user_instances limit although from user point of view there is always at most one existing instance. Thus inotify_init() returns EMFILE error which is hard to justify from user point of view. This problem is exposed by LTP inotify06 testcase on some machines. We fix the problem by making sure all group marks are properly freed while destroying inotify instance. We wait for SRCU period to end in that path anyway since we have to make sure there is no event being added to the instance while we are tearing down the instance. So it takes only some plumbing to allow for marks to be destroyed in that path as well and not from a dedicated work item. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Jan Kara Reported-by: Xiaoguang Wang Tested-by: Xiaoguang Wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fsnotify_backend.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 1259e53d9296..29f917517299 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -359,8 +359,6 @@ extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) extern void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group); /* run all the marks in a group, and clear all of the marks where mark->flags & flags is true*/ extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, unsigned int flags); -/* run all the marks in a group, and flag them to be freed */ -extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group); extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_unmount_inodes(struct super_block *sb); -- cgit v1.2.3 From bc2c53e5f1a2bae69ae50ce3a592633da7fcf6d9 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Thu, 19 May 2016 17:09:02 -0700 Subject: time: add missing implementation for timespec64_add_safe() timespec64_add_safe() has been defined in time64.h for 64 bit systems. But, 32 bit systems only have an extern function prototype defined. Provide a definition for the above function. The function will be necessary as part of y2038 changes. struct timespec is not y2038 safe. All references to timespec will be replaced by struct timespec64. The function is meant to be a replacement for timespec_add_safe(). The implementation is similar to timespec_add_safe(). Link: http://lkml.kernel.org/r/1461947989-21926-2-git-send-email-deepa.kernel@gmail.com Signed-off-by: Deepa Dinamani Acked-by: John Stultz Cc: Thomas Gleixner Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time64.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time64.h b/include/linux/time64.h index 367d5af899e8..1778937221bf 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -136,13 +136,11 @@ extern void set_normalized_timespec64(struct timespec64 *ts, time64_t sec, s64 n /* * timespec64_add_safe assumes both values are positive and checks for - * overflow. It will return TIME_T_MAX if the returned value would be - * smaller then either of the arguments. + * overflow. It will return TIME64_MAX in case of overflow. */ extern struct timespec64 timespec64_add_safe(const struct timespec64 lhs, const struct timespec64 rhs); - static inline struct timespec64 timespec64_add(struct timespec64 lhs, struct timespec64 rhs) { -- cgit v1.2.3 From 766b9f928bd5b9b185d986d40355d1f143484136 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Thu, 19 May 2016 17:09:05 -0700 Subject: fs: poll/select/recvmmsg: use timespec64 for timeout events struct timespec is not y2038 safe. Even though timespec might be sufficient to represent timeouts, use struct timespec64 here as the plan is to get rid of all timespec reference in the kernel. The patch transitions the common functions: poll_select_set_timeout() and select_estimate_accuracy() to use timespec64. And, all the syscalls that use these functions are transitioned in the same patch. The restart block parameters for poll uses monotonic time. Use timespec64 here as well to assign timeout value. This parameter in the restart block need not change because this only holds the monotonic timestamp at which timeout should occur. And, unsigned long data type should be big enough for this timestamp. The system call interfaces will be handled in a separate series. Compat interfaces need not change as timespec64 is an alias to struct timespec on a 64 bit system. Link: http://lkml.kernel.org/r/1461947989-21926-3-git-send-email-deepa.kernel@gmail.com Signed-off-by: Deepa Dinamani Acked-by: John Stultz Acked-by: David S. Miller Cc: Alexander Viro Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/poll.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/poll.h b/include/linux/poll.h index 9fb4f40d9a26..37b057b63b46 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -96,7 +96,7 @@ extern void poll_initwait(struct poll_wqueues *pwq); extern void poll_freewait(struct poll_wqueues *pwq); extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state, ktime_t *expires, unsigned long slack); -extern u64 select_estimate_accuracy(struct timespec *tv); +extern u64 select_estimate_accuracy(struct timespec64 *tv); static inline int poll_schedule(struct poll_wqueues *pwq, int state) @@ -153,12 +153,13 @@ void zero_fd_set(unsigned long nr, unsigned long *fdset) #define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1) -extern int do_select(int n, fd_set_bits *fds, struct timespec *end_time); +extern int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time); extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds, - struct timespec *end_time); + struct timespec64 *end_time); extern int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, - fd_set __user *exp, struct timespec *end_time); + fd_set __user *exp, struct timespec64 *end_time); -extern int poll_select_set_timeout(struct timespec *to, long sec, long nsec); +extern int poll_select_set_timeout(struct timespec64 *to, time64_t sec, + long nsec); #endif /* _LINUX_POLL_H */ -- cgit v1.2.3 From 8e4f70e21877297577dce13cca97599a5864a91f Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Thu, 19 May 2016 17:09:08 -0700 Subject: time: remove timespec_add_safe() All references to timespec_add_safe() now use timespec64_add_safe(). The plan is to replace struct timespec references with struct timespec64 throughout the kernel as timespec is not y2038 safe. Drop timespec_add_safe() and use timespec64_add_safe() for all architectures. Link: http://lkml.kernel.org/r/1461947989-21926-4-git-send-email-deepa.kernel@gmail.com Signed-off-by: Deepa Dinamani Acked-by: John Stultz Cc: Thomas Gleixner Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time64.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time64.h b/include/linux/time64.h index 1778937221bf..7e5d2fa9ac46 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -65,7 +65,6 @@ static inline struct itimerspec64 itimerspec_to_itimerspec64(struct itimerspec * # define timespec64_equal timespec_equal # define timespec64_compare timespec_compare # define set_normalized_timespec64 set_normalized_timespec -# define timespec64_add_safe timespec_add_safe # define timespec64_add timespec_add # define timespec64_sub timespec_sub # define timespec64_valid timespec_valid @@ -134,13 +133,6 @@ static inline int timespec64_compare(const struct timespec64 *lhs, const struct extern void set_normalized_timespec64(struct timespec64 *ts, time64_t sec, s64 nsec); -/* - * timespec64_add_safe assumes both values are positive and checks for - * overflow. It will return TIME64_MAX in case of overflow. - */ -extern struct timespec64 timespec64_add_safe(const struct timespec64 lhs, - const struct timespec64 rhs); - static inline struct timespec64 timespec64_add(struct timespec64 lhs, struct timespec64 rhs) { @@ -222,4 +214,11 @@ static __always_inline void timespec64_add_ns(struct timespec64 *a, u64 ns) #endif +/* + * timespec64_add_safe assumes both values are positive and checks for + * overflow. It will return TIME64_MAX in case of overflow. + */ +extern struct timespec64 timespec64_add_safe(const struct timespec64 lhs, + const struct timespec64 rhs); + #endif /* _LINUX_TIME64_H */ -- cgit v1.2.3 From b1e4d9d82df8ab9097f80aa208c40eab6fc29858 Mon Sep 17 00:00:00 2001 From: "Du, Changbin" Date: Thu, 19 May 2016 17:09:20 -0700 Subject: debugobjects: make fixup functions return bool instead of int I am going to introduce debugobjects infrastructure to USB subsystem. But before this, I found the code of debugobjects could be improved. This patchset will make fixup functions return bool type instead of int. Because fixup only need report success or no. boolean is the 'real' type. This patch (of 7): The object debugging infrastructure core provides some fixup callbacks for the subsystem who use it. These callbacks are called from the debug code whenever a problem in debug_object_init is detected. And debugobjects core suppose them returns 1 when the fixup was successful, otherwise 0. So the return type is boolean. A bad thing is that debug_object_fixup use the return value for arithmetic operation. It confused me that what is the reall return type. Reading over the whole code, I found some place do use the return value incorrectly(see next patch). So why use bool type instead? Signed-off-by: Du, Changbin Cc: Jonathan Corbet Cc: Josh Triplett Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tejun Heo Cc: Christian Borntraeger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/debugobjects.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h index 98ffcbd4888e..a899f10c9365 100644 --- a/include/linux/debugobjects.h +++ b/include/linux/debugobjects.h @@ -39,7 +39,8 @@ struct debug_obj { * @debug_hint: function returning address, which have associated * kernel symbol, to allow identify the object * @fixup_init: fixup function, which is called when the init check - * fails + * fails. All fixup functions must return true if fixup + * was successful, otherwise return false * @fixup_activate: fixup function, which is called when the activate check * fails * @fixup_destroy: fixup function, which is called when the destroy check @@ -51,12 +52,12 @@ struct debug_obj { */ struct debug_obj_descr { const char *name; - void *(*debug_hint) (void *addr); - int (*fixup_init) (void *addr, enum debug_obj_state state); - int (*fixup_activate) (void *addr, enum debug_obj_state state); - int (*fixup_destroy) (void *addr, enum debug_obj_state state); - int (*fixup_free) (void *addr, enum debug_obj_state state); - int (*fixup_assert_init)(void *addr, enum debug_obj_state state); + void *(*debug_hint)(void *addr); + bool (*fixup_init)(void *addr, enum debug_obj_state state); + bool (*fixup_activate)(void *addr, enum debug_obj_state state); + bool (*fixup_destroy)(void *addr, enum debug_obj_state state); + bool (*fixup_free)(void *addr, enum debug_obj_state state); + bool (*fixup_assert_init)(void *addr, enum debug_obj_state state); }; #ifdef CONFIG_DEBUG_OBJECTS -- cgit v1.2.3 From b9fdac7f660609abb157500e468d2165b3c9cf08 Mon Sep 17 00:00:00 2001 From: "Du, Changbin" Date: Thu, 19 May 2016 17:09:41 -0700 Subject: debugobjects: insulate non-fixup logic related to static obj from fixup callbacks When activating a static object we need make sure that the object is tracked in the object tracker. If it is a non-static object then the activation is illegal. In previous implementation, each subsystem need take care of this in their fixup callbacks. Actually we can put it into debugobjects core. Thus we can save duplicated code, and have *pure* fixup callbacks. To achieve this, a new callback "is_static_object" is introduced to let the type specific code decide whether a object is static or not. If yes, we take it into object tracker, otherwise give warning and invoke fixup callback. This change has paassed debugobjects selftest, and I also do some test with all debugobjects supports enabled. At last, I have a concern about the fixups that can it change the object which is in incorrect state on fixup? Because the 'addr' may not point to any valid object if a non-static object is not tracked. Then Change such object can overwrite someone's memory and cause unexpected behaviour. For example, the timer_fixup_activate bind timer to function stub_timer. Link: http://lkml.kernel.org/r/1462576157-14539-1-git-send-email-changbin.du@intel.com [changbin.du@intel.com: improve code comments where invoke the new is_static_object callback] Link: http://lkml.kernel.org/r/1462777431-8171-1-git-send-email-changbin.du@intel.com Signed-off-by: Du, Changbin Cc: Jonathan Corbet Cc: Josh Triplett Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tejun Heo Cc: Christian Borntraeger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/debugobjects.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h index a899f10c9365..46056cb161fc 100644 --- a/include/linux/debugobjects.h +++ b/include/linux/debugobjects.h @@ -38,6 +38,7 @@ struct debug_obj { * @name: name of the object typee * @debug_hint: function returning address, which have associated * kernel symbol, to allow identify the object + * @is_static_object return true if the obj is static, otherwise return false * @fixup_init: fixup function, which is called when the init check * fails. All fixup functions must return true if fixup * was successful, otherwise return false @@ -53,6 +54,7 @@ struct debug_obj { struct debug_obj_descr { const char *name; void *(*debug_hint)(void *addr); + bool (*is_static_object)(void *addr); bool (*fixup_init)(void *addr, enum debug_obj_state state); bool (*fixup_activate)(void *addr, enum debug_obj_state state); bool (*fixup_destroy)(void *addr, enum debug_obj_state state); -- cgit v1.2.3 From 815613da6a67c196d7458d0e6c278ea88e21933f Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Thu, 19 May 2016 17:09:56 -0700 Subject: kernel/padata.c: removed unused code By accident I stumbled across code that has never been used. This driver has EXPORT_SYMBOL functions, and the only user of the code is pcrypt.c, but this only uses a subset of the exported symbols. According to 'git log -G', the functions, padata_set_cpumasks, padata_add_cpu, and padata_remove_cpu have never been used since they were first introduced. This patch removes the unused code. On one 64 bit build, with CRYPTO_PCRYPT built in, the text is more than 4k smaller. kbuild_hp> size $KBUILD_OUTPUT/vmlinux text data bss dec hex filename 10566658 4678360 1122304 16367322 f9beda vmlinux 10561984 4678360 1122304 16362648 f9ac98 vmlinux On another config, 32 bit, the saving is about 0.5k bytes. kbuild_hp-x86> size $KBUILD_OUTPUT/vmlinux 6012005 2409513 2785280 11206798 ab008e vmlinux 6011491 2409513 2785280 11206284 aafe8c vmlinux Signed-off-by: Richard Cochran Cc: Steffen Klassert Cc: Herbert Xu Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/padata.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/padata.h b/include/linux/padata.h index 438694650471..113ee626a4dc 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -175,11 +175,6 @@ extern int padata_do_parallel(struct padata_instance *pinst, extern void padata_do_serial(struct padata_priv *padata); extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type, cpumask_var_t cpumask); -extern int padata_set_cpumasks(struct padata_instance *pinst, - cpumask_var_t pcpumask, - cpumask_var_t cbcpumask); -extern int padata_add_cpu(struct padata_instance *pinst, int cpu, int mask); -extern int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask); extern int padata_start(struct padata_instance *pinst); extern void padata_stop(struct padata_instance *pinst); extern int padata_register_cpumask_notifier(struct padata_instance *pinst, -- cgit v1.2.3 From c7ce4f60ac199fb3521c5fcd64da21cee801ec2b Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Thu, 19 May 2016 17:10:37 -0700 Subject: mm: SLAB freelist randomization Provides an optional config (CONFIG_SLAB_FREELIST_RANDOM) to randomize the SLAB freelist. The list is randomized during initialization of a new set of pages. The order on different freelist sizes is pre-computed at boot for performance. Each kmem_cache has its own randomized freelist. Before pre-computed lists are available freelists are generated dynamically. This security feature reduces the predictability of the kernel SLAB allocator against heap overflows rendering attacks much less stable. For example this attack against SLUB (also applicable against SLAB) would be affected: https://jon.oberheide.org/blog/2010/09/10/linux-kernel-can-slub-overflow/ Also, since v4.6 the freelist was moved at the end of the SLAB. It means a controllable heap is opened to new attacks not yet publicly discussed. A kernel heap overflow can be transformed to multiple use-after-free. This feature makes this type of attack harder too. To generate entropy, we use get_random_bytes_arch because 0 bits of entropy is available in the boot stage. In the worse case this function will fallback to the get_random_bytes sub API. We also generate a shift random number to shift pre-computed freelist for each new set of pages. The config option name is not specific to the SLAB as this approach will be extended to other allocators like SLUB. Performance results highlighted no major changes: Hackbench (running 90 10 times): Before average: 0.0698 After average: 0.0663 (-5.01%) slab_test 1 run on boot. Difference only seen on the 2048 size test being the worse case scenario covered by freelist randomization. New slab pages are constantly being created on the 10000 allocations. Variance should be mainly due to getting new pages every few allocations. Before: Single thread testing ===================== 1. Kmalloc: Repeatedly allocate then free test 10000 times kmalloc(8) -> 99 cycles kfree -> 112 cycles 10000 times kmalloc(16) -> 109 cycles kfree -> 140 cycles 10000 times kmalloc(32) -> 129 cycles kfree -> 137 cycles 10000 times kmalloc(64) -> 141 cycles kfree -> 141 cycles 10000 times kmalloc(128) -> 152 cycles kfree -> 148 cycles 10000 times kmalloc(256) -> 195 cycles kfree -> 167 cycles 10000 times kmalloc(512) -> 257 cycles kfree -> 199 cycles 10000 times kmalloc(1024) -> 393 cycles kfree -> 251 cycles 10000 times kmalloc(2048) -> 649 cycles kfree -> 228 cycles 10000 times kmalloc(4096) -> 806 cycles kfree -> 370 cycles 10000 times kmalloc(8192) -> 814 cycles kfree -> 411 cycles 10000 times kmalloc(16384) -> 892 cycles kfree -> 455 cycles 2. Kmalloc: alloc/free test 10000 times kmalloc(8)/kfree -> 121 cycles 10000 times kmalloc(16)/kfree -> 121 cycles 10000 times kmalloc(32)/kfree -> 121 cycles 10000 times kmalloc(64)/kfree -> 121 cycles 10000 times kmalloc(128)/kfree -> 121 cycles 10000 times kmalloc(256)/kfree -> 119 cycles 10000 times kmalloc(512)/kfree -> 119 cycles 10000 times kmalloc(1024)/kfree -> 119 cycles 10000 times kmalloc(2048)/kfree -> 119 cycles 10000 times kmalloc(4096)/kfree -> 121 cycles 10000 times kmalloc(8192)/kfree -> 119 cycles 10000 times kmalloc(16384)/kfree -> 119 cycles After: Single thread testing ===================== 1. Kmalloc: Repeatedly allocate then free test 10000 times kmalloc(8) -> 130 cycles kfree -> 86 cycles 10000 times kmalloc(16) -> 118 cycles kfree -> 86 cycles 10000 times kmalloc(32) -> 121 cycles kfree -> 85 cycles 10000 times kmalloc(64) -> 176 cycles kfree -> 102 cycles 10000 times kmalloc(128) -> 178 cycles kfree -> 100 cycles 10000 times kmalloc(256) -> 205 cycles kfree -> 109 cycles 10000 times kmalloc(512) -> 262 cycles kfree -> 136 cycles 10000 times kmalloc(1024) -> 342 cycles kfree -> 157 cycles 10000 times kmalloc(2048) -> 701 cycles kfree -> 238 cycles 10000 times kmalloc(4096) -> 803 cycles kfree -> 364 cycles 10000 times kmalloc(8192) -> 835 cycles kfree -> 404 cycles 10000 times kmalloc(16384) -> 896 cycles kfree -> 441 cycles 2. Kmalloc: alloc/free test 10000 times kmalloc(8)/kfree -> 121 cycles 10000 times kmalloc(16)/kfree -> 121 cycles 10000 times kmalloc(32)/kfree -> 123 cycles 10000 times kmalloc(64)/kfree -> 142 cycles 10000 times kmalloc(128)/kfree -> 121 cycles 10000 times kmalloc(256)/kfree -> 119 cycles 10000 times kmalloc(512)/kfree -> 119 cycles 10000 times kmalloc(1024)/kfree -> 119 cycles 10000 times kmalloc(2048)/kfree -> 119 cycles 10000 times kmalloc(4096)/kfree -> 119 cycles 10000 times kmalloc(8192)/kfree -> 119 cycles 10000 times kmalloc(16384)/kfree -> 119 cycles [akpm@linux-foundation.org: propagate gfp_t into cache_random_seq_create()] Signed-off-by: Thomas Garnier Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Kees Cook Cc: Greg Thelen Cc: Laura Abbott Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab_def.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 9edbbf352340..8694f7a5d92b 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -80,6 +80,10 @@ struct kmem_cache { struct kasan_cache kasan_info; #endif +#ifdef CONFIG_SLAB_FREELIST_RANDOM + void *random_seq; +#endif + struct kmem_cache_node *node[MAX_NUMNODES]; }; -- cgit v1.2.3 From 0139aa7b7fa12ceef095d99dc36606a5b10ab83a Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Thu, 19 May 2016 17:10:49 -0700 Subject: mm: rename _count, field of the struct page, to _refcount Many developers already know that field for reference count of the struct page is _count and atomic type. They would try to handle it directly and this could break the purpose of page reference count tracepoint. To prevent direct _count modification, this patch rename it to _refcount and add warning message on the code. After that, developer who need to handle reference count will find that field should not be accessed directly. [akpm@linux-foundation.org: fix comments, per Vlastimil] [akpm@linux-foundation.org: Documentation/vm/transhuge.txt too] [sfr@canb.auug.org.au: sync ethernet driver changes] Signed-off-by: Joonsoo Kim Signed-off-by: Stephen Rothwell Cc: Vlastimil Babka Cc: Hugh Dickins Cc: Johannes Berg Cc: "David S. Miller" Cc: Sunil Goutham Cc: Chris Metcalf Cc: Manish Chopra Cc: Yuval Mintz Cc: Tariq Toukan Cc: Saeed Mahameed Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- include/linux/mm_types.h | 14 +++++++++----- include/linux/page_ref.h | 26 +++++++++++++------------- include/linux/pagemap.h | 8 ++++---- 4 files changed, 27 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 727f799757ab..1193a54ea2b3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -734,7 +734,7 @@ static inline void get_page(struct page *page) page = compound_head(page); /* * Getting a normal page or the head of a compound page - * requires to already have an elevated page->_count. + * requires to already have an elevated page->_refcount. */ VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page); page_ref_inc(page); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c2d75b4fa86c..1fda9c99ef95 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -73,9 +73,9 @@ struct page { unsigned long counters; #else /* - * Keep _count separate from slub cmpxchg_double data. - * As the rest of the double word is protected by - * slab_lock but _count is not. + * Keep _refcount separate from slub cmpxchg_double + * data. As the rest of the double word is protected by + * slab_lock but _refcount is not. */ unsigned counters; #endif @@ -97,7 +97,11 @@ struct page { }; int units; /* SLOB */ }; - atomic_t _count; /* Usage count, see below. */ + /* + * Usage count, *USE WRAPPER FUNCTION* + * when manual accounting. See page_ref.h + */ + atomic_t _refcount; }; unsigned int active; /* SLAB */ }; @@ -248,7 +252,7 @@ struct page_frag_cache { __u32 offset; #endif /* we maintain a pagecount bias, so that we dont dirty cache line - * containing page->_count every time we allocate a fragment. + * containing page->_refcount every time we allocate a fragment. */ unsigned int pagecnt_bias; bool pfmemalloc; diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index e596d5d9540e..8b5e0a9f2431 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -63,17 +63,17 @@ static inline void __page_ref_unfreeze(struct page *page, int v) static inline int page_ref_count(struct page *page) { - return atomic_read(&page->_count); + return atomic_read(&page->_refcount); } static inline int page_count(struct page *page) { - return atomic_read(&compound_head(page)->_count); + return atomic_read(&compound_head(page)->_refcount); } static inline void set_page_count(struct page *page, int v) { - atomic_set(&page->_count, v); + atomic_set(&page->_refcount, v); if (page_ref_tracepoint_active(__tracepoint_page_ref_set)) __page_ref_set(page, v); } @@ -89,35 +89,35 @@ static inline void init_page_count(struct page *page) static inline void page_ref_add(struct page *page, int nr) { - atomic_add(nr, &page->_count); + atomic_add(nr, &page->_refcount); if (page_ref_tracepoint_active(__tracepoint_page_ref_mod)) __page_ref_mod(page, nr); } static inline void page_ref_sub(struct page *page, int nr) { - atomic_sub(nr, &page->_count); + atomic_sub(nr, &page->_refcount); if (page_ref_tracepoint_active(__tracepoint_page_ref_mod)) __page_ref_mod(page, -nr); } static inline void page_ref_inc(struct page *page) { - atomic_inc(&page->_count); + atomic_inc(&page->_refcount); if (page_ref_tracepoint_active(__tracepoint_page_ref_mod)) __page_ref_mod(page, 1); } static inline void page_ref_dec(struct page *page) { - atomic_dec(&page->_count); + atomic_dec(&page->_refcount); if (page_ref_tracepoint_active(__tracepoint_page_ref_mod)) __page_ref_mod(page, -1); } static inline int page_ref_sub_and_test(struct page *page, int nr) { - int ret = atomic_sub_and_test(nr, &page->_count); + int ret = atomic_sub_and_test(nr, &page->_refcount); if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_test)) __page_ref_mod_and_test(page, -nr, ret); @@ -126,7 +126,7 @@ static inline int page_ref_sub_and_test(struct page *page, int nr) static inline int page_ref_dec_and_test(struct page *page) { - int ret = atomic_dec_and_test(&page->_count); + int ret = atomic_dec_and_test(&page->_refcount); if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_test)) __page_ref_mod_and_test(page, -1, ret); @@ -135,7 +135,7 @@ static inline int page_ref_dec_and_test(struct page *page) static inline int page_ref_dec_return(struct page *page) { - int ret = atomic_dec_return(&page->_count); + int ret = atomic_dec_return(&page->_refcount); if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_return)) __page_ref_mod_and_return(page, -1, ret); @@ -144,7 +144,7 @@ static inline int page_ref_dec_return(struct page *page) static inline int page_ref_add_unless(struct page *page, int nr, int u) { - int ret = atomic_add_unless(&page->_count, nr, u); + int ret = atomic_add_unless(&page->_refcount, nr, u); if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_unless)) __page_ref_mod_unless(page, nr, ret); @@ -153,7 +153,7 @@ static inline int page_ref_add_unless(struct page *page, int nr, int u) static inline int page_ref_freeze(struct page *page, int count) { - int ret = likely(atomic_cmpxchg(&page->_count, count, 0) == count); + int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count); if (page_ref_tracepoint_active(__tracepoint_page_ref_freeze)) __page_ref_freeze(page, count, ret); @@ -165,7 +165,7 @@ static inline void page_ref_unfreeze(struct page *page, int count) VM_BUG_ON_PAGE(page_count(page) != 0, page); VM_BUG_ON(count == 0); - atomic_set(&page->_count, count); + atomic_set(&page->_refcount, count); if (page_ref_tracepoint_active(__tracepoint_page_ref_unfreeze)) __page_ref_unfreeze(page, count); } diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 7e1ab155c67c..fe1513ffb7bf 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -90,12 +90,12 @@ void release_pages(struct page **pages, int nr, bool cold); /* * speculatively take a reference to a page. - * If the page is free (_count == 0), then _count is untouched, and 0 - * is returned. Otherwise, _count is incremented by 1 and 1 is returned. + * If the page is free (_refcount == 0), then _refcount is untouched, and 0 + * is returned. Otherwise, _refcount is incremented by 1 and 1 is returned. * * This function must be called inside the same rcu_read_lock() section as has * been used to lookup the page in the pagecache radix-tree (or page table): - * this allows allocators to use a synchronize_rcu() to stabilize _count. + * this allows allocators to use a synchronize_rcu() to stabilize _refcount. * * Unless an RCU grace period has passed, the count of all pages coming out * of the allocator must be considered unstable. page_count may return higher @@ -111,7 +111,7 @@ void release_pages(struct page **pages, int nr, bool cold); * 2. conditionally increment refcount * 3. check the page is still in pagecache (if no, goto 1) * - * Remove-side that cares about stability of _count (eg. reclaim) has the + * Remove-side that cares about stability of _refcount (eg. reclaim) has the * following (with tree_lock held for write): * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg) * B. remove page from pagecache -- cgit v1.2.3 From d64e85d3e1c59c3664b9ec1183052ec4641ea1e2 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 19 May 2016 17:10:52 -0700 Subject: compiler.h: add support for malloc attribute gcc as far back as at least 3.04 documents the function attribute __malloc__. Add a shorthand for attaching that to a function declaration. This was also suggested by Andi Kleen way back in 2002 [1], but didn't get applied, perhaps because gcc at that time generated the exact same code with and without this attribute. This attribute tells the compiler that the return value (if non-NULL) can be assumed not to alias any other valid pointers at the time of the call. Please note that the documentation for a range of gcc versions (starting from around 4.7) contained a somewhat confusing and self-contradicting text: The malloc attribute is used to tell the compiler that a function may be treated as if any non-NULL pointer it returns cannot alias any other pointer valid when the function returns and *that the memory has undefined content*. [...] Standard functions with this property include malloc and *calloc*. (emphasis mine). The intended meaning has later been clarified [2]: This tells the compiler that a function is malloc-like, i.e., that the pointer P returned by the function cannot alias any other pointer valid when the function returns, and moreover no pointers to valid objects occur in any storage addressed by P. What this means is that we can apply the attribute to kmalloc and friends, and it is ok for the returned memory to have well-defined contents (__GFP_ZERO). But it is not ok to apply it to kmemdup(), nor to other functions which both allocate and possibly initialize the memory with existing pointers. So unless someone is doing something pretty perverted kstrdup() should also be a fine candidate. [1] http://thread.gmane.org/gmane.linux.kernel/57172 [2] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56955 Signed-off-by: Rasmus Villemoes Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 1 + include/linux/compiler.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 3d5202eda22f..e2949397c19b 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -142,6 +142,7 @@ #if GCC_VERSION >= 30400 #define __must_check __attribute__((warn_unused_result)) +#define __malloc __attribute__((__malloc__)) #endif #if GCC_VERSION >= 40000 diff --git a/include/linux/compiler.h b/include/linux/compiler.h index b5ff9881bef8..793c0829e3a3 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -357,6 +357,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s #define __deprecated_for_modules #endif +#ifndef __malloc +#define __malloc +#endif + /* * Allow us to avoid 'defined but not used' warnings on functions and data, * as well as force them to be emitted to the assembly file. -- cgit v1.2.3 From 48a270554a3251681ae11173f2fd6389d943e183 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 19 May 2016 17:10:55 -0700 Subject: include/linux: apply __malloc attribute Attach the malloc attribute to a few allocation functions. This helps gcc generate better code by telling it that the return value doesn't alias any existing pointers (which is even more valuable given the pessimizations implied by -fno-strict-aliasing). A simple example of what this allows gcc to do can be seen by looking at the last part of drm_atomic_helper_plane_reset: plane->state = kzalloc(sizeof(*plane->state), GFP_KERNEL); if (plane->state) { plane->state->plane = plane; plane->state->rotation = BIT(DRM_ROTATE_0); } which compiles to e8 99 bf d6 ff callq ffffffff8116d540 48 85 c0 test %rax,%rax 48 89 83 40 02 00 00 mov %rax,0x240(%rbx) 74 11 je ffffffff814015c4 48 89 18 mov %rbx,(%rax) 48 8b 83 40 02 00 00 mov 0x240(%rbx),%rax [*] c7 40 40 01 00 00 00 movl $0x1,0x40(%rax) With this patch applied, the instruction at [*] is elided, since the store to plane->state->plane is known to not alter the value of plane->state. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Rasmus Villemoes Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 16 ++++++++-------- include/linux/device.h | 12 ++++++------ include/linux/kernel.h | 4 ++-- include/linux/mempool.h | 3 ++- include/linux/slab.h | 16 ++++++++-------- include/linux/string.h | 2 +- 6 files changed, 27 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 35b22f94d2d2..f9be32691718 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -83,34 +83,34 @@ extern void *__alloc_bootmem(unsigned long size, unsigned long goal); extern void *__alloc_bootmem_nopanic(unsigned long size, unsigned long align, - unsigned long goal); + unsigned long goal) __malloc; extern void *__alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, - unsigned long goal); + unsigned long goal) __malloc; void *__alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, unsigned long align, - unsigned long goal); + unsigned long goal) __malloc; extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, unsigned long align, - unsigned long goal); + unsigned long goal) __malloc; void *___alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal, - unsigned long limit); + unsigned long limit) __malloc; extern void *__alloc_bootmem_low(unsigned long size, unsigned long align, - unsigned long goal); + unsigned long goal) __malloc; void *__alloc_bootmem_low_nopanic(unsigned long size, unsigned long align, - unsigned long goal); + unsigned long goal) __malloc; extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, - unsigned long goal); + unsigned long goal) __malloc; #ifdef CONFIG_NO_BOOTMEM /* We are using top down, so it is safe to use 0 here */ diff --git a/include/linux/device.h b/include/linux/device.h index b130304f9b1b..ca90ad8bcd61 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -609,14 +609,14 @@ typedef int (*dr_match_t)(struct device *dev, void *res, void *match_data); #ifdef CONFIG_DEBUG_DEVRES extern void *__devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, - int nid, const char *name); + int nid, const char *name) __malloc; #define devres_alloc(release, size, gfp) \ __devres_alloc_node(release, size, gfp, NUMA_NO_NODE, #release) #define devres_alloc_node(release, size, gfp, nid) \ __devres_alloc_node(release, size, gfp, nid, #release) #else extern void *devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, - int nid); + int nid) __malloc; static inline void *devres_alloc(dr_release_t release, size_t size, gfp_t gfp) { return devres_alloc_node(release, size, gfp, NUMA_NO_NODE); @@ -648,12 +648,12 @@ extern void devres_remove_group(struct device *dev, void *id); extern int devres_release_group(struct device *dev, void *id); /* managed devm_k.alloc/kfree for device drivers */ -extern void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp); +extern void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) __malloc; extern __printf(3, 0) char *devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt, - va_list ap); + va_list ap) __malloc; extern __printf(3, 4) -char *devm_kasprintf(struct device *dev, gfp_t gfp, const char *fmt, ...); +char *devm_kasprintf(struct device *dev, gfp_t gfp, const char *fmt, ...) __malloc; static inline void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp) { return devm_kmalloc(dev, size, gfp | __GFP_ZERO); @@ -671,7 +671,7 @@ static inline void *devm_kcalloc(struct device *dev, return devm_kmalloc_array(dev, n, size, flags | __GFP_ZERO); } extern void devm_kfree(struct device *dev, void *p); -extern char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp); +extern char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp) __malloc; extern void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2f7775e229b0..cc7398287fdd 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -412,9 +412,9 @@ extern __printf(3, 4) int scnprintf(char *buf, size_t size, const char *fmt, ...); extern __printf(3, 0) int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); -extern __printf(2, 3) +extern __printf(2, 3) __malloc char *kasprintf(gfp_t gfp, const char *fmt, ...); -extern __printf(2, 0) +extern __printf(2, 0) __malloc char *kvasprintf(gfp_t gfp, const char *fmt, va_list args); extern __printf(2, 0) const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list args); diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 69b6951e8fd2..b1086c936507 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -5,6 +5,7 @@ #define _LINUX_MEMPOOL_H #include +#include struct kmem_cache; @@ -31,7 +32,7 @@ extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, extern int mempool_resize(mempool_t *pool, int new_min_nr); extern void mempool_destroy(mempool_t *pool); -extern void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask); +extern void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) __malloc; extern void mempool_free(void *element, mempool_t *pool); /* diff --git a/include/linux/slab.h b/include/linux/slab.h index 508bd827e6dc..aeb3e6d00a66 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -315,8 +315,8 @@ static __always_inline int kmalloc_index(size_t size) } #endif /* !CONFIG_SLOB */ -void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment; -void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment; +void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc; +void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment __malloc; void kmem_cache_free(struct kmem_cache *, void *); /* @@ -339,8 +339,8 @@ static __always_inline void kfree_bulk(size_t size, void **p) } #ifdef CONFIG_NUMA -void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment; -void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment; +void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __malloc; +void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment __malloc; #else static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node) { @@ -354,12 +354,12 @@ static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t f #endif #ifdef CONFIG_TRACING -extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t) __assume_slab_alignment; +extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t) __assume_slab_alignment __malloc; #ifdef CONFIG_NUMA extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size) __assume_slab_alignment; + int node, size_t size) __assume_slab_alignment __malloc; #else static __always_inline void * kmem_cache_alloc_node_trace(struct kmem_cache *s, @@ -392,10 +392,10 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, } #endif /* CONFIG_TRACING */ -extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment; +extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment __malloc; #ifdef CONFIG_TRACING -extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment; +extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment __malloc; #else static __always_inline void * kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) diff --git a/include/linux/string.h b/include/linux/string.h index d3993a79a325..26b6f6a66f83 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -119,7 +119,7 @@ char *strreplace(char *s, char old, char new); extern void kfree_const(const void *x); -extern char *kstrdup(const char *s, gfp_t gfp); +extern char *kstrdup(const char *s, gfp_t gfp) __malloc; extern const char *kstrdup_const(const char *s, gfp_t gfp); extern char *kstrndup(const char *s, size_t len, gfp_t gfp); extern void *kmemdup(const void *src, size_t len, gfp_t gfp); -- cgit v1.2.3 From 0edaf86cf1a6a97d811fc34765ddbcbc310de564 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 19 May 2016 17:10:58 -0700 Subject: include/linux/nodemask.h: create next_node_in() helper Lots of code does node = next_node(node, XXX); if (node == MAX_NUMNODES) node = first_node(XXX); so create next_node_in() to do this and use it in various places. [mhocko@suse.com: use next_node_in() helper] Acked-by: Vlastimil Babka Acked-by: Michal Hocko Signed-off-by: Michal Hocko Cc: Xishi Qiu Cc: Joonsoo Kim Cc: David Rientjes Cc: Naoya Horiguchi Cc: Laura Abbott Cc: Hui Zhu Cc: Wang Xiaoqiang Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nodemask.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 6e85889cf9ab..f746e44d4046 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -43,8 +43,10 @@ * * int first_node(mask) Number lowest set bit, or MAX_NUMNODES * int next_node(node, mask) Next node past 'node', or MAX_NUMNODES + * int next_node_in(node, mask) Next node past 'node', or wrap to first, + * or MAX_NUMNODES * int first_unset_node(mask) First node not set in mask, or - * MAX_NUMNODES. + * MAX_NUMNODES * * nodemask_t nodemask_of_node(node) Return nodemask with bit 'node' set * NODE_MASK_ALL Initializer - all bits set @@ -259,6 +261,13 @@ static inline int __next_node(int n, const nodemask_t *srcp) return min_t(int,MAX_NUMNODES,find_next_bit(srcp->bits, MAX_NUMNODES, n+1)); } +/* + * Find the next present node in src, starting after node n, wrapping around to + * the first node in src if needed. Returns MAX_NUMNODES if src is empty. + */ +#define next_node_in(n, src) __next_node_in((n), &(src)) +int __next_node_in(int node, const nodemask_t *srcp); + static inline void init_nodemask_of_node(nodemask_t *mask, int node) { nodes_clear(*mask); -- cgit v1.2.3 From 9fee021d15ddd884d40d1540913474e8112313fe Mon Sep 17 00:00:00 2001 From: Vaishali Thakkar Date: Thu, 19 May 2016 17:11:04 -0700 Subject: mm/hugetlb: introduce hugetlb_bad_size() When any unsupported hugepage size is specified, 'hugepagesz=' and 'hugepages=' should be ignored during command line parsing until any supported hugepage size is found. But currently incorrect number of hugepages are allocated when unsupported size is specified as it fails to ignore the 'hugepages=' command. Test case: Note that this is specific to x86 architecture. Boot the kernel with command line option 'hugepagesz=256M hugepages=X'. After boot, dmesg output shows that X number of hugepages of the size 2M is pre-allocated instead of 0. So, to handle such command line options, introduce new routine hugetlb_bad_size. The routine hugetlb_bad_size sets the global variable parsed_valid_hugepagesz. We are using parsed_valid_hugepagesz to save the state when unsupported hugepagesize is found so that we can ignore the 'hugepages=' parameters after that and then reset the variable when supported hugepage size is found. The routine hugetlb_bad_size can be called while setting 'hugepagesz=' parameter in an architecture specific code. Signed-off-by: Vaishali Thakkar Reviewed-by: Mike Kravetz Reviewed-by: Naoya Horiguchi Acked-by: Michal Hocko Cc: Hillf Danton Cc: Yaowei Bai Cc: Dominik Dingel Cc: Kirill A. Shutemov Cc: Paul Gortmaker Cc: Dave Hansen Cc: Benjamin Herrenschmidt Cc: James Hogan Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 7d953c2542a8..e44c57876e89 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -338,6 +338,7 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping, /* arch callback */ int __init alloc_bootmem_huge_page(struct hstate *h); +void __init hugetlb_bad_size(void); void __init hugetlb_add_hstate(unsigned order); struct hstate *size_to_hstate(unsigned long size); -- cgit v1.2.3 From 32f6271dbdc351dce96b11c5f3567bae8188004f Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 19 May 2016 17:11:23 -0700 Subject: mm/hugetlb: is_vm_hugetlb_page() can return bool Make is_vm_hugetlb_page() return bool to improve readability due to this particular function only using either one or zero as its return value. Signed-off-by: Yaowei Bai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb_inline.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb_inline.h b/include/linux/hugetlb_inline.h index 2bb681fbeb35..a4e7ca0f3585 100644 --- a/include/linux/hugetlb_inline.h +++ b/include/linux/hugetlb_inline.h @@ -5,16 +5,16 @@ #include -static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) +static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma) { return !!(vma->vm_flags & VM_HUGETLB); } #else -static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) +static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma) { - return 0; + return false; } #endif -- cgit v1.2.3 From c98940f6fa3d06fa8fec75aa2362b25227573d06 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 19 May 2016 17:11:26 -0700 Subject: mm/memory_hotplug: is_mem_section_removable() can return bool Make is_mem_section_removable() return bool to improve readability due to this particular function only using either one or zero as its return value. Signed-off-by: Yaowei Bai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index adbef586e696..20d8a5d4d133 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -247,16 +247,16 @@ static inline void mem_hotplug_done(void) {} #ifdef CONFIG_MEMORY_HOTREMOVE -extern int is_mem_section_removable(unsigned long pfn, unsigned long nr_pages); +extern bool is_mem_section_removable(unsigned long pfn, unsigned long nr_pages); extern void try_offline_node(int nid); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern void remove_memory(int nid, u64 start, u64 size); #else -static inline int is_mem_section_removable(unsigned long pfn, +static inline bool is_mem_section_removable(unsigned long pfn, unsigned long nr_pages) { - return 0; + return false; } static inline void try_offline_node(int nid) {} -- cgit v1.2.3 From bb00a789e565b96c52b2224c2280f7ac83175bec Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 19 May 2016 17:11:29 -0700 Subject: mm/vmalloc.c: is_vmalloc_addr() can return bool Make is_vmalloc_addr() return bool to improve readability due to this particular function only using either one or zero as its return value. Signed-off-by: Yaowei Bai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1193a54ea2b3..5b375133c695 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -447,14 +447,14 @@ unsigned long vmalloc_to_pfn(const void *addr); * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there * is no special casing required. */ -static inline int is_vmalloc_addr(const void *x) +static inline bool is_vmalloc_addr(const void *x) { #ifdef CONFIG_MMU unsigned long addr = (unsigned long)x; return addr >= VMALLOC_START && addr < VMALLOC_END; #else - return 0; + return false; #endif } #ifdef CONFIG_MMU -- cgit v1.2.3 From 4ee815be1d34a6f254b3d09bdebcb27f294f2bd3 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 19 May 2016 17:11:32 -0700 Subject: mm/mempolicy.c: vma_migratable() can return bool Make vma_migratable() return bool due to this particular function only using either one or zero as its return value. Signed-off-by: Yaowei Bai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 2696c1f05ed1..6978a99e571f 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -172,14 +172,14 @@ extern int mpol_parse_str(char *str, struct mempolicy **mpol); extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); /* Check if a vma is migratable */ -static inline int vma_migratable(struct vm_area_struct *vma) +static inline bool vma_migratable(struct vm_area_struct *vma) { if (vma->vm_flags & (VM_IO | VM_PFNMAP)) - return 0; + return false; #ifndef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION if (vma->vm_flags & VM_HUGETLB) - return 0; + return false; #endif /* @@ -190,8 +190,8 @@ static inline int vma_migratable(struct vm_area_struct *vma) if (vma->vm_file && gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping)) < policy_zone) - return 0; - return 1; + return false; + return true; } extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long); -- cgit v1.2.3 From 29f9cb53d25cd9916537b44b0af7f0b95a2e4438 Mon Sep 17 00:00:00 2001 From: Chanho Min Date: Thu, 19 May 2016 17:11:57 -0700 Subject: mm/highmem: simplify is_highmem() is_highmem() can be simplified by use of is_highmem_idx(). This patch removes redundant code and will make it easier to maintain if the zone policy is changed or a new zone is added. (akpm: saves me 25 bytes of text per is_highmem() callsite) Signed-off-by: Chanho Min Reviewed-by: Dan Williams Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c60df9257cc7..150c6049f961 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -828,10 +828,7 @@ static inline int is_highmem_idx(enum zone_type idx) static inline int is_highmem(struct zone *zone) { #ifdef CONFIG_HIGHMEM - int zone_off = (char *)zone - (char *)zone->zone_pgdat->node_zones; - return zone_off == ZONE_HIGHMEM * sizeof(*zone) || - (zone_off == ZONE_MOVABLE * sizeof(*zone) && - zone_movable_is_highmem()); + return is_highmem_idx(zone_idx(zone)); #else return 0; #endif -- cgit v1.2.3 From 1aa8aea535977f0e0b398f39d052e7befff81da6 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 19 May 2016 17:12:00 -0700 Subject: mm: uninline page_mapped() It's huge. Uninlining it saves 206 bytes per callsite. Shaves 4924 bytes from the x86_64 allmodconfig vmlinux. [akpm@linux-foundation.org: coding-style fixes] Cc: Steve Capper Cc: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5b375133c695..9c2852cabf01 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1032,26 +1032,7 @@ static inline pgoff_t page_file_index(struct page *page) return page->index; } -/* - * Return true if this page is mapped into pagetables. - * For compound page it returns true if any subpage of compound page is mapped. - */ -static inline bool page_mapped(struct page *page) -{ - int i; - if (likely(!PageCompound(page))) - return atomic_read(&page->_mapcount) >= 0; - page = compound_head(page); - if (atomic_read(compound_mapcount_ptr(page)) >= 0) - return true; - if (PageHuge(page)) - return false; - for (i = 0; i < hpage_nr_pages(page); i++) { - if (atomic_read(&page[i]._mapcount) >= 0) - return true; - } - return false; -} +bool page_mapped(struct page *page); /* * Return true only if the page has been allocated with -- cgit v1.2.3 From ca707239e8a7958ffb1c31737d41cae1a674c938 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 19 May 2016 17:12:35 -0700 Subject: mm: update_lru_size warn and reset bad lru_size Though debug kernels have a VM_BUG_ON to help protect from misaccounting lru_size, non-debug kernels are liable to wrap it around: and then the vast unsigned long size draws page reclaim into a loop of repeatedly doing nothing on an empty list, without even a cond_resched(). That soft lockup looks confusingly like an over-busy reclaim scenario, with lots of contention on the lru_lock in shrink_inactive_list(): yet has a totally different origin. Help differentiate with a custom warning in mem_cgroup_update_lru_size(), even in non-debug kernels; and reset the size to avoid the lockup. But the particular bug which suggested this change was mine alone, and since fixed. Make it a WARN_ONCE: the first occurrence is the most informative, a flurry may follow, yet even when rate-limited little more is learnt. Signed-off-by: Hugh Dickins Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Andres Lagar-Cavilla Cc: Yang Shi Cc: Ning Qu Cc: Mel Gorman Cc: Andres Lagar-Cavilla Cc: Konstantin Khlebnikov Cc: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_inline.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 712e8c37a200..d8cea81ab1ac 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -35,8 +35,8 @@ static __always_inline void del_page_from_lru_list(struct page *page, struct lruvec *lruvec, enum lru_list lru) { int nr_pages = hpage_nr_pages(page); - mem_cgroup_update_lru_size(lruvec, lru, -nr_pages); list_del(&page->lru); + mem_cgroup_update_lru_size(lruvec, lru, -nr_pages); __mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, -nr_pages); } -- cgit v1.2.3 From 9d5e6a9f22311b00a20ff9b072760ad3e73f0d99 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 19 May 2016 17:12:38 -0700 Subject: mm: update_lru_size do the __mod_zone_page_state Konstantin Khlebnikov pointed out (nearly four years ago, when lumpy reclaim was removed) that lru_size can be updated by -nr_taken once per call to isolate_lru_pages(), instead of page by page. Update it inside isolate_lru_pages(), or at its two callsites? I chose to update it at the callsites, rearranging and grouping the updates by nr_taken and nr_scanned together in both. With one exception, mem_cgroup_update_lru_size(,lru,) is then used where __mod_zone_page_state(,NR_LRU_BASE+lru,) is used; and we shall be adding some more calls in a future commit. Make the code a little smaller and simpler by incorporating stat update in lru_size update. The exception was move_active_pages_to_lru(), which aggregated the pgmoved stat update separately from the individual lru_size updates; but I still think this a simplification worth making. However, the __mod_zone_page_state is not peculiar to mem_cgroups: so better use the name update_lru_size, calls mem_cgroup_update_lru_size when CONFIG_MEMCG. Signed-off-by: Hugh Dickins Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Andres Lagar-Cavilla Cc: Yang Shi Cc: Ning Qu Cc: Mel Gorman Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ------ include/linux/mm_inline.h | 24 ++++++++++++++++++------ 2 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1191d79aa495..94da96738df3 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -658,12 +658,6 @@ mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) return 0; } -static inline void -mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, - int increment) -{ -} - static inline unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, int nid, unsigned int lru_mask) diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index d8cea81ab1ac..5bd29ba4f174 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -22,22 +22,34 @@ static inline int page_is_file_cache(struct page *page) return !PageSwapBacked(page); } +static __always_inline void __update_lru_size(struct lruvec *lruvec, + enum lru_list lru, int nr_pages) +{ + __mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, nr_pages); +} + +static __always_inline void update_lru_size(struct lruvec *lruvec, + enum lru_list lru, int nr_pages) +{ +#ifdef CONFIG_MEMCG + mem_cgroup_update_lru_size(lruvec, lru, nr_pages); +#else + __update_lru_size(lruvec, lru, nr_pages); +#endif +} + static __always_inline void add_page_to_lru_list(struct page *page, struct lruvec *lruvec, enum lru_list lru) { - int nr_pages = hpage_nr_pages(page); - mem_cgroup_update_lru_size(lruvec, lru, nr_pages); + update_lru_size(lruvec, lru, hpage_nr_pages(page)); list_add(&page->lru, &lruvec->lists[lru]); - __mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, nr_pages); } static __always_inline void del_page_from_lru_list(struct page *page, struct lruvec *lruvec, enum lru_list lru) { - int nr_pages = hpage_nr_pages(page); list_del(&page->lru); - mem_cgroup_update_lru_size(lruvec, lru, -nr_pages); - __mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, -nr_pages); + update_lru_size(lruvec, lru, -hpage_nr_pages(page)); } /** -- cgit v1.2.3 From 75edd345e8ede51bc8f00672feff5d622f2b3af6 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 19 May 2016 17:12:44 -0700 Subject: tmpfs: preliminary minor tidyups Make a few cleanups in mm/shmem.c, before going on to complicate it. shmem_alloc_page() will become more complicated: we can't afford to to have that complication duplicated between a CONFIG_NUMA version and a !CONFIG_NUMA version, so rearrange the #ifdef'ery there to yield a single shmem_swapin() and a single shmem_alloc_page(). Yes, it's a shame to inflict the horrid pseudo-vma on non-NUMA configurations, but eliminating it is a larger cleanup: I have an alloc_pages_mpol() patchset not yet ready - mpol handling is subtle and bug-prone, and changed yet again since my last version. Move __SetPageLocked, __SetPageSwapBacked from shmem_getpage_gfp() to shmem_alloc_page(): that SwapBacked flag will be useful in future, to help to distinguish different cases appropriately. And the SGP_DIRTY variant of SGP_CACHE is hard to understand and of little use (IIRC it dates back to when shmem_getpage() returned the page unlocked): kill it and do the necessary in shmem_file_read_iter(). But an arm64 build then complained that info may be uninitialized (where shmem_getpage_gfp() deletes a freshly alloced page beyond eof), and advancing to an "sgp <= SGP_CACHE" test jogged it back to reality. Signed-off-by: Hugh Dickins Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Andres Lagar-Cavilla Cc: Yang Shi Cc: Ning Qu Cc: Mel Gorman Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 6978a99e571f..4429d255c8ab 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -228,6 +228,12 @@ static inline void mpol_free_shared_policy(struct shared_policy *p) { } +static inline struct mempolicy * +mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx) +{ + return NULL; +} + #define vma_policy(vma) NULL static inline int -- cgit v1.2.3 From 52b6f46bc163eef17ecba4cd552beeafe2b24453 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 19 May 2016 17:12:50 -0700 Subject: mm: /proc/sys/vm/stat_refresh to force vmstat update Provide /proc/sys/vm/stat_refresh to force an immediate update of per-cpu into global vmstats: useful to avoid a sleep(2) or whatever before checking counts when testing. Originally added to work around a bug which left counts stranded indefinitely on a cpu going idle (an inaccuracy magnified when small below-batch numbers represent "huge" amounts of memory), but I believe that bug is now fixed: nonetheless, this is still a useful knob. Its schedule_on_each_cpu() is probably too expensive just to fold into reading /proc/meminfo itself: give this mode 0600 to prevent abuse. Allow a write or a read to do the same: nothing to read, but "grep -h Shmem /proc/sys/vm/stat_refresh /proc/meminfo" is convenient. Oh, and since global_page_state() itself is careful to disguise any underflow as 0, hack in an "Invalid argument" and pr_warn() if a counter is negative after the refresh - this helped to fix a misaccounting of NR_ISOLATED_FILE in my migration code. But on recent kernels, I find that NR_ALLOC_BATCH and NR_PAGES_SCANNED often go negative some of the time. I have not yet worked out why, but have no evidence that it's actually harmful. Punt for the moment by just ignoring the anomaly on those. Signed-off-by: Hugh Dickins Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Andres Lagar-Cavilla Cc: Yang Shi Cc: Ning Qu Cc: Mel Gorman Cc: Andres Lagar-Cavilla Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 73fae8c4a5fb..02fce415b3d9 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -193,6 +193,10 @@ void quiet_vmstat(void); void cpu_vm_stats_fold(int cpu); void refresh_zone_stat_thresholds(void); +struct ctl_table; +int vmstat_refresh(struct ctl_table *, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); + void drain_zonestat(struct zone *zone, struct per_cpu_pageset *); int calculate_pressure_threshold(struct zone *zone); -- cgit v1.2.3 From bf8616d5fa179d6c755f06726567c6d63c6fbbc7 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 19 May 2016 17:12:54 -0700 Subject: huge mm: move_huge_pmd does not need new_vma Remove move_huge_pmd()'s redundant new_vma arg: all it was used for was a VM_NOHUGEPAGE check on new_vma flags, but the new_vma is cloned from the old vma, so a trans_huge_pmd in the new_vma will be as acceptable as it was in the old vma, alignment and size permitting. Signed-off-by: Hugh Dickins Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Andres Lagar-Cavilla Cc: Yang Shi Cc: Ning Qu Cc: Mel Gorman Cc: Andres Lagar-Cavilla Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index d7b9e5346fba..419fb9e03447 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -28,9 +28,7 @@ extern int zap_huge_pmd(struct mmu_gather *tlb, extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned char *vec); -extern bool move_huge_pmd(struct vm_area_struct *vma, - struct vm_area_struct *new_vma, - unsigned long old_addr, +extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, pmd_t *old_pmd, pmd_t *new_pmd); extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, -- cgit v1.2.3 From 3ef22dfff2390e75b379f9715388a852aa56e0d5 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 19 May 2016 17:13:12 -0700 Subject: oom, oom_reaper: try to reap tasks which skip regular OOM killer path If either the current task is already killed or PF_EXITING or a selected task is PF_EXITING then the oom killer is suppressed and so is the oom reaper. This patch adds try_oom_reaper which checks the given task and queues it for the oom reaper if that is safe to be done meaning that the task doesn't share the mm with an alive process. This might help to release the memory pressure while the task tries to exit. [akpm@linux-foundation.org: fix nommu build] Signed-off-by: Michal Hocko Cc: Raushaniya Maksudova Cc: Michael S. Tsirkin Cc: Paul E. McKenney Cc: David Rientjes Cc: Tetsuo Handa Cc: Daniel Vetter Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index 628a43242a34..83b9c39bd8b7 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -72,6 +72,14 @@ static inline bool oom_task_origin(const struct task_struct *p) extern void mark_oom_victim(struct task_struct *tsk); +#ifdef CONFIG_MMU +extern void try_oom_reaper(struct task_struct *tsk); +#else +static inline void try_oom_reaper(struct task_struct *tsk) +{ +} +#endif + extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages); -- cgit v1.2.3 From 175145748d00794369317070dd19ce12dd816241 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 19 May 2016 17:13:21 -0700 Subject: mm, page_alloc: use new PageAnonHead helper in the free page fast path The PageAnon check always checks for compound_head but this is a relatively expensive check if the caller already knows the page is a head page. This patch creates a helper and uses it in the page free path which only operates on head pages. With this patch and "Only check PageCompound for high-order pages", the performance difference on a page allocator microbenchmark is; 4.6.0-rc2 4.6.0-rc2 vanilla nocompound-v1r20 Min alloc-odr0-1 425.00 ( 0.00%) 417.00 ( 1.88%) Min alloc-odr0-2 313.00 ( 0.00%) 308.00 ( 1.60%) Min alloc-odr0-4 257.00 ( 0.00%) 253.00 ( 1.56%) Min alloc-odr0-8 224.00 ( 0.00%) 221.00 ( 1.34%) Min alloc-odr0-16 208.00 ( 0.00%) 205.00 ( 1.44%) Min alloc-odr0-32 199.00 ( 0.00%) 199.00 ( 0.00%) Min alloc-odr0-64 195.00 ( 0.00%) 193.00 ( 1.03%) Min alloc-odr0-128 192.00 ( 0.00%) 191.00 ( 0.52%) Min alloc-odr0-256 204.00 ( 0.00%) 200.00 ( 1.96%) Min alloc-odr0-512 213.00 ( 0.00%) 212.00 ( 0.47%) Min alloc-odr0-1024 219.00 ( 0.00%) 219.00 ( 0.00%) Min alloc-odr0-2048 225.00 ( 0.00%) 225.00 ( 0.00%) Min alloc-odr0-4096 230.00 ( 0.00%) 231.00 ( -0.43%) Min alloc-odr0-8192 235.00 ( 0.00%) 234.00 ( 0.43%) Min alloc-odr0-16384 235.00 ( 0.00%) 234.00 ( 0.43%) Min free-odr0-1 215.00 ( 0.00%) 191.00 ( 11.16%) Min free-odr0-2 152.00 ( 0.00%) 136.00 ( 10.53%) Min free-odr0-4 119.00 ( 0.00%) 107.00 ( 10.08%) Min free-odr0-8 106.00 ( 0.00%) 96.00 ( 9.43%) Min free-odr0-16 97.00 ( 0.00%) 87.00 ( 10.31%) Min free-odr0-32 91.00 ( 0.00%) 83.00 ( 8.79%) Min free-odr0-64 89.00 ( 0.00%) 81.00 ( 8.99%) Min free-odr0-128 88.00 ( 0.00%) 80.00 ( 9.09%) Min free-odr0-256 106.00 ( 0.00%) 95.00 ( 10.38%) Min free-odr0-512 116.00 ( 0.00%) 111.00 ( 4.31%) Min free-odr0-1024 125.00 ( 0.00%) 118.00 ( 5.60%) Min free-odr0-2048 133.00 ( 0.00%) 126.00 ( 5.26%) Min free-odr0-4096 136.00 ( 0.00%) 130.00 ( 4.41%) Min free-odr0-8192 138.00 ( 0.00%) 130.00 ( 5.80%) Min free-odr0-16384 137.00 ( 0.00%) 130.00 ( 5.11%) There is a sizable boost to the free allocator performance. While there is an apparent boost on the allocation side, it's likely a co-incidence or due to the patches slightly reducing cache footprint. Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Jesper Dangaard Brouer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 6b052aa7b5b7..a61e06e5fbce 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -371,10 +371,15 @@ PAGEFLAG(Idle, idle, PF_ANY) #define PAGE_MAPPING_KSM 2 #define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM) +static __always_inline int PageAnonHead(struct page *page) +{ + return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0; +} + static __always_inline int PageAnon(struct page *page) { page = compound_head(page); - return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0; + return PageAnonHead(page); } #ifdef CONFIG_KSM -- cgit v1.2.3 From 060e74173f292fb3e0398b3dca8765568d195ff1 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 19 May 2016 17:13:27 -0700 Subject: mm, page_alloc: inline zone_statistics zone_statistics has one call-site but it's a public function. Make it static and inline. The performance difference on a page allocator microbenchmark is; 4.6.0-rc2 4.6.0-rc2 statbranch-v1r20 statinline-v1r20 Min alloc-odr0-1 419.00 ( 0.00%) 412.00 ( 1.67%) Min alloc-odr0-2 305.00 ( 0.00%) 301.00 ( 1.31%) Min alloc-odr0-4 250.00 ( 0.00%) 247.00 ( 1.20%) Min alloc-odr0-8 219.00 ( 0.00%) 215.00 ( 1.83%) Min alloc-odr0-16 203.00 ( 0.00%) 199.00 ( 1.97%) Min alloc-odr0-32 195.00 ( 0.00%) 191.00 ( 2.05%) Min alloc-odr0-64 191.00 ( 0.00%) 187.00 ( 2.09%) Min alloc-odr0-128 189.00 ( 0.00%) 185.00 ( 2.12%) Min alloc-odr0-256 198.00 ( 0.00%) 193.00 ( 2.53%) Min alloc-odr0-512 210.00 ( 0.00%) 207.00 ( 1.43%) Min alloc-odr0-1024 216.00 ( 0.00%) 213.00 ( 1.39%) Min alloc-odr0-2048 221.00 ( 0.00%) 220.00 ( 0.45%) Min alloc-odr0-4096 227.00 ( 0.00%) 226.00 ( 0.44%) Min alloc-odr0-8192 232.00 ( 0.00%) 229.00 ( 1.29%) Min alloc-odr0-16384 232.00 ( 0.00%) 229.00 ( 1.29%) Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Jesper Dangaard Brouer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 02fce415b3d9..d2da8e053210 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -163,12 +163,10 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone, #ifdef CONFIG_NUMA extern unsigned long node_page_state(int node, enum zone_stat_item item); -extern void zone_statistics(struct zone *, struct zone *, gfp_t gfp); #else #define node_page_state(node, item) global_page_state(item) -#define zone_statistics(_zl, _z, gfp) do { } while (0) #endif /* CONFIG_NUMA */ -- cgit v1.2.3 From 682a3385e7734fa3abbd504cbeb5fe91793f1827 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 19 May 2016 17:13:30 -0700 Subject: mm, page_alloc: inline the fast path of the zonelist iterator The page allocator iterates through a zonelist for zones that match the addressing limitations and nodemask of the caller but many allocations will not be restricted. Despite this, there is always functional call overhead which builds up. This patch inlines the optimistic basic case and only calls the iterator function for the complex case. A hindrance was the fact that cpuset_current_mems_allowed is used in the fastpath as the allowed nodemask even though all nodes are allowed on most systems. The patch handles this by only considering cpuset_current_mems_allowed if a cpuset exists. As well as being faster in the fast-path, this removes some junk in the slowpath. The performance difference on a page allocator microbenchmark is; 4.6.0-rc2 4.6.0-rc2 statinline-v1r20 optiter-v1r20 Min alloc-odr0-1 412.00 ( 0.00%) 382.00 ( 7.28%) Min alloc-odr0-2 301.00 ( 0.00%) 282.00 ( 6.31%) Min alloc-odr0-4 247.00 ( 0.00%) 233.00 ( 5.67%) Min alloc-odr0-8 215.00 ( 0.00%) 203.00 ( 5.58%) Min alloc-odr0-16 199.00 ( 0.00%) 188.00 ( 5.53%) Min alloc-odr0-32 191.00 ( 0.00%) 182.00 ( 4.71%) Min alloc-odr0-64 187.00 ( 0.00%) 177.00 ( 5.35%) Min alloc-odr0-128 185.00 ( 0.00%) 175.00 ( 5.41%) Min alloc-odr0-256 193.00 ( 0.00%) 184.00 ( 4.66%) Min alloc-odr0-512 207.00 ( 0.00%) 197.00 ( 4.83%) Min alloc-odr0-1024 213.00 ( 0.00%) 203.00 ( 4.69%) Min alloc-odr0-2048 220.00 ( 0.00%) 209.00 ( 5.00%) Min alloc-odr0-4096 226.00 ( 0.00%) 214.00 ( 5.31%) Min alloc-odr0-8192 229.00 ( 0.00%) 218.00 ( 4.80%) Min alloc-odr0-16384 229.00 ( 0.00%) 219.00 ( 4.37%) perf indicated that next_zones_zonelist disappeared in the profile and __next_zones_zonelist did not appear. This is expected as the micro-benchmark would hit the inlined fast-path every time. Signed-off-by: Mel Gorman Cc: Vlastimil Babka Cc: Jesper Dangaard Brouer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 150c6049f961..cfcd7723edb6 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -919,6 +919,10 @@ static inline int zonelist_node_idx(struct zoneref *zoneref) #endif /* CONFIG_NUMA */ } +struct zoneref *__next_zones_zonelist(struct zoneref *z, + enum zone_type highest_zoneidx, + nodemask_t *nodes); + /** * next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point * @z - The cursor used as a starting point for the search @@ -931,9 +935,14 @@ static inline int zonelist_node_idx(struct zoneref *zoneref) * being examined. It should be advanced by one before calling * next_zones_zonelist again. */ -struct zoneref *next_zones_zonelist(struct zoneref *z, +static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z, enum zone_type highest_zoneidx, - nodemask_t *nodes); + nodemask_t *nodes) +{ + if (likely(!nodes && zonelist_zone_idx(z) <= highest_zoneidx)) + return z; + return __next_zones_zonelist(z, highest_zoneidx, nodes); +} /** * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist -- cgit v1.2.3 From c603844bdcb5238980de8d58b393f52d7729d651 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 19 May 2016 17:13:38 -0700 Subject: mm, page_alloc: convert alloc_flags to unsigned alloc_flags is a bitmask of flags but it is signed which does not necessarily generate the best code depending on the compiler. Even without an impact, it makes more sense that this be unsigned. Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Jesper Dangaard Brouer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 6 +++--- include/linux/mmzone.h | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index d7c8de583a23..242b660f64e6 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -39,12 +39,12 @@ extern int sysctl_compact_unevictable_allowed; extern int fragmentation_index(struct zone *zone, unsigned int order); extern unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order, - int alloc_flags, const struct alloc_context *ac, - enum migrate_mode mode, int *contended); + unsigned int alloc_flags, const struct alloc_context *ac, + enum migrate_mode mode, int *contended); extern void compact_pgdat(pg_data_t *pgdat, int order); extern void reset_isolation_suitable(pg_data_t *pgdat); extern unsigned long compaction_suitable(struct zone *zone, int order, - int alloc_flags, int classzone_idx); + unsigned int alloc_flags, int classzone_idx); extern void defer_compaction(struct zone *zone, int order); extern bool compaction_deferred(struct zone *zone, int order); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index cfcd7723edb6..327f0fa1e1ce 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -747,7 +747,8 @@ extern struct mutex zonelists_mutex; void build_all_zonelists(pg_data_t *pgdat, struct zone *zone); void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx); bool zone_watermark_ok(struct zone *z, unsigned int order, - unsigned long mark, int classzone_idx, int alloc_flags); + unsigned long mark, int classzone_idx, + unsigned int alloc_flags); bool zone_watermark_ok_safe(struct zone *z, unsigned int order, unsigned long mark, int classzone_idx); enum memmap_context { -- cgit v1.2.3 From 09940a4f1e816abe3248fa0d185fc0e7f54c8c12 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 19 May 2016 17:13:53 -0700 Subject: mm, page_alloc: simplify last cpupid reset The current reset unnecessarily clears flags and makes pointless calculations. Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Jesper Dangaard Brouer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9c2852cabf01..2b97be1147ec 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -850,10 +850,7 @@ extern int page_cpupid_xchg_last(struct page *page, int cpupid); static inline void page_cpupid_reset_last(struct page *page) { - int cpupid = (1 << LAST_CPUPID_SHIFT) - 1; - - page->flags &= ~(LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT); - page->flags |= (cpupid & LAST_CPUPID_MASK) << LAST_CPUPID_PGSHIFT; + page->flags |= LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT; } #endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */ #else /* !CONFIG_NUMA_BALANCING */ -- cgit v1.2.3 From c33d6c06f60f710f0305ae792773e1c2560e1e51 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 19 May 2016 17:14:10 -0700 Subject: mm, page_alloc: avoid looking up the first zone in a zonelist twice The allocator fast path looks up the first usable zone in a zonelist and then get_page_from_freelist does the same job in the zonelist iterator. This patch preserves the necessary information. 4.6.0-rc2 4.6.0-rc2 fastmark-v1r20 initonce-v1r20 Min alloc-odr0-1 364.00 ( 0.00%) 359.00 ( 1.37%) Min alloc-odr0-2 262.00 ( 0.00%) 260.00 ( 0.76%) Min alloc-odr0-4 214.00 ( 0.00%) 214.00 ( 0.00%) Min alloc-odr0-8 186.00 ( 0.00%) 186.00 ( 0.00%) Min alloc-odr0-16 173.00 ( 0.00%) 173.00 ( 0.00%) Min alloc-odr0-32 165.00 ( 0.00%) 165.00 ( 0.00%) Min alloc-odr0-64 161.00 ( 0.00%) 162.00 ( -0.62%) Min alloc-odr0-128 159.00 ( 0.00%) 161.00 ( -1.26%) Min alloc-odr0-256 168.00 ( 0.00%) 170.00 ( -1.19%) Min alloc-odr0-512 180.00 ( 0.00%) 181.00 ( -0.56%) Min alloc-odr0-1024 190.00 ( 0.00%) 190.00 ( 0.00%) Min alloc-odr0-2048 196.00 ( 0.00%) 196.00 ( 0.00%) Min alloc-odr0-4096 202.00 ( 0.00%) 202.00 ( 0.00%) Min alloc-odr0-8192 206.00 ( 0.00%) 205.00 ( 0.49%) Min alloc-odr0-16384 206.00 ( 0.00%) 205.00 ( 0.49%) The benefit is negligible and the results are within the noise but each cycle counts. Signed-off-by: Mel Gorman Cc: Vlastimil Babka Cc: Jesper Dangaard Brouer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 327f0fa1e1ce..4b28d2f8125e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -959,13 +959,10 @@ static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z, */ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, enum zone_type highest_zoneidx, - nodemask_t *nodes, - struct zone **zone) + nodemask_t *nodes) { - struct zoneref *z = next_zones_zonelist(zonelist->_zonerefs, + return next_zones_zonelist(zonelist->_zonerefs, highest_zoneidx, nodes); - *zone = zonelist_zone(z); - return z; } /** @@ -980,10 +977,17 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, * within a given nodemask */ #define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \ - for (z = first_zones_zonelist(zlist, highidx, nodemask, &zone); \ + for (z = first_zones_zonelist(zlist, highidx, nodemask), zone = zonelist_zone(z); \ zone; \ z = next_zones_zonelist(++z, highidx, nodemask), \ - zone = zonelist_zone(z)) \ + zone = zonelist_zone(z)) + +#define for_next_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \ + for (zone = z->zone; \ + zone; \ + z = next_zones_zonelist(++z, highidx, nodemask), \ + zone = zonelist_zone(z)) + /** * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index -- cgit v1.2.3 From 0b423ca22f95a867f789aab1fe57ee4e378df43b Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 19 May 2016 17:14:27 -0700 Subject: mm, page_alloc: inline pageblock lookup in page free fast paths The function call overhead of get_pfnblock_flags_mask() is measurable in the page free paths. This patch uses an inlined version that is faster. Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Jesper Dangaard Brouer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4b28d2f8125e..c60db2096fd8 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -85,13 +85,6 @@ extern int page_group_by_mobility_disabled; get_pfnblock_flags_mask(page, page_to_pfn(page), \ PB_migrate_end, MIGRATETYPE_MASK) -static inline int get_pfnblock_migratetype(struct page *page, unsigned long pfn) -{ - BUILD_BUG_ON(PB_migrate_end - PB_migrate != 2); - return get_pfnblock_flags_mask(page, pfn, PB_migrate_end, - MIGRATETYPE_MASK); -} - struct free_area { struct list_head free_list[MIGRATE_TYPES]; unsigned long nr_free; -- cgit v1.2.3 From 002f290627c27068087f6204baec7a334e5a3b48 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 19 May 2016 17:14:30 -0700 Subject: cpuset: use static key better and convert to new API An important function for cpusets is cpuset_node_allowed(), which optimizes on the fact if there's a single root CPU set, it must be trivially allowed. But the check "nr_cpusets() <= 1" doesn't use the cpusets_enabled_key static key the right way where static keys eliminate branching overhead with jump labels. This patch converts it so that static key is used properly. It's also switched to the new static key API and the checking functions are converted to return bool instead of int. We also provide a new variant __cpuset_zone_allowed() which expects that the static key check was already done and they key was enabled. This is needed for get_page_from_freelist() where we want to also avoid the relatively slower check when ALLOC_CPUSET is not set in alloc_flags. The impact on the page allocator microbenchmark is less than expected but the cleanup in itself is worthwhile. 4.6.0-rc2 4.6.0-rc2 multcheck-v1r20 cpuset-v1r20 Min alloc-odr0-1 348.00 ( 0.00%) 348.00 ( 0.00%) Min alloc-odr0-2 254.00 ( 0.00%) 254.00 ( 0.00%) Min alloc-odr0-4 213.00 ( 0.00%) 213.00 ( 0.00%) Min alloc-odr0-8 186.00 ( 0.00%) 183.00 ( 1.61%) Min alloc-odr0-16 173.00 ( 0.00%) 171.00 ( 1.16%) Min alloc-odr0-32 166.00 ( 0.00%) 163.00 ( 1.81%) Min alloc-odr0-64 162.00 ( 0.00%) 159.00 ( 1.85%) Min alloc-odr0-128 160.00 ( 0.00%) 157.00 ( 1.88%) Min alloc-odr0-256 169.00 ( 0.00%) 166.00 ( 1.78%) Min alloc-odr0-512 180.00 ( 0.00%) 180.00 ( 0.00%) Min alloc-odr0-1024 188.00 ( 0.00%) 187.00 ( 0.53%) Min alloc-odr0-2048 194.00 ( 0.00%) 193.00 ( 0.52%) Min alloc-odr0-4096 199.00 ( 0.00%) 198.00 ( 0.50%) Min alloc-odr0-8192 202.00 ( 0.00%) 201.00 ( 0.50%) Min alloc-odr0-16384 203.00 ( 0.00%) 202.00 ( 0.49%) Signed-off-by: Vlastimil Babka Signed-off-by: Mel Gorman Acked-by: Zefan Li Cc: Jesper Dangaard Brouer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 85a868ccb493..bfc204e70338 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -16,26 +16,26 @@ #ifdef CONFIG_CPUSETS -extern struct static_key cpusets_enabled_key; +extern struct static_key_false cpusets_enabled_key; static inline bool cpusets_enabled(void) { - return static_key_false(&cpusets_enabled_key); + return static_branch_unlikely(&cpusets_enabled_key); } static inline int nr_cpusets(void) { /* jump label reference count + the top-level cpuset */ - return static_key_count(&cpusets_enabled_key) + 1; + return static_key_count(&cpusets_enabled_key.key) + 1; } static inline void cpuset_inc(void) { - static_key_slow_inc(&cpusets_enabled_key); + static_branch_inc(&cpusets_enabled_key); } static inline void cpuset_dec(void) { - static_key_slow_dec(&cpusets_enabled_key); + static_branch_dec(&cpusets_enabled_key); } extern int cpuset_init(void); @@ -48,16 +48,25 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p); void cpuset_init_current_mems_allowed(void); int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask); -extern int __cpuset_node_allowed(int node, gfp_t gfp_mask); +extern bool __cpuset_node_allowed(int node, gfp_t gfp_mask); -static inline int cpuset_node_allowed(int node, gfp_t gfp_mask) +static inline bool cpuset_node_allowed(int node, gfp_t gfp_mask) { - return nr_cpusets() <= 1 || __cpuset_node_allowed(node, gfp_mask); + if (cpusets_enabled()) + return __cpuset_node_allowed(node, gfp_mask); + return true; } -static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) +static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { - return cpuset_node_allowed(zone_to_nid(z), gfp_mask); + return __cpuset_node_allowed(zone_to_nid(z), gfp_mask); +} + +static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) +{ + if (cpusets_enabled()) + return __cpuset_zone_allowed(z, gfp_mask); + return true; } extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, @@ -172,14 +181,19 @@ static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) return 1; } -static inline int cpuset_node_allowed(int node, gfp_t gfp_mask) +static inline bool cpuset_node_allowed(int node, gfp_t gfp_mask) { - return 1; + return true; } -static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) +static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { - return 1; + return true; +} + +static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) +{ + return true; } static inline int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, -- cgit v1.2.3 From 44bfc42e94cd76a0bd44f3fce98d4a7b76f31bc0 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 4 May 2016 14:35:48 +0100 Subject: KVM: arm/arm64: move GICv2 emulation defines into arm-gic-v3.h As (some) GICv3 hosts can emulate a GICv2, some GICv2 specific masks for the list register definition also apply to GICv3 LRs. At the moment we have those definitions in the KVM VGICv3 implementation, so let's move them into the GICv3 header file to have them automatically defined. Signed-off-by: Andre Przywara Acked-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index d5d798b35c1f..ec938d14da5d 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -276,6 +276,11 @@ #define ICH_LR_PHYS_ID_SHIFT 32 #define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT) +/* These are for GICv2 emulation only */ +#define GICH_LR_VIRTUALID (0x3ffUL << 0) +#define GICH_LR_PHYSID_CPUID_SHIFT (10) +#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) + #define ICH_MISR_EOI (1 << 0) #define ICH_MISR_U (1 << 1) -- cgit v1.2.3 From 140b086dd19771410915a924db2e635c2b51a0f4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 26 Nov 2015 17:19:25 +0000 Subject: KVM: arm/arm64: vgic-new: Add GICv2 world switch backend Processing maintenance interrupts and accessing the list registers are dependent on the host's GIC version. Introduce vgic-v2.c to contain GICv2 specific functions. Implement the GICv2 specific code for syncing the emulation state into the VGIC registers. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Eric Auger Signed-off-by: Andre Przywara Reviewed-by: Eric Auger Reviewed-by: Christoffer Dall --- include/linux/irqchip/arm-gic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 9c940263ca23..be0d26f940af 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -76,6 +76,7 @@ #define GICH_LR_VIRTUALID (0x3ff << 0) #define GICH_LR_PHYSID_CPUID_SHIFT (10) #define GICH_LR_PHYSID_CPUID (0x3ff << GICH_LR_PHYSID_CPUID_SHIFT) +#define GICH_LR_PRIORITY_SHIFT 23 #define GICH_LR_STATE (3 << 28) #define GICH_LR_PENDING_BIT (1 << 28) #define GICH_LR_ACTIVE_BIT (1 << 29) -- cgit v1.2.3 From 59529f69f5048e50dcde3434661981c01f8208b4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 30 Nov 2015 13:09:53 +0000 Subject: KVM: arm/arm64: vgic-new: Add GICv3 world switch backend As the GICv3 virtual interface registers differ from their GICv2 siblings, we need different handlers for processing maintenance interrupts and reading/writing to the LRs. Implement the respective handler functions and connect them to existing code to be called if the host is using a GICv3. Signed-off-by: Marc Zyngier Signed-off-by: Andre Przywara Reviewed-by: Christoffer Dall --- include/linux/irqchip/arm-gic-v3.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index ec938d14da5d..35e93cfa1742 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -275,6 +275,7 @@ #define ICH_LR_ACTIVE_BIT (1ULL << 63) #define ICH_LR_PHYS_ID_SHIFT 32 #define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT) +#define ICH_LR_PRIORITY_SHIFT 48 /* These are for GICv2 emulation only */ #define GICH_LR_VIRTUALID (0x3ffUL << 0) -- cgit v1.2.3 From 2b0cda8789654bfcebca397daebc37aff081bd75 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 26 Apr 2016 11:06:47 +0100 Subject: KVM: arm/arm64: vgic-new: Add CTLR, TYPER and IIDR handlers Those three registers are v2 emulation specific, so their implementation lives entirely in vgic-mmio-v2.c. Also they are handled in one function, as their implementation is pretty simple. When the guest enables the distributor, we kick all VCPUs to get potentially pending interrupts serviced. Signed-off-by: Marc Zyngier Signed-off-by: Andre Przywara Reviewed-by: Christoffer Dall --- include/linux/irqchip/arm-gic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index be0d26f940af..fd051855539b 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -33,6 +33,7 @@ #define GIC_DIST_CTRL 0x000 #define GIC_DIST_CTR 0x004 +#define GIC_DIST_IIDR 0x008 #define GIC_DIST_IGROUP 0x080 #define GIC_DIST_ENABLE_SET 0x100 #define GIC_DIST_ENABLE_CLEAR 0x180 -- cgit v1.2.3 From 7e13318daa4a67bff2f800923a993ef3818b3c53 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 18 May 2016 09:06:10 -0700 Subject: net: define gso types for IPx over IPv4 and IPv6 This patch defines two new GSO definitions SKB_GSO_IPXIP4 and SKB_GSO_IPXIP6 along with corresponding NETIF_F_GSO_IPXIP4 and NETIF_F_GSO_IPXIP6. These are used to described IP in IP tunnel and what the outer protocol is. The inner protocol can be deduced from other GSO types (e.g. SKB_GSO_TCPV4 and SKB_GSO_TCPV6). The GSO types of SKB_GSO_IPIP and SKB_GSO_SIT are removed (these are both instances of SKB_GSO_IPXIP4). SKB_GSO_IPXIP6 will be used when support for GSO with IP encapsulation over IPv6 is added. Signed-off-by: Tom Herbert Acked-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 12 ++++++------ include/linux/netdevice.h | 4 ++-- include/linux/skbuff.h | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index bc8736266749..aa7b2400f98c 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -44,8 +44,8 @@ enum { NETIF_F_FSO_BIT, /* ... FCoE segmentation */ NETIF_F_GSO_GRE_BIT, /* ... GRE with TSO */ NETIF_F_GSO_GRE_CSUM_BIT, /* ... GRE with csum with TSO */ - NETIF_F_GSO_IPIP_BIT, /* ... IPIP tunnel with TSO */ - NETIF_F_GSO_SIT_BIT, /* ... SIT tunnel with TSO */ + NETIF_F_GSO_IPXIP4_BIT, /* ... IP4 or IP6 over IP4 with TSO */ + NETIF_F_GSO_IPXIP6_BIT, /* ... IP4 or IP6 over IP6 with TSO */ NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */ NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT,/* ... UDP TUNNEL with TSO & CSUM */ NETIF_F_GSO_PARTIAL_BIT, /* ... Only segment inner-most L4 @@ -121,8 +121,8 @@ enum { #define NETIF_F_RXALL __NETIF_F(RXALL) #define NETIF_F_GSO_GRE __NETIF_F(GSO_GRE) #define NETIF_F_GSO_GRE_CSUM __NETIF_F(GSO_GRE_CSUM) -#define NETIF_F_GSO_IPIP __NETIF_F(GSO_IPIP) -#define NETIF_F_GSO_SIT __NETIF_F(GSO_SIT) +#define NETIF_F_GSO_IPXIP4 __NETIF_F(GSO_IPXIP4) +#define NETIF_F_GSO_IPXIP6 __NETIF_F(GSO_IPXIP6) #define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL) #define NETIF_F_GSO_UDP_TUNNEL_CSUM __NETIF_F(GSO_UDP_TUNNEL_CSUM) #define NETIF_F_TSO_MANGLEID __NETIF_F(TSO_MANGLEID) @@ -200,8 +200,8 @@ enum { #define NETIF_F_GSO_ENCAP_ALL (NETIF_F_GSO_GRE | \ NETIF_F_GSO_GRE_CSUM | \ - NETIF_F_GSO_IPIP | \ - NETIF_F_GSO_SIT | \ + NETIF_F_GSO_IPXIP4 | \ + NETIF_F_GSO_IPXIP6 | \ NETIF_F_GSO_UDP_TUNNEL | \ NETIF_F_GSO_UDP_TUNNEL_CSUM) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c148edfe4965..f45929ce8157 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4006,8 +4006,8 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_GRE != (NETIF_F_GSO_GRE >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_GRE_CSUM != (NETIF_F_GSO_GRE_CSUM >> NETIF_F_GSO_SHIFT)); - BUILD_BUG_ON(SKB_GSO_IPIP != (NETIF_F_GSO_IPIP >> NETIF_F_GSO_SHIFT)); - BUILD_BUG_ON(SKB_GSO_SIT != (NETIF_F_GSO_SIT >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_IPXIP4 != (NETIF_F_GSO_IPXIP4 >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_IPXIP6 != (NETIF_F_GSO_IPXIP6 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL != (NETIF_F_GSO_UDP_TUNNEL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT)); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c413c588a24f..65968a97517f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -471,9 +471,9 @@ enum { SKB_GSO_GRE_CSUM = 1 << 8, - SKB_GSO_IPIP = 1 << 9, + SKB_GSO_IPXIP4 = 1 << 9, - SKB_GSO_SIT = 1 << 10, + SKB_GSO_IPXIP6 = 1 << 10, SKB_GSO_UDP_TUNNEL = 1 << 11, -- cgit v1.2.3 From 95829b3a9c0b1d88778b23bc2afdf5a83de066ff Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Thu, 19 May 2016 11:30:54 -0400 Subject: net: suppress warnings on dev_alloc_skb Noticed an allocation failure in a network driver the other day on a 32 bit system: DMA-API: debugging out of memory - disabling bnx2fc: adapter_lookup: hba NULL lldpad: page allocation failure. order:0, mode:0x4120 Pid: 4556, comm: lldpad Not tainted 2.6.32-639.el6.i686.debug #1 Call Trace: [] ? printk+0x19/0x23 [] ? __alloc_pages_nodemask+0x664/0x830 [] ? free_object+0x82/0xa0 [] ? ixgbe_alloc_rx_buffers+0x10b/0x1d0 [ixgbe] [] ? ixgbe_configure_rx_ring+0x29f/0x420 [ixgbe] [] ? ixgbe_configure_tx_ring+0x15c/0x220 [ixgbe] [] ? ixgbe_configure+0x589/0xc00 [ixgbe] [] ? ixgbe_open+0xa7/0x5c0 [ixgbe] [] ? ixgbe_init_interrupt_scheme+0x5b6/0x970 [ixgbe] [] ? ixgbe_setup_tc+0x1a4/0x260 [ixgbe] [] ? ixgbe_dcbnl_set_state+0x7f/0x90 [ixgbe] [] ? dcb_doit+0x10ed/0x16d0 ... Thought that perhaps the big splat in the logs wasn't really necessecary, as all call sites for dev_alloc_skb: a) check the return code for the function and b) either print their own error message or have a recovery path that makes the warning moot. Fix it by modifying dev_alloc_pages to pass __GFP_NOWARN as a gfp flag to suppress the warning applies to the net tree Signed-off-by: Neil Horman CC: "David S. Miller" CC: Eric Dumazet CC: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 65968a97517f..ee38a4127475 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2467,7 +2467,7 @@ static inline struct page *__dev_alloc_pages(gfp_t gfp_mask, static inline struct page *dev_alloc_pages(unsigned int order) { - return __dev_alloc_pages(GFP_ATOMIC, order); + return __dev_alloc_pages(GFP_ATOMIC | __GFP_NOWARN, order); } /** @@ -2485,7 +2485,7 @@ static inline struct page *__dev_alloc_page(gfp_t gfp_mask) static inline struct page *dev_alloc_page(void) { - return __dev_alloc_page(GFP_ATOMIC); + return dev_alloc_pages(0); } /** -- cgit v1.2.3 From 59dc76b0d4dfdd7dc46a1010e4afb44f60f3e97f Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Fri, 20 May 2016 16:56:31 -0700 Subject: mm: vmscan: reduce size of inactive file list The inactive file list should still be large enough to contain readahead windows and freshly written file data, but it no longer is the only source for detecting multiple accesses to file pages. The workingset refault measurement code causes recently evicted file pages that get accessed again after a shorter interval to be promoted directly to the active list. With that mechanism in place, we can afford to (on a larger system) dedicate more memory to the active file list, so we can actually cache more of the frequently used file pages in memory, and not have them pushed out by streaming writes, once-used streaming file reads, etc. This can help things like database workloads, where only half the page cache can currently be used to cache the database working set. This patch automatically increases that fraction on larger systems, using the same ratio that has already been used for anonymous memory. [hannes@cmpxchg.org: cgroup-awareness] Signed-off-by: Rik van Riel Signed-off-by: Johannes Weiner Reported-by: Andres Freund Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 94da96738df3..a805474df4ab 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -415,25 +415,6 @@ unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) return mz->lru_size[lru]; } -static inline bool mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) -{ - unsigned long inactive_ratio; - unsigned long inactive; - unsigned long active; - unsigned long gb; - - inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_ANON); - active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_ANON); - - gb = (inactive + active) >> (30 - PAGE_SHIFT); - if (gb) - inactive_ratio = int_sqrt(10 * gb); - else - inactive_ratio = 1; - - return inactive * inactive_ratio < active; -} - void mem_cgroup_handle_over_high(void); void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, @@ -646,12 +627,6 @@ static inline bool mem_cgroup_online(struct mem_cgroup *memcg) return true; } -static inline bool -mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) -{ - return true; -} - static inline unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) { -- cgit v1.2.3 From ea7ab982b6bdb7ce218fd3a7850bb2e2b414fdd0 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 20 May 2016 16:56:38 -0700 Subject: mm, compaction: change COMPACT_ constants into enum Compaction code is doing weird dances between COMPACT_FOO -> int -> unsigned long But there doesn't seem to be any reason for that. All functions which return/use one of those constants are not expecting any other value so it really makes sense to define an enum for them and make it clear that no other values are expected. This is a pure cleanup and shouldn't introduce any functional changes. Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Hillf Danton Cc: David Rientjes Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Mel Gorman Cc: Tetsuo Handa Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 45 +++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 242b660f64e6..706cbf00e919 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -2,21 +2,29 @@ #define _LINUX_COMPACTION_H /* Return values for compact_zone() and try_to_compact_pages() */ -/* compaction didn't start as it was deferred due to past failures */ -#define COMPACT_DEFERRED 0 -/* compaction didn't start as it was not possible or direct reclaim was more suitable */ -#define COMPACT_SKIPPED 1 -/* compaction should continue to another pageblock */ -#define COMPACT_CONTINUE 2 -/* direct compaction partially compacted a zone and there are suitable pages */ -#define COMPACT_PARTIAL 3 -/* The full zone was compacted */ -#define COMPACT_COMPLETE 4 -/* For more detailed tracepoint output */ -#define COMPACT_NO_SUITABLE_PAGE 5 -#define COMPACT_NOT_SUITABLE_ZONE 6 -#define COMPACT_CONTENDED 7 /* When adding new states, please adjust include/trace/events/compaction.h */ +enum compact_result { + /* compaction didn't start as it was deferred due to past failures */ + COMPACT_DEFERRED, + /* + * compaction didn't start as it was not possible or direct reclaim + * was more suitable + */ + COMPACT_SKIPPED, + /* compaction should continue to another pageblock */ + COMPACT_CONTINUE, + /* + * direct compaction partially compacted a zone and there are suitable + * pages + */ + COMPACT_PARTIAL, + /* The full zone was compacted */ + COMPACT_COMPLETE, + /* For more detailed tracepoint output */ + COMPACT_NO_SUITABLE_PAGE, + COMPACT_NOT_SUITABLE_ZONE, + COMPACT_CONTENDED, +}; /* Used to signal whether compaction detected need_sched() or lock contention */ /* No contention detected */ @@ -38,12 +46,13 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write, extern int sysctl_compact_unevictable_allowed; extern int fragmentation_index(struct zone *zone, unsigned int order); -extern unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order, +extern enum compact_result try_to_compact_pages(gfp_t gfp_mask, + unsigned int order, unsigned int alloc_flags, const struct alloc_context *ac, enum migrate_mode mode, int *contended); extern void compact_pgdat(pg_data_t *pgdat, int order); extern void reset_isolation_suitable(pg_data_t *pgdat); -extern unsigned long compaction_suitable(struct zone *zone, int order, +extern enum compact_result compaction_suitable(struct zone *zone, int order, unsigned int alloc_flags, int classzone_idx); extern void defer_compaction(struct zone *zone, int order); @@ -57,7 +66,7 @@ extern void kcompactd_stop(int nid); extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx); #else -static inline unsigned long try_to_compact_pages(gfp_t gfp_mask, +static inline enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, int alloc_flags, const struct alloc_context *ac, enum migrate_mode mode, int *contended) @@ -73,7 +82,7 @@ static inline void reset_isolation_suitable(pg_data_t *pgdat) { } -static inline unsigned long compaction_suitable(struct zone *zone, int order, +static inline enum compact_result compaction_suitable(struct zone *zone, int order, int alloc_flags, int classzone_idx) { return COMPACT_SKIPPED; -- cgit v1.2.3 From 1d4746d395975e0ff5103e20ab169d1a95b4ef9e Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 20 May 2016 16:56:44 -0700 Subject: mm, compaction: distinguish COMPACT_DEFERRED from COMPACT_SKIPPED try_to_compact_pages() can currently return COMPACT_SKIPPED even when the compaction is defered for some zone just because zone DMA is skipped in 99% of cases due to watermark checks. This makes COMPACT_DEFERRED basically unusable for the page allocator as a feedback mechanism. Make sure we distinguish those two states properly and switch their ordering in the enum. This would mean that the COMPACT_SKIPPED will be returned only when all eligible zones are skipped. As a result COMPACT_DEFERRED handling for THP in __alloc_pages_slowpath will be more precise and we would bail out rather than reclaim. Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Hillf Danton Cc: David Rientjes Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Mel Gorman Cc: Tetsuo Handa Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 706cbf00e919..11f228712ed5 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -4,13 +4,16 @@ /* Return values for compact_zone() and try_to_compact_pages() */ /* When adding new states, please adjust include/trace/events/compaction.h */ enum compact_result { - /* compaction didn't start as it was deferred due to past failures */ - COMPACT_DEFERRED, /* * compaction didn't start as it was not possible or direct reclaim * was more suitable */ COMPACT_SKIPPED, + /* compaction didn't start as it was deferred due to past failures */ + COMPACT_DEFERRED, + /* compaction not active last round */ + COMPACT_INACTIVE = COMPACT_DEFERRED, + /* compaction should continue to another pageblock */ COMPACT_CONTINUE, /* -- cgit v1.2.3 From c8f7de0bfae36e8532e5e25a39d15407f02aca78 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 20 May 2016 16:56:47 -0700 Subject: mm, compaction: distinguish between full and partial COMPACT_COMPLETE COMPACT_COMPLETE now means that compaction and free scanner met. This is not very useful information if somebody just wants to use this feedback and make any decisions based on that. The current caller might be a poor guy who just happened to scan tiny portion of the zone and that could be the reason no suitable pages were compacted. Make sure we distinguish the full and partial zone walks. Consumers should treat COMPACT_PARTIAL_SKIPPED as a potential success and be optimistic in retrying. The existing users of COMPACT_COMPLETE are conservatively changed to use COMPACT_PARTIAL_SKIPPED as well but some of them should be probably reconsidered and only defer the compaction only for COMPACT_COMPLETE with the new semantic. This patch shouldn't introduce any functional changes. Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Hillf Danton Cc: David Rientjes Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Mel Gorman Cc: Tetsuo Handa Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 11f228712ed5..9b37f9d3f7a8 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -21,7 +21,15 @@ enum compact_result { * pages */ COMPACT_PARTIAL, - /* The full zone was compacted */ + /* + * direct compaction has scanned part of the zone but wasn't successfull + * to compact suitable pages. + */ + COMPACT_PARTIAL_SKIPPED, + /* + * The full zone was compacted scanned but wasn't successfull to compact + * suitable pages. + */ COMPACT_COMPLETE, /* For more detailed tracepoint output */ COMPACT_NO_SUITABLE_PAGE, -- cgit v1.2.3 From 4f9a358c36fcdad3ea1db263ec4d484a70ad543e Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 20 May 2016 16:56:50 -0700 Subject: mm, compaction: update compaction_result ordering compaction_result will be used as the primary feedback channel for compaction users. At the same time try_to_compact_pages (and potentially others) assume a certain ordering where a more specific feedback takes precendence. This gets a bit awkward when we have conflicting feedback from different zones. E.g one returing COMPACT_COMPLETE meaning the full zone has been scanned without any outcome while other returns with COMPACT_PARTIAL aka made some progress. The caller should get COMPACT_PARTIAL because that means that the compaction still can make some progress. The same applies for COMPACT_PARTIAL vs COMPACT_PARTIAL_SKIPPED. Reorder PARTIAL to be the largest one so the larger the value is the more progress we have done. Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Hillf Danton Cc: David Rientjes Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Mel Gorman Cc: Tetsuo Handa Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 9b37f9d3f7a8..ff39fa0a1ede 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -4,6 +4,8 @@ /* Return values for compact_zone() and try_to_compact_pages() */ /* When adding new states, please adjust include/trace/events/compaction.h */ enum compact_result { + /* For more detailed tracepoint output - internal to compaction */ + COMPACT_NOT_SUITABLE_ZONE, /* * compaction didn't start as it was not possible or direct reclaim * was more suitable @@ -11,30 +13,34 @@ enum compact_result { COMPACT_SKIPPED, /* compaction didn't start as it was deferred due to past failures */ COMPACT_DEFERRED, + /* compaction not active last round */ COMPACT_INACTIVE = COMPACT_DEFERRED, + /* For more detailed tracepoint output - internal to compaction */ + COMPACT_NO_SUITABLE_PAGE, /* compaction should continue to another pageblock */ COMPACT_CONTINUE, + /* - * direct compaction partially compacted a zone and there are suitable - * pages + * The full zone was compacted scanned but wasn't successfull to compact + * suitable pages. */ - COMPACT_PARTIAL, + COMPACT_COMPLETE, /* * direct compaction has scanned part of the zone but wasn't successfull * to compact suitable pages. */ COMPACT_PARTIAL_SKIPPED, + + /* compaction terminated prematurely due to lock contentions */ + COMPACT_CONTENDED, + /* - * The full zone was compacted scanned but wasn't successfull to compact - * suitable pages. + * direct compaction partially compacted a zone and there might be + * suitable pages */ - COMPACT_COMPLETE, - /* For more detailed tracepoint output */ - COMPACT_NO_SUITABLE_PAGE, - COMPACT_NOT_SUITABLE_ZONE, - COMPACT_CONTENDED, + COMPACT_PARTIAL, }; /* Used to signal whether compaction detected need_sched() or lock contention */ -- cgit v1.2.3 From cab1802b5f0dddea30547a7451fda8c7e4c593f0 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 20 May 2016 16:56:56 -0700 Subject: mm, compaction: abstract compaction feedback to helpers Compaction can provide a wild variation of feedback to the caller. Many of them are implementation specific and the caller of the compaction (especially the page allocator) shouldn't be bound to specifics of the current implementation. This patch abstracts the feedback into three basic types: - compaction_made_progress - compaction was active and made some progress. - compaction_failed - compaction failed and further attempts to invoke it would most probably fail and therefore it is not worth retrying - compaction_withdrawn - compaction wasn't invoked for an implementation specific reasons. In the current implementation it means that the compaction was deferred, contended or the page scanners met too early without any progress. Retrying is still worthwhile. [vbabka@suse.cz: do not change thp back off behavior] [akpm@linux-foundation.org: fix typo in comment, per Hillf] Signed-off-by: Michal Hocko Acked-by: Hillf Danton Acked-by: Vlastimil Babka Cc: David Rientjes Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Mel Gorman Cc: Tetsuo Handa Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 79 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index ff39fa0a1ede..8d8c916fe67a 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -78,6 +78,70 @@ extern void compaction_defer_reset(struct zone *zone, int order, bool alloc_success); extern bool compaction_restarting(struct zone *zone, int order); +/* Compaction has made some progress and retrying makes sense */ +static inline bool compaction_made_progress(enum compact_result result) +{ + /* + * Even though this might sound confusing this in fact tells us + * that the compaction successfully isolated and migrated some + * pageblocks. + */ + if (result == COMPACT_PARTIAL) + return true; + + return false; +} + +/* Compaction has failed and it doesn't make much sense to keep retrying. */ +static inline bool compaction_failed(enum compact_result result) +{ + /* All zones were scanned completely and still not result. */ + if (result == COMPACT_COMPLETE) + return true; + + return false; +} + +/* + * Compaction has backed off for some reason. It might be throttling or + * lock contention. Retrying is still worthwhile. + */ +static inline bool compaction_withdrawn(enum compact_result result) +{ + /* + * Compaction backed off due to watermark checks for order-0 + * so the regular reclaim has to try harder and reclaim something. + */ + if (result == COMPACT_SKIPPED) + return true; + + /* + * If compaction is deferred for high-order allocations, it is + * because sync compaction recently failed. If this is the case + * and the caller requested a THP allocation, we do not want + * to heavily disrupt the system, so we fail the allocation + * instead of entering direct reclaim. + */ + if (result == COMPACT_DEFERRED) + return true; + + /* + * If compaction in async mode encounters contention or blocks higher + * priority task we back off early rather than cause stalls. + */ + if (result == COMPACT_CONTENDED) + return true; + + /* + * Page scanners have met but we haven't scanned full zones so this + * is a back off in fact. + */ + if (result == COMPACT_PARTIAL_SKIPPED) + return true; + + return false; +} + extern int kcompactd_run(int nid); extern void kcompactd_stop(int nid); extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx); @@ -114,6 +178,21 @@ static inline bool compaction_deferred(struct zone *zone, int order) return true; } +static inline bool compaction_made_progress(enum compact_result result) +{ + return false; +} + +static inline bool compaction_failed(enum compact_result result) +{ + return false; +} + +static inline bool compaction_withdrawn(enum compact_result result) +{ + return true; +} + static inline int kcompactd_run(int nid) { return 0; -- cgit v1.2.3 From 0a0337e0d1d134465778a16f5cbea95086e8e9e0 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 20 May 2016 16:57:00 -0700 Subject: mm, oom: rework oom detection __alloc_pages_slowpath has traditionally relied on the direct reclaim and did_some_progress as an indicator that it makes sense to retry allocation rather than declaring OOM. shrink_zones had to rely on zone_reclaimable if shrink_zone didn't make any progress to prevent from a premature OOM killer invocation - the LRU might be full of dirty or writeback pages and direct reclaim cannot clean those up. zone_reclaimable allows to rescan the reclaimable lists several times and restart if a page is freed. This is really subtle behavior and it might lead to a livelock when a single freed page keeps allocator looping but the current task will not be able to allocate that single page. OOM killer would be more appropriate than looping without any progress for unbounded amount of time. This patch changes OOM detection logic and pulls it out from shrink_zone which is too low to be appropriate for any high level decisions such as OOM which is per zonelist property. It is __alloc_pages_slowpath which knows how many attempts have been done and what was the progress so far therefore it is more appropriate to implement this logic. The new heuristic is implemented in should_reclaim_retry helper called from __alloc_pages_slowpath. It tries to be more deterministic and easier to follow. It builds on an assumption that retrying makes sense only if the currently reclaimable memory + free pages would allow the current allocation request to succeed (as per __zone_watermark_ok) at least for one zone in the usable zonelist. This alone wouldn't be sufficient, though, because the writeback might get stuck and reclaimable pages might be pinned for a really long time or even depend on the current allocation context. Therefore there is a backoff mechanism implemented which reduces the reclaim target after each reclaim round without any progress. This means that we should eventually converge to only NR_FREE_PAGES as the target and fail on the wmark check and proceed to OOM. The backoff is simple and linear with 1/16 of the reclaimable pages for each round without any progress. We are optimistic and reset counter for successful reclaim rounds. Costly high order pages mostly preserve their semantic and those without __GFP_REPEAT fail right away while those which have the flag set will back off after the amount of reclaimable pages reaches equivalent of the requested order. The only difference is that if there was no progress during the reclaim we rely on zone watermark check. This is more logical thing to do than previous 1< Acked-by: Hillf Danton Cc: Vladimir Davydov Cc: Johannes Weiner Cc: David Rientjes Cc: Joonsoo Kim Cc: Mel Gorman Cc: Tetsuo Handa Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index ad220359f1b0..0af2bb2028fd 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -316,6 +316,7 @@ extern void lru_cache_add_active_or_unevictable(struct page *page, struct vm_area_struct *vma); /* linux/mm/vmscan.c */ +extern unsigned long zone_reclaimable_pages(struct zone *zone); extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); extern int __isolate_lru_page(struct page *page, isolate_mode_t mode); -- cgit v1.2.3 From 86a294a81f93d6f36d00ec3ff779d36d218f852d Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 20 May 2016 16:57:12 -0700 Subject: mm, oom, compaction: prevent from should_compact_retry looping for ever for costly orders "mm: consider compaction feedback also for costly allocation" has removed the upper bound for the reclaim/compaction retries based on the number of reclaimed pages for costly orders. While this is desirable the patch did miss a mis interaction between reclaim, compaction and the retry logic. The direct reclaim tries to get zones over min watermark while compaction backs off and returns COMPACT_SKIPPED when all zones are below low watermark + 1< Acked-by: Hillf Danton Acked-by: Vlastimil Babka Cc: David Rientjes Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Mel Gorman Cc: Tetsuo Handa Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 4 ++++ include/linux/mmzone.h | 3 +++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 8d8c916fe67a..a58c852a268f 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -142,6 +142,10 @@ static inline bool compaction_withdrawn(enum compact_result result) return false; } + +bool compaction_zonelist_suitable(struct alloc_context *ac, int order, + int alloc_flags); + extern int kcompactd_run(int nid); extern void kcompactd_stop(int nid); extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c60db2096fd8..8dd0333b01dc 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -739,6 +739,9 @@ static inline bool is_dev_zone(const struct zone *zone) extern struct mutex zonelists_mutex; void build_all_zonelists(pg_data_t *pgdat, struct zone *zone); void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx); +bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, + int classzone_idx, unsigned int alloc_flags, + long free_pages); bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int classzone_idx, unsigned int alloc_flags); -- cgit v1.2.3 From bb8a4b7fd1266ef888b3a80aa5f266062b224ef4 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 20 May 2016 16:57:18 -0700 Subject: mm, oom_reaper: hide oom reaped tasks from OOM killer more carefully Commit 36324a990cf5 ("oom: clear TIF_MEMDIE after oom_reaper managed to unmap the address space") not only clears TIF_MEMDIE for oom reaped task but also set OOM_SCORE_ADJ_MIN for the target task to hide it from the oom killer. This works in simple cases but it is not sufficient for (unlikely) cases where the mm is shared between independent processes (as they do not share signal struct). If the mm had only small amount of memory which could be reaped then another task sharing the mm could be selected and that wouldn't help to move out from the oom situation. Introduce MMF_OOM_REAPED mm flag which is checked in oom_badness (same as OOM_SCORE_ADJ_MIN) and task is skipped if the flag is set. Set the flag after __oom_reap_task is done with a task. This will force the select_bad_process() to ignore all already oom reaped tasks as well as no such task is sacrificed for its parent. Signed-off-by: Michal Hocko Cc: Tetsuo Handa Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 31bd0d97d178..40eabf176ce2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -521,6 +521,7 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_HAS_UPROBES 19 /* has uprobes */ #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ +#define MMF_OOM_REAPED 21 /* mm has been already reaped */ #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) -- cgit v1.2.3 From ec8d7c14ea14922fe21945b458a75e39f11dd832 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 20 May 2016 16:57:21 -0700 Subject: mm, oom_reaper: do not mmput synchronously from the oom reaper context Tetsuo has properly noted that mmput slow path might get blocked waiting for another party (e.g. exit_aio waits for an IO). If that happens the oom_reaper would be put out of the way and will not be able to process next oom victim. We should strive for making this context as reliable and independent on other subsystems as much as possible. Introduce mmput_async which will perform the slow path from an async (WQ) context. This will delay the operation but that shouldn't be a problem because the oom_reaper has reclaimed the victim's address space for most cases as much as possible and the remaining context shouldn't bind too much memory anymore. The only exception is when mmap_sem trylock has failed which shouldn't happen too often. The issue is only theoretical but not impossible. Signed-off-by: Michal Hocko Reported-by: Tetsuo Handa Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 ++ include/linux/sched.h | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 1fda9c99ef95..d553855503e6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -513,6 +514,7 @@ struct mm_struct { #ifdef CONFIG_HUGETLB_PAGE atomic_long_t hugetlb_usage; #endif + struct work_struct async_put_work; }; static inline void mm_init_cpumask(struct mm_struct *mm) diff --git a/include/linux/sched.h b/include/linux/sched.h index 40eabf176ce2..479e3cade7e9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2730,6 +2730,11 @@ static inline void mmdrop(struct mm_struct * mm) /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); +/* same as above but performs the slow path from the async kontext. Can + * be called from the atomic context as well + */ +extern void mmput_async(struct mm_struct *); + /* Grab a reference to a task's mm, if it is not already going away */ extern struct mm_struct *get_task_mm(struct task_struct *task); /* -- cgit v1.2.3 From 98748bd722005be9de2662bd4f7e41ad8148bdbd Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 20 May 2016 16:57:24 -0700 Subject: oom: consider multi-threaded tasks in task_will_free_mem task_will_free_mem is a misnomer for a more complex PF_EXITING test for early break out from the oom killer because it is believed that such a task would release its memory shortly and so we do not have to select an oom victim and perform a disruptive action. Currently we make sure that the given task is not participating in the core dumping because it might get blocked for a long time - see commit d003f371b270 ("oom: don't assume that a coredumping thread will exit soon"). The check can still do better though. We shouldn't consider the task unless the whole thread group is going down. This is rather unlikely but not impossible. A single exiting thread would surely leave all the address space behind. If we are really unlucky it might get stuck on the exit path and keep its TIF_MEMDIE and so block the oom killer. Link: http://lkml.kernel.org/r/1460452756-15491-1-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Oleg Nesterov Cc: David Rientjes Cc: Tetsuo Handa Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index 83b9c39bd8b7..d3f533f2f481 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -110,13 +110,24 @@ extern struct task_struct *find_lock_task_mm(struct task_struct *p); static inline bool task_will_free_mem(struct task_struct *task) { + struct signal_struct *sig = task->signal; + /* * A coredumping process may sleep for an extended period in exit_mm(), * so the oom killer cannot assume that the process will promptly exit * and release memory. */ - return (task->flags & PF_EXITING) && - !(task->signal->flags & SIGNAL_GROUP_COREDUMP); + if (sig->flags & SIGNAL_GROUP_COREDUMP) + return false; + + if (!(task->flags & PF_EXITING)) + return false; + + /* Make sure that the whole thread group is going down */ + if (!thread_group_empty(task) && !(sig->flags & SIGNAL_GROUP_EXIT)) + return false; + + return true; } /* sysctls */ -- cgit v1.2.3 From f44666b04605d1c7fd94ab90b7ccf633e7eff228 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 20 May 2016 16:57:27 -0700 Subject: mm,oom: speed up select_bad_process() loop Since commit 3a5dda7a17cf ("oom: prevent unnecessary oom kills or kernel panics"), select_bad_process() is using for_each_process_thread(). Since oom_unkillable_task() scans all threads in the caller's thread group and oom_task_origin() scans signal_struct of the caller's thread group, we don't need to call oom_unkillable_task() and oom_task_origin() on each thread. Also, since !mm test will be done later at oom_badness(), we don't need to do !mm test on each thread. Therefore, we only need to do TIF_MEMDIE test on each thread. Although the original code was correct it was quite inefficient because each thread group was scanned num_threads times which can be a lot especially with processes with many threads. Even though the OOM is extremely cold path it is always good to be as effective as possible when we are inside rcu_read_lock() - aka unpreemptible context. If we track number of TIF_MEMDIE threads inside signal_struct, we don't need to do TIF_MEMDIE test on each thread. This will allow select_bad_process() to use for_each_process(). This patch adds a counter to signal_struct for tracking how many TIF_MEMDIE threads are in a given thread group, and check it at oom_scan_process_thread() so that select_bad_process() can use for_each_process() rather than for_each_process_thread(). [mhocko@suse.com: do not blow the signal_struct size] Link: http://lkml.kernel.org/r/20160520075035.GF19172@dhcp22.suse.cz Link: http://lkml.kernel.org/r/201605182230.IDC73435.MVSOHLFOQFOJtF@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Acked-by: Michal Hocko Cc: David Rientjes Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 479e3cade7e9..01fe1bb68754 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -669,6 +669,7 @@ struct signal_struct { atomic_t sigcnt; atomic_t live; int nr_threads; + atomic_t oom_victims; /* # of TIF_MEDIE threads in this thread group */ struct list_head thread_head; wait_queue_head_t wait_chldexit; /* for wait4() */ -- cgit v1.2.3 From 80c4bd7a5e4368b680e0aeb57050a1b06eb573d8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 20 May 2016 16:57:38 -0700 Subject: mm/vmalloc: keep a separate lazy-free list When mixing lots of vmallocs and set_memory_*() (which calls vm_unmap_aliases()) I encountered situations where the performance degraded severely due to the walking of the entire vmap_area list each invocation. One simple improvement is to add the lazily freed vmap_area to a separate lockless free list, such that we then avoid having to walk the full list on each purge. Signed-off-by: Chris Wilson Reviewed-by: Roman Pen Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Daniel Vetter Cc: David Rientjes Cc: Joonsoo Kim Cc: Roman Pen Cc: Mel Gorman Cc: Toshi Kani Cc: Shawn Lin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index d1f1d338af20..957adb741b6f 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -4,6 +4,7 @@ #include #include #include +#include #include /* pgprot_t */ #include @@ -44,7 +45,7 @@ struct vmap_area { unsigned long flags; struct rb_node rb_node; /* address sorted rbtree */ struct list_head list; /* address sorted list */ - struct list_head purge_list; /* "lazy purge" list */ + struct llist_node purge_list; /* "lazy purge" list */ struct vm_struct *vm; struct rcu_head rcu_head; }; -- cgit v1.2.3 From b8ca9e3a612eaf3e54c6fa136c62246a1a9aece7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 May 2016 16:57:53 -0700 Subject: mm: tighten fault_in_pages_writeable() copy_page_to_iter_iovec() is currently the only user of fault_in_pages_writeable(), and it definitely can use fragments from high order pages. Make sure fault_in_pages_writeable() is only touching two adjacent pages at most, as claimed. Signed-off-by: Eric Dumazet Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index fe1513ffb7bf..97354102794d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -518,33 +518,27 @@ void page_endio(struct page *page, int rw, int err); extern void add_page_wait_queue(struct page *page, wait_queue_t *waiter); /* - * Fault a userspace page into pagetables. Return non-zero on a fault. - * - * This assumes that two userspace pages are always sufficient. + * Fault one or two userspace pages into pagetables. + * Return -EINVAL if more than two pages would be needed. + * Return non-zero on a fault. */ static inline int fault_in_pages_writeable(char __user *uaddr, int size) { - int ret; + int span, ret; if (unlikely(size == 0)) return 0; + span = offset_in_page(uaddr) + size; + if (span > 2 * PAGE_SIZE) + return -EINVAL; /* * Writing zeroes into userspace here is OK, because we know that if * the zero gets there, we'll be overwriting it. */ ret = __put_user(0, uaddr); - if (ret == 0) { - char __user *end = uaddr + size - 1; - - /* - * If the page was already mapped, this will get a cache miss - * for sure, so try to avoid doing it. - */ - if (((unsigned long)uaddr & PAGE_MASK) != - ((unsigned long)end & PAGE_MASK)) - ret = __put_user(0, end); - } + if (ret == 0 && span > PAGE_SIZE) + ret = __put_user(0, uaddr + size - 1); return ret; } -- cgit v1.2.3 From 7fab358d90e6ba9d9cb702bee0c8a5f5c13bb6df Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Fri, 20 May 2016 16:57:59 -0700 Subject: include/linux/hugetlb*.h: clean up code Macro HUGETLBFS_SB is clear enough, so one statement is clearer than 3 lines statements. Remove redundant return statements for non-return functions, which can save lines, at least. Signed-off-by: Chen Gang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 4 +--- include/linux/hugetlb_cgroup.h | 4 ---- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index e44c57876e89..7ef4b635015d 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -353,9 +353,7 @@ extern unsigned int default_hstate_idx; static inline struct hstate *hstate_inode(struct inode *i) { - struct hugetlbfs_sb_info *hsb; - hsb = HUGETLBFS_SB(i->i_sb); - return hsb->hstate; + return HUGETLBFS_SB(i->i_sb)->hstate; } static inline struct hstate *hstate_file(struct file *f) diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 24154c26d469..063962f6dfc6 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -93,20 +93,17 @@ hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, struct page *page) { - return; } static inline void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, struct page *page) { - return; } static inline void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg) { - return; } static inline void hugetlb_cgroup_file_init(void) @@ -116,7 +113,6 @@ static inline void hugetlb_cgroup_file_init(void) static inline void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage) { - return; } #endif /* CONFIG_MEM_RES_CTLR_HUGETLB */ -- cgit v1.2.3 From d70c17d436b3fb9dbdae8c93bf908a6110b0cb4f Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Fri, 20 May 2016 16:58:01 -0700 Subject: include/linux/hugetlb.h: use bool instead of int for hugepage_migration_supported() It is used as a pure bool function within kernel source wide. Signed-off-by: Chen Gang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 7ef4b635015d..c26d4638f665 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -452,12 +452,12 @@ static inline pgoff_t basepage_index(struct page *page) extern void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn); -static inline int hugepage_migration_supported(struct hstate *h) +static inline bool hugepage_migration_supported(struct hstate *h) { #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION return huge_page_shift(h) == PMD_SHIFT; #else - return 0; + return false; #endif } @@ -519,7 +519,7 @@ static inline pgoff_t basepage_index(struct page *page) return page->index; } #define dissolve_free_huge_pages(s, e) do {} while (0) -#define hugepage_migration_supported(h) 0 +#define hugepage_migration_supported(h) false static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) -- cgit v1.2.3 From 0c9ad804f178eae02a34045bb0916fa0e31623d5 Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Fri, 20 May 2016 16:58:04 -0700 Subject: mm fix commmets: if SPARSEMEM, pgdata doesn't have page_ext If SPARSEMEM, use page_ext in mem_section if !SPARSEMEM, use page_ext in pgdata Signed-off-by: Weijie Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 8dd0333b01dc..02069c23486d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1063,7 +1063,7 @@ struct mem_section { unsigned long *pageblock_flags; #ifdef CONFIG_PAGE_EXTENSION /* - * If !SPARSEMEM, pgdat doesn't have page_ext pointer. We use + * If SPARSEMEM, pgdat doesn't have page_ext pointer. We use * section. (see page_ext.h about this.) */ struct page_ext *page_ext; -- cgit v1.2.3 From d2a1a1f0a97a77d25cbada37161dc2ecdf01f93d Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 20 May 2016 16:58:16 -0700 Subject: mm: use unsigned long constant for page flags struct page->flags is unsigned long, so when shifting bits we should use UL suffix to match it. Found this problem after I added 64-bit CPU specific page flags and failed to compile the kernel: mm/page_alloc.c: In function '__free_one_page': mm/page_alloc.c:672:2: error: integer overflow in expression [-Werror=overflow] Link: http://lkml.kernel.org/r/1461971723-16187-1-git-send-email-yuzhao@google.com Signed-off-by: Yu Zhao Cc: "Kirill A . Shutemov" Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Jerome Marchand Cc: Denys Vlasenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a61e06e5fbce..e5a32445f930 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -479,7 +479,7 @@ static inline void ClearPageCompound(struct page *page) } #endif -#define PG_head_mask ((1L << PG_head)) +#define PG_head_mask ((1UL << PG_head)) #ifdef CONFIG_HUGETLB_PAGE int PageHuge(struct page *page); @@ -670,7 +670,7 @@ static inline void ClearPageSlabPfmemalloc(struct page *page) } #ifdef CONFIG_MMU -#define __PG_MLOCKED (1 << PG_mlocked) +#define __PG_MLOCKED (1UL << PG_mlocked) #else #define __PG_MLOCKED 0 #endif @@ -680,11 +680,11 @@ static inline void ClearPageSlabPfmemalloc(struct page *page) * these flags set. It they are, there is a problem. */ #define PAGE_FLAGS_CHECK_AT_FREE \ - (1 << PG_lru | 1 << PG_locked | \ - 1 << PG_private | 1 << PG_private_2 | \ - 1 << PG_writeback | 1 << PG_reserved | \ - 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ - 1 << PG_unevictable | __PG_MLOCKED) + (1UL << PG_lru | 1UL << PG_locked | \ + 1UL << PG_private | 1UL << PG_private_2 | \ + 1UL << PG_writeback | 1UL << PG_reserved | \ + 1UL << PG_slab | 1UL << PG_swapcache | 1UL << PG_active | \ + 1UL << PG_unevictable | __PG_MLOCKED) /* * Flags checked when a page is prepped for return by the page allocator. @@ -695,10 +695,10 @@ static inline void ClearPageSlabPfmemalloc(struct page *page) * alloc-free cycle to prevent from reusing the page. */ #define PAGE_FLAGS_CHECK_AT_PREP \ - (((1 << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON) + (((1UL << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON) #define PAGE_FLAGS_PRIVATE \ - (1 << PG_private | 1 << PG_private_2) + (1UL << PG_private | 1UL << PG_private_2) /** * page_has_private - Determine if page has private stuff * @page: The page to be checked -- cgit v1.2.3 From 5f527c2b3ea261bfccb7d12f9feade924cc4987c Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 20 May 2016 16:58:24 -0700 Subject: mm: thp: microoptimize compound_mapcount() compound_mapcount() is only called after PageCompound() has already been checked by the caller, so there's no point to check it again. Gcc may optimize it away too because it's inline but this will remove the runtime check for sure and add it'll add an assert instead. Link: http://lkml.kernel.org/r/1462547040-1737-3-git-send-email-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Acked-by: Kirill A. Shutemov Cc: Alex Williamson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2b97be1147ec..65d18a45b8e8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -475,8 +475,7 @@ static inline atomic_t *compound_mapcount_ptr(struct page *page) static inline int compound_mapcount(struct page *page) { - if (!PageCompound(page)) - return 0; + VM_BUG_ON_PAGE(!PageCompound(page), page); page = compound_head(page); return atomic_read(compound_mapcount_ptr(page)) + 1; } -- cgit v1.2.3 From d2005e3f41d4f9299e2df6a967c8beb5086967a9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 20 May 2016 16:58:36 -0700 Subject: userfaultfd: don't pin the user memory in userfaultfd_file_create() userfaultfd_file_create() increments mm->mm_users; this means that the memory won't be unmapped/freed if mm owner exits/execs, and UFFDIO_COPY after that can populate the orphaned mm more. Change userfaultfd_file_create() and userfaultfd_ctx_put() to use mm->mm_count to pin mm_struct. This means that atomic_inc_not_zero(mm->mm_users) is needed when we are going to actually play with this memory. Except handle_userfault() path doesn't need this, the caller must already have a reference. The patch adds the new trivial helper, mmget_not_zero(), it can have more users. Link: http://lkml.kernel.org/r/20160516172254.GA8595@redhat.com Signed-off-by: Oleg Nesterov Cc: Andrea Arcangeli Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 01fe1bb68754..6b3213d96da6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2723,12 +2723,17 @@ extern struct mm_struct * mm_alloc(void); /* mmdrop drops the mm and the page tables */ extern void __mmdrop(struct mm_struct *); -static inline void mmdrop(struct mm_struct * mm) +static inline void mmdrop(struct mm_struct *mm) { if (unlikely(atomic_dec_and_test(&mm->mm_count))) __mmdrop(mm); } +static inline bool mmget_not_zero(struct mm_struct *mm) +{ + return atomic_inc_not_zero(&mm->mm_users); +} + /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); /* same as above but performs the slow path from the async kontext. Can -- cgit v1.2.3 From 4b50bcc7eda4d3cc9e3f2a0aa60e590fedf728c5 Mon Sep 17 00:00:00 2001 From: Stefan Bader Date: Fri, 20 May 2016 16:58:38 -0700 Subject: mm: use phys_addr_t for reserve_bootmem_region() arguments Since commit 92923ca3aace ("mm: meminit: only set page reserved in the memblock region") the reserved bit is set on reserved memblock regions. However start and end address are passed as unsigned long. This is only 32bit on i386, so it can end up marking the wrong pages reserved for ranges at 4GB and above. This was observed on a 32bit Xen dom0 which was booted with initial memory set to a value below 4G but allowing to balloon in memory (dom0_mem=1024M for example). This would define a reserved bootmem region for the additional memory (for example on a 8GB system there was a reverved region covering the 4GB-8GB range). But since the addresses were passed on as unsigned long, this was actually marking all pages from 0 to 4GB as reserved. Fixes: 92923ca3aacef63 ("mm: meminit: only set page reserved in the memblock region") Link: http://lkml.kernel.org/r/1463491221-10573-1-git-send-email-stefan.bader@canonical.com Signed-off-by: Stefan Bader Cc: [4.2+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 65d18a45b8e8..fbdb9d40847f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1763,7 +1763,7 @@ extern void free_highmem_page(struct page *page); extern void adjust_managed_page_count(struct page *page, long count); extern void mem_init_print_info(const char *str); -extern void reserve_bootmem_region(unsigned long start, unsigned long end); +extern void reserve_bootmem_region(phys_addr_t start, phys_addr_t end); /* Free the reserved page into the buddy system, so it gets managed. */ static inline void __free_reserved_page(struct page *page) -- cgit v1.2.3 From 5c0a85fad949212b3e059692deecdeed74ae7ec7 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 20 May 2016 16:58:41 -0700 Subject: mm: make faultaround produce old ptes Currently, faultaround code produces young pte. This can screw up vmscan behaviour[1], as it makes vmscan think that these pages are hot and not push them out on first round. During sparse file access faultaround gets more pages mapped and all of them are young. Under memory pressure, this makes vmscan swap out anon pages instead, or to drop other page cache pages which otherwise stay resident. Modify faultaround to produce old ptes, so they can easily be reclaimed under memory pressure. This can to some extend defeat the purpose of faultaround on machines without hardware accessed bit as it will not help us with reducing the number of minor page faults. We may want to disable faultaround on such machines altogether, but that's subject for separate patchset. Minchan: "I tested 512M mmap sequential word read test on non-HW access bit system (i.e., ARM) and confirmed it doesn't increase minor fault any more. old: 4096 fault_around minor fault: 131291 elapsed time: 6747645 usec new: 65536 fault_around minor fault: 131291 elapsed time: 6709263 usec 0.56% benefit" [1] https://lkml.kernel.org/r/1460992636-711-1-git-send-email-vinmenon@codeaurora.org Link: http://lkml.kernel.org/r/1463488366-47723-1-git-send-email-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Acked-by: Minchan Kim Tested-by: Minchan Kim Acked-by: Rik van Riel Cc: Mel Gorman Cc: Michal Hocko Cc: Vinayak Menon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index fbdb9d40847f..f223ac26b5d9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -596,7 +596,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) } void do_set_pte(struct vm_area_struct *vma, unsigned long address, - struct page *page, pte_t *pte, bool write, bool anon); + struct page *page, pte_t *pte, bool write, bool anon, bool old); #endif /* -- cgit v1.2.3 From 0bb2fd13b69abfd88880f356903b5c7ca36d5eea Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Fri, 20 May 2016 16:58:59 -0700 Subject: mm: page_is_guard(): return false when page_ext arrays are not allocated yet When enabling the below kernel configs: CONFIG_DEFERRED_STRUCT_PAGE_INIT CONFIG_DEBUG_PAGEALLOC CONFIG_PAGE_EXTENSION CONFIG_DEBUG_VM kernel bootup may fail due to the following oops: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] free_pcppages_bulk+0x2d2/0x8d0 PGD 0 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: CPU: 11 PID: 106 Comm: pgdatinit1 Not tainted 4.6.0-rc5-next-20160427 #26 Hardware name: Intel Corporation S5520HC/S5520HC, BIOS S5500.86B.01.10.0025.030220091519 03/02/2009 task: ffff88017c080040 ti: ffff88017c084000 task.ti: ffff88017c084000 RIP: 0010:[] [] free_pcppages_bulk+0x2d2/0x8d0 RSP: 0000:ffff88017c087c48 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000001 RDX: 0000000000000980 RSI: 0000000000000080 RDI: 0000000000660401 RBP: ffff88017c087cd0 R08: 0000000000000401 R09: 0000000000000009 R10: ffff88017c080040 R11: 000000000000000a R12: 0000000000000400 R13: ffffea0019810000 R14: ffffea0019810040 R15: ffff88066cfe6080 FS: 0000000000000000(0000) GS:ffff88066cd40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000002406000 CR4: 00000000000006e0 Call Trace: free_hot_cold_page+0x192/0x1d0 __free_pages+0x5c/0x90 __free_pages_boot_core+0x11a/0x14e deferred_free_range+0x50/0x62 deferred_init_memmap+0x220/0x3c3 kthread+0xf8/0x110 ret_from_fork+0x22/0x40 Code: 49 89 d4 48 c1 e0 06 49 01 c5 e9 de fe ff ff 4c 89 f7 44 89 4d b8 4c 89 45 c0 44 89 5d c8 48 89 4d d0 e8 62 c7 07 00 48 8b 4d d0 <48> 8b 00 44 8b 5d c8 4c 8b 45 c0 44 8b 4d b8 a8 02 0f 84 05 ff RIP [] free_pcppages_bulk+0x2d2/0x8d0 RSP CR2: 0000000000000000 The problem is lookup_page_ext() returns NULL then page_is_guard() tried to access it in page freeing. page_is_guard() depends on PAGE_EXT_DEBUG_GUARD bit of page extension flag, but freeing page might reach here before the page_ext arrays are allocated when feeding a range of pages to the allocator for the first time during bootup or memory hotplug. When it returns NULL, page_is_guard() should just return false instead of checking PAGE_EXT_DEBUG_GUARD unconditionally. Link: http://lkml.kernel.org/r/1463610225-29060-1-git-send-email-yang.shi@linaro.org Signed-off-by: Yang Shi Cc: Joonsoo Kim Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index f223ac26b5d9..b530c99e8e81 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2386,6 +2386,9 @@ static inline bool page_is_guard(struct page *page) return false; page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return false; + return test_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags); } #else -- cgit v1.2.3 From 55834c59098d0c5a97b0f3247e55832b67facdcf Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 20 May 2016 16:59:11 -0700 Subject: mm: kasan: initial memory quarantine implementation Quarantine isolates freed objects in a separate queue. The objects are returned to the allocator later, which helps to detect use-after-free errors. When the object is freed, its state changes from KASAN_STATE_ALLOC to KASAN_STATE_QUARANTINE. The object is poisoned and put into quarantine instead of being returned to the allocator, therefore every subsequent access to that object triggers a KASAN error, and the error handler is able to say where the object has been allocated and deallocated. When it's time for the object to leave quarantine, its state becomes KASAN_STATE_FREE and it's returned to the allocator. From now on the allocator may reuse it for another allocation. Before that happens, it's still possible to detect a use-after free on that object (it retains the allocation/deallocation stacks). When the allocator reuses this object, the shadow is unpoisoned and old allocation/deallocation stacks are wiped. Therefore a use of this object, even an incorrect one, won't trigger ASan warning. Without the quarantine, it's not guaranteed that the objects aren't reused immediately, that's why the probability of catching a use-after-free is lower than with quarantine in place. Quarantine isolates freed objects in a separate queue. The objects are returned to the allocator later, which helps to detect use-after-free errors. Freed objects are first added to per-cpu quarantine queues. When a cache is destroyed or memory shrinking is requested, the objects are moved into the global quarantine queue. Whenever a kmalloc call allows memory reclaiming, the oldest objects are popped out of the global queue until the total size of objects in quarantine is less than 3/4 of the maximum quarantine size (which is a fraction of installed physical memory). As long as an object remains in the quarantine, KASAN is able to report accesses to it, so the chance of reporting a use-after-free is increased. Once the object leaves quarantine, the allocator may reuse it, in which case the object is unpoisoned and KASAN can't detect incorrect accesses to it. Right now quarantine support is only enabled in SLAB allocator. Unification of KASAN features in SLAB and SLUB will be done later. This patch is based on the "mm: kasan: quarantine" patch originally prepared by Dmitry Chernenkov. A number of improvements have been suggested by Andrey Ryabinin. [glider@google.com: v9] Link: http://lkml.kernel.org/r/1462987130-144092-1-git-send-email-glider@google.com Signed-off-by: Alexander Potapenko Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Steven Rostedt Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 737371b56044..611927f5870d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -50,6 +50,8 @@ void kasan_free_pages(struct page *page, unsigned int order); void kasan_cache_create(struct kmem_cache *cache, size_t *size, unsigned long *flags); +void kasan_cache_shrink(struct kmem_cache *cache); +void kasan_cache_destroy(struct kmem_cache *cache); void kasan_poison_slab(struct page *page); void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); @@ -63,7 +65,8 @@ void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, void kasan_krealloc(const void *object, size_t new_size, gfp_t flags); void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags); -void kasan_slab_free(struct kmem_cache *s, void *object); +bool kasan_slab_free(struct kmem_cache *s, void *object); +void kasan_poison_slab_free(struct kmem_cache *s, void *object); struct kasan_cache { int alloc_meta_offset; @@ -88,6 +91,8 @@ static inline void kasan_free_pages(struct page *page, unsigned int order) {} static inline void kasan_cache_create(struct kmem_cache *cache, size_t *size, unsigned long *flags) {} +static inline void kasan_cache_shrink(struct kmem_cache *cache) {} +static inline void kasan_cache_destroy(struct kmem_cache *cache) {} static inline void kasan_poison_slab(struct page *page) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, @@ -105,7 +110,11 @@ static inline void kasan_krealloc(const void *object, size_t new_size, static inline void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags) {} -static inline void kasan_slab_free(struct kmem_cache *s, void *object) {} +static inline bool kasan_slab_free(struct kmem_cache *s, void *object) +{ + return false; +} +static inline void kasan_poison_slab_free(struct kmem_cache *s, void *object) {} static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } static inline void kasan_free_shadow(const struct vm_struct *vm) {} -- cgit v1.2.3 From 64f8ebaf115bcddc4aaa902f981c57ba6506bc42 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 20 May 2016 16:59:28 -0700 Subject: mm/kasan: add API to check memory regions Memory access coded in an assembly won't be seen by KASAN as a compiler can instrument only C code. Add kasan_check_[read,write]() API which is going to be used to check a certain memory range. Link: http://lkml.kernel.org/r/1462538722-1574-3-git-send-email-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Acked-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan-checks.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 include/linux/kasan-checks.h (limited to 'include/linux') diff --git a/include/linux/kasan-checks.h b/include/linux/kasan-checks.h new file mode 100644 index 000000000000..b7f8aced7870 --- /dev/null +++ b/include/linux/kasan-checks.h @@ -0,0 +1,12 @@ +#ifndef _LINUX_KASAN_CHECKS_H +#define _LINUX_KASAN_CHECKS_H + +#ifdef CONFIG_KASAN +void kasan_check_read(const void *p, unsigned int size); +void kasan_check_write(const void *p, unsigned int size); +#else +static inline void kasan_check_read(const void *p, unsigned int size) { } +static inline void kasan_check_write(const void *p, unsigned int size) { } +#endif + +#endif -- cgit v1.2.3 From d0d8da2dc49dfdfe1d788eaf4d55eb5d4964d926 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Fri, 20 May 2016 16:59:48 -0700 Subject: zsmalloc: require GFP in zs_malloc() Pass GFP flags to zs_malloc() instead of using a fixed mask supplied to zs_create_pool(), so we can be more flexible, but, more importantly, we need this to switch zram to per-cpu compression streams -- zram will try to allocate handle with preemption disabled in a fast path and switch to a slow path (using different gfp mask) if the fast one has failed. Apart from that, this also align zs_malloc() interface with zspool/zbud. [sergey.senozhatsky@gmail.com: pass GFP flags to zs_malloc() instead of using a fixed mask] Link: http://lkml.kernel.org/r/20160429150942.GA637@swordfish Link: http://lkml.kernel.org/r/20160429150942.GA637@swordfish Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zsmalloc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 34eb16098a33..57a8e98f2708 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -41,10 +41,10 @@ struct zs_pool_stats { struct zs_pool; -struct zs_pool *zs_create_pool(const char *name, gfp_t flags); +struct zs_pool *zs_create_pool(const char *name); void zs_destroy_pool(struct zs_pool *pool); -unsigned long zs_malloc(struct zs_pool *pool, size_t size); +unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags); void zs_free(struct zs_pool *pool, unsigned long obj); void *zs_map_object(struct zs_pool *pool, unsigned long handle, -- cgit v1.2.3 From 5f56a5dfdb9bcb3bca03df59980d4d2f012cbb53 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 20 May 2016 17:00:16 -0700 Subject: exit_thread: remove empty bodies Define HAVE_EXIT_THREAD for archs which want to do something in exit_thread. For others, let's define exit_thread as an empty inline. This is a cleanup before we change the prototype of exit_thread to accept a task parameter. [akpm@linux-foundation.org: fix mips] Signed-off-by: Jiri Slaby Cc: "David S. Miller" Cc: "H. Peter Anvin" Cc: "James E.J. Bottomley" Cc: Aurelien Jacquiot Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Chen Liqin Cc: Chris Metcalf Cc: Chris Zankel Cc: David Howells Cc: Fenghua Yu Cc: Geert Uytterhoeven Cc: Guan Xuetao Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Heiko Carstens Cc: Helge Deller Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: James Hogan Cc: Jeff Dike Cc: Jesper Nilsson Cc: Jiri Slaby Cc: Jonas Bonn Cc: Koichi Yasutake Cc: Lennox Wu Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Mikael Starvik Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Rich Felker Cc: Richard Henderson Cc: Richard Kuo Cc: Richard Weinberger Cc: Russell King Cc: Steven Miao Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6b3213d96da6..167c0d4bf3fa 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2769,7 +2769,14 @@ static inline int copy_thread_tls( } #endif extern void flush_thread(void); + +#ifdef CONFIG_HAVE_EXIT_THREAD extern void exit_thread(void); +#else +static inline void exit_thread(void) +{ +} +#endif extern void exit_files(struct task_struct *); extern void __cleanup_sighand(struct sighand_struct *); -- cgit v1.2.3 From e64646946ed32902fd597fa6e514b1da84642de3 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 20 May 2016 17:00:20 -0700 Subject: exit_thread: accept a task parameter to be exited We need to call exit_thread from copy_process in a fail path. So make it accept task_struct as a parameter. [v2] * s390: exit_thread_runtime_instr doesn't make sense to be called for non-current tasks. * arm: fix the comment in vfp_thread_copy * change 'me' to 'tsk' for task_struct * now we can change only archs that actually have exit_thread [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Jiri Slaby Cc: "David S. Miller" Cc: "H. Peter Anvin" Cc: "James E.J. Bottomley" Cc: Aurelien Jacquiot Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Chen Liqin Cc: Chris Metcalf Cc: Chris Zankel Cc: David Howells Cc: Fenghua Yu Cc: Geert Uytterhoeven Cc: Guan Xuetao Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Heiko Carstens Cc: Helge Deller Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: James Hogan Cc: Jeff Dike Cc: Jesper Nilsson Cc: Jiri Slaby Cc: Jonas Bonn Cc: Koichi Yasutake Cc: Lennox Wu Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Mikael Starvik Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Rich Felker Cc: Richard Henderson Cc: Richard Kuo Cc: Richard Weinberger Cc: Russell King Cc: Steven Miao Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 167c0d4bf3fa..02bdab4d6db7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2771,9 +2771,9 @@ static inline int copy_thread_tls( extern void flush_thread(void); #ifdef CONFIG_HAVE_EXIT_THREAD -extern void exit_thread(void); +extern void exit_thread(struct task_struct *tsk); #else -static inline void exit_thread(void) +static inline void exit_thread(struct task_struct *tsk) { } #endif -- cgit v1.2.3 From 2eeed7e98d6a1341b1574893a95ce5b8379140f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Nyffenegger?= Date: Fri, 20 May 2016 17:00:30 -0700 Subject: include/linux/syscalls.h: use pid_t instead of int MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In include/linux/syscalls.h, the four functions sys_kill, sys_tgkill, sys_tkill and sys_rt_sigqueueinfo are declared with "int pid" and "int tgid". However, in kernel/signal.c, the corresponding definitions use the more appropriate "pid_t" (which is a typedef'd int). This patch changes "int" to "pid_t" in the declarations of sys_kill, sys_tgkill, sys_tkill and sys_rt_sigqueueinfo in in order to harmonize the function declarations with their respective definitions. Link: http://lkml.kernel.org/r/57302FDA.7020205@renenyffenegger.ch Signed-off-by: René Nyffenegger Cc: Andy Lutomirski Cc: Josh Triplett Cc: Al Viro Cc: "Steven Rostedt (Red Hat)" Cc: Zach Brown Cc: Milosz Tanski Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index d795472c54d8..d02239022bd0 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -371,10 +371,10 @@ asmlinkage long sys_rt_sigtimedwait(const sigset_t __user *uthese, size_t sigsetsize); asmlinkage long sys_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t __user *uinfo); -asmlinkage long sys_kill(int pid, int sig); -asmlinkage long sys_tgkill(int tgid, int pid, int sig); -asmlinkage long sys_tkill(int pid, int sig); -asmlinkage long sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo); +asmlinkage long sys_kill(pid_t pid, int sig); +asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig); +asmlinkage long sys_tkill(pid_t pid, int sig); +asmlinkage long sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo); asmlinkage long sys_sgetmask(void); asmlinkage long sys_ssetmask(int newmask); asmlinkage long sys_signal(int sig, __sighandler_t handler); -- cgit v1.2.3 From 42a0bb3f71383b457a7db362f1c69e7afb96732b Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 20 May 2016 17:00:33 -0700 Subject: printk/nmi: generic solution for safe printk in NMI printk() takes some locks and could not be used a safe way in NMI context. The chance of a deadlock is real especially when printing stacks from all CPUs. This particular problem has been addressed on x86 by the commit a9edc8809328 ("x86/nmi: Perform a safe NMI stack trace on all CPUs"). The patchset brings two big advantages. First, it makes the NMI backtraces safe on all architectures for free. Second, it makes all NMI messages almost safe on all architectures (the temporary buffer is limited. We still should keep the number of messages in NMI context at minimum). Note that there already are several messages printed in NMI context: WARN_ON(in_nmi()), BUG_ON(in_nmi()), anything being printed out from MCE handlers. These are not easy to avoid. This patch reuses most of the code and makes it generic. It is useful for all messages and architectures that support NMI. The alternative printk_func is set when entering and is reseted when leaving NMI context. It queues IRQ work to copy the messages into the main ring buffer in a safe context. __printk_nmi_flush() copies all available messages and reset the buffer. Then we could use a simple cmpxchg operations to get synchronized with writers. There is also used a spinlock to get synchronized with other flushers. We do not longer use seq_buf because it depends on external lock. It would be hard to make all supported operations safe for a lockless use. It would be confusing and error prone to make only some operations safe. The code is put into separate printk/nmi.c as suggested by Steven Rostedt. It needs a per-CPU buffer and is compiled only on architectures that call nmi_enter(). This is achieved by the new HAVE_NMI Kconfig flag. The are MN10300 and Xtensa architectures. We need to clean up NMI handling there first. Let's do it separately. The patch is heavily based on the draft from Peter Zijlstra, see https://lkml.org/lkml/2015/6/10/327 [arnd@arndb.de: printk-nmi: use %zu format string for size_t] [akpm@linux-foundation.org: min_t->min - all types are size_t here] Signed-off-by: Petr Mladek Suggested-by: Peter Zijlstra Suggested-by: Steven Rostedt Cc: Jan Kara Acked-by: Russell King [arm part] Cc: Daniel Thompson Cc: Jiri Kosina Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Martin Schwidefsky Cc: David Miller Cc: Daniel Thompson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hardirq.h | 2 ++ include/linux/percpu.h | 3 --- include/linux/printk.h | 12 +++++++++++- 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index dfd59d6bc6f0..c683996110b1 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -61,6 +61,7 @@ extern void irq_exit(void); #define nmi_enter() \ do { \ + printk_nmi_enter(); \ lockdep_off(); \ ftrace_nmi_enter(); \ BUG_ON(in_nmi()); \ @@ -77,6 +78,7 @@ extern void irq_exit(void); preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ ftrace_nmi_exit(); \ lockdep_on(); \ + printk_nmi_exit(); \ } while (0) #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 4bc6dafb703e..56939d3f6e53 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -129,7 +129,4 @@ extern phys_addr_t per_cpu_ptr_to_phys(void *addr); (typeof(type) __percpu *)__alloc_percpu(sizeof(type), \ __alignof__(type)) -/* To avoid include hell, as printk can not declare this, we declare it here */ -DECLARE_PER_CPU(printk_func_t, printk_func); - #endif /* __LINUX_PERCPU_H */ diff --git a/include/linux/printk.h b/include/linux/printk.h index 9ccbdf2c1453..51dd6b824fe2 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -122,7 +122,17 @@ static inline __printf(1, 2) __cold void early_printk(const char *s, ...) { } #endif -typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args); +#ifdef CONFIG_PRINTK_NMI +extern void printk_nmi_init(void); +extern void printk_nmi_enter(void); +extern void printk_nmi_exit(void); +extern void printk_nmi_flush(void); +#else +static inline void printk_nmi_init(void) { } +static inline void printk_nmi_enter(void) { } +static inline void printk_nmi_exit(void) { } +static inline void printk_nmi_flush(void) { } +#endif /* PRINTK_NMI */ #ifdef CONFIG_PRINTK asmlinkage __printf(5, 0) -- cgit v1.2.3 From cf9b1106c81c45cde02208fca49d3f3e4ab6ee74 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 20 May 2016 17:00:42 -0700 Subject: printk/nmi: flush NMI messages on the system panic In NMI context, printk() messages are stored into per-CPU buffers to avoid a possible deadlock. They are normally flushed to the main ring buffer via an IRQ work. But the work is never called when the system calls panic() in the very same NMI handler. This patch tries to flush NMI buffers before the crash dump is generated. In this case it does not risk a double release and bails out when the logbuf_lock is already taken. The aim is to get the messages into the main ring buffer when possible. It makes them better accessible in the vmcore. Then the patch tries to flush the buffers second time when other CPUs are down. It might be more aggressive and reset logbuf_lock. The aim is to get the messages available for the consequent kmsg_dump() and console_flush_on_panic() calls. The patch causes vprintk_emit() to be called even in NMI context again. But it is done via printk_deferred() so that the console handling is skipped. Consoles use internal locks and we could not prevent a deadlock easily. They are explicitly called later when the crash dump is not generated, see console_flush_on_panic(). Signed-off-by: Petr Mladek Cc: Benjamin Herrenschmidt Cc: Daniel Thompson Cc: David Miller Cc: Ingo Molnar Cc: Jan Kara Cc: Jiri Kosina Cc: Martin Schwidefsky Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Russell King Cc: Steven Rostedt Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/printk.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index 51dd6b824fe2..f4da695fd615 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -127,11 +127,13 @@ extern void printk_nmi_init(void); extern void printk_nmi_enter(void); extern void printk_nmi_exit(void); extern void printk_nmi_flush(void); +extern void printk_nmi_flush_on_panic(void); #else static inline void printk_nmi_init(void) { } static inline void printk_nmi_enter(void) { } static inline void printk_nmi_exit(void) { } static inline void printk_nmi_flush(void) { } +static inline void printk_nmi_flush_on_panic(void) { } #endif /* PRINTK_NMI */ #ifdef CONFIG_PRINTK -- cgit v1.2.3 From 8da4b8c48e7b43cb16d05e1dbb34ad9f73ab7efd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 20 May 2016 17:01:00 -0700 Subject: lib/uuid.c: move generate_random_uuid() to uuid.c Let's gather the UUID related functions under one hood. Signed-off-by: Andy Shevchenko Reviewed-by: Matt Fleming Cc: Dmitry Kasatkin Cc: Mimi Zohar Cc: Rasmus Villemoes Cc: Arnd Bergmann Cc: "Theodore Ts'o" Cc: Al Viro Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/random.h | 1 - include/linux/uuid.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index 9c29122037f9..e47e533742b5 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -26,7 +26,6 @@ extern void get_random_bytes(void *buf, int nbytes); extern int add_random_ready_callback(struct random_ready_callback *rdy); extern void del_random_ready_callback(struct random_ready_callback *rdy); extern void get_random_bytes_arch(void *buf, int nbytes); -void generate_random_uuid(unsigned char uuid_out[16]); extern int random_int_secret_init(void); #ifndef MODULE diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 6df2509033d7..91c2b6d9cbb7 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -33,6 +33,8 @@ static inline int uuid_be_cmp(const uuid_be u1, const uuid_be u2) return memcmp(&u1, &u2, sizeof(uuid_be)); } +void generate_random_uuid(unsigned char uuid[16]); + extern void uuid_le_gen(uuid_le *u); extern void uuid_be_gen(uuid_be *u); -- cgit v1.2.3 From 2b1b0d66704a8cafe83be7114ec4c15ab3a314ad Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 20 May 2016 17:01:04 -0700 Subject: lib/uuid.c: introduce a few more generic helpers There are new helpers in this patch: uuid_is_valid checks if a UUID is valid uuid_be_to_bin converts from string to binary (big endian) uuid_le_to_bin converts from string to binary (little endian) They will be used in future, i.e. in the following patches in the series. This also moves the indices arrays to lib/uuid.c to be shared accross modules. [andriy.shevchenko@linux.intel.com: fix typo] Signed-off-by: Andy Shevchenko Reviewed-by: Matt Fleming Cc: Dmitry Kasatkin Cc: Mimi Zohar Cc: Rasmus Villemoes Cc: Arnd Bergmann Cc: "Theodore Ts'o" Cc: Al Viro Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/uuid.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 91c2b6d9cbb7..e0b95e728a77 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -22,6 +22,11 @@ #include +/* + * The length of a UUID string ("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee") + * not including trailing NUL. + */ +#define UUID_STRING_LEN 36 static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2) { @@ -38,4 +43,12 @@ void generate_random_uuid(unsigned char uuid[16]); extern void uuid_le_gen(uuid_le *u); extern void uuid_be_gen(uuid_be *u); +bool __must_check uuid_is_valid(const char *uuid); + +extern const u8 uuid_le_index[16]; +extern const u8 uuid_be_index[16]; + +int uuid_le_to_bin(const char *uuid, uuid_le *u); +int uuid_be_to_bin(const char *uuid, uuid_be *u); + #endif -- cgit v1.2.3 From e3a93bce69ad3e2c38927abe311b8cb4f17abbaf Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 20 May 2016 17:01:07 -0700 Subject: lib/uuid.c: remove FSF address There is no point in keeping an address in the file since it's subject to change. While here, update Intel Copyright years. Signed-off-by: Andy Shevchenko Reviewed-by: Matt Fleming Cc: Dmitry Kasatkin Cc: Mimi Zohar Cc: Rasmus Villemoes Cc: Arnd Bergmann Cc: "Theodore Ts'o" Cc: Al Viro Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/uuid.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index e0b95e728a77..2d095fc60204 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -1,7 +1,7 @@ /* * UUID/GUID definition * - * Copyright (C) 2010, Intel Corp. + * Copyright (C) 2010, 2016 Intel Corp. * Huang Ying * * This program is free software; you can redistribute it and/or @@ -12,10 +12,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_UUID_H_ #define _LINUX_UUID_H_ -- cgit v1.2.3 From ba7e34b1bbd2722685bbc75d168672d5154d8614 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 20 May 2016 17:01:18 -0700 Subject: include/linux/efi.h: redefine type, constant, macro from generic code Generic UUID library defines structure type, macro to define UUID, and the length of the UUID string. This patch removes duplicate data structure definition, UUID string length constant as well as macro for UUID handling. Signed-off-by: Andy Shevchenko Reviewed-by: Matt Fleming Cc: Dmitry Kasatkin Cc: Mimi Zohar Cc: Rasmus Villemoes Cc: Arnd Bergmann Cc: "Theodore Ts'o" Cc: Al Viro Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/efi.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index df7acb51f3cc..c2db3ca22217 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -44,17 +45,10 @@ typedef u16 efi_char16_t; /* UNICODE character */ typedef u64 efi_physical_addr_t; typedef void *efi_handle_t; - -typedef struct { - u8 b[16]; -} efi_guid_t; +typedef uuid_le efi_guid_t; #define EFI_GUID(a,b,c,d0,d1,d2,d3,d4,d5,d6,d7) \ -((efi_guid_t) \ -{{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ - (b) & 0xff, ((b) >> 8) & 0xff, \ - (c) & 0xff, ((c) >> 8) & 0xff, \ - (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}) + UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) /* * Generic EFI table header @@ -1117,7 +1111,7 @@ extern int efi_status_to_err(efi_status_t status); * Length of a GUID string (strlen("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee")) * not including trailing NUL */ -#define EFI_VARIABLE_GUID_LEN 36 +#define EFI_VARIABLE_GUID_LEN UUID_STRING_LEN /* * The type of search to perform when calling boottime->locate_handle -- cgit v1.2.3 From 63579785752ba7d0e842078ec6b2875367046f06 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 20 May 2016 17:01:24 -0700 Subject: include/linux/genhd.h: move to use generic UUID library UUID library provides uuid_be type and uuid_be_to_bin() function. This substitutes open coded variant by generic library calls. Signed-off-by: Andy Shevchenko Reviewed-by: Matt Fleming Cc: Dmitry Kasatkin Cc: Mimi Zohar Cc: Rasmus Villemoes Cc: Arnd Bergmann Cc: "Theodore Ts'o" Cc: Al Viro Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/genhd.h | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 5c706765404a..359a8e4bd44d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef CONFIG_BLOCK @@ -93,7 +94,7 @@ struct disk_stats { * Enough for the string representation of any kind of UUID plus NULL. * EFI UUID is 36 characters. MSDOS UUID is 11 characters. */ -#define PARTITION_META_INFO_UUIDLTH 37 +#define PARTITION_META_INFO_UUIDLTH (UUID_STRING_LEN + 1) struct partition_meta_info { char uuid[PARTITION_META_INFO_UUIDLTH]; @@ -228,27 +229,9 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part) return NULL; } -static inline void part_pack_uuid(const u8 *uuid_str, u8 *to) -{ - int i; - for (i = 0; i < 16; ++i) { - *to++ = (hex_to_bin(*uuid_str) << 4) | - (hex_to_bin(*(uuid_str + 1))); - uuid_str += 2; - switch (i) { - case 3: - case 5: - case 7: - case 9: - uuid_str++; - continue; - } - } -} - static inline int blk_part_pack_uuid(const u8 *uuid_str, u8 *to) { - part_pack_uuid(uuid_str, to); + uuid_be_to_bin(uuid_str, (uuid_be *)to); return 0; } -- cgit v1.2.3 From e9256efcc8e390fa4fcf796a0c0b47d642d77d32 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:01:33 -0700 Subject: radix-tree: introduce radix_tree_empty Commit e61452365372 ("radix_tree: add support for multi-order entries") left the impression that the support for multiorder radix tree entries was functional. As soon as Ross tried to use it, it became apparent that my testing was completely inadequate, and it didn't even work a little bit for orders that were not a multiple of shift. This series of patches is the result of about 6 weeks of redesign, reimplementation, testing, arguing and hair-pulling. The great news is that the test-suite is now far better than it was. That's reflected in the diffstat for the test-suite alone: 12 files changed, 436 insertions(+), 28 deletions(-) The highlight for users of the tree is that the restriction on the order of inserted entries being >= RADIX_TREE_MAP_SHIFT is now gone; the radix tree now supports any order between 0 and 64. For those who are interested in how the tree works, patch 9 is probably the most interesting one as it introduces the new machinery for handling sibling entries. I've tried to be fair in attributing authorship to the person who contributed the majority of the code in each patch; Ross has been an invaluable partner in the development of this support and it's fair to say that each of us has code in every commit. I should also express my appreciation of the 0day testing. It prompted me that I was bloating the tinyconfig in an unacceptable way, and it bisected to a commit which contained a rather nasty memory-corruption bug. This patch (of 29): The irqdomain code was checking for 0 or 1 entries, not 0 entries like the comment said they were. Introduce a new helper that will actually check for an empty tree. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 51a97ac8bfbf..83f708e5db59 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -136,6 +136,11 @@ do { \ (root)->rnode = NULL; \ } while (0) +static inline bool radix_tree_empty(struct radix_tree_root *root) +{ + return root->rnode == NULL; +} + /** * Radix-tree synchronization * -- cgit v1.2.3 From 97d778b2de9213c7a7483dad0f533c1af9f0810f Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:01:42 -0700 Subject: radix tree test suite: allow testing other fan-out values The defines in regression2.c are already in radix-tree.h and duplicating them in the test case makes experimenting with other values for the fan-out harder than necessary. Allow the user of the radix tree to decide what the fan-out should be rather than fixing it to 8 for non-kernel uses. Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 83f708e5db59..5ce5a1e0ecc5 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -70,10 +70,8 @@ static inline int radix_tree_is_indirect_ptr(void *ptr) #define RADIX_TREE_MAX_TAGS 3 -#ifdef __KERNEL__ +#ifndef RADIX_TREE_MAP_SHIFT #define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) -#else -#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ #endif #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) -- cgit v1.2.3 From 6c4bd68a2962c03423a226d949caf64216d013cc Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:01:51 -0700 Subject: radix-tree: remove unused looping macros radix_tree_for_each_chunk() and radix_tree_for_each_chunk_slot() have never been used in the kernel since their introduction in 2012, so remove them. Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 5ce5a1e0ecc5..e1512a607709 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -478,34 +478,6 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) return NULL; } -/** - * radix_tree_for_each_chunk - iterate over chunks - * - * @slot: the void** variable for pointer to chunk first slot - * @root: the struct radix_tree_root pointer - * @iter: the struct radix_tree_iter pointer - * @start: iteration starting index - * @flags: RADIX_TREE_ITER_* and tag index - * - * Locks can be released and reacquired between iterations. - */ -#define radix_tree_for_each_chunk(slot, root, iter, start, flags) \ - for (slot = radix_tree_iter_init(iter, start) ; \ - (slot = radix_tree_next_chunk(root, iter, flags)) ;) - -/** - * radix_tree_for_each_chunk_slot - iterate over slots in one chunk - * - * @slot: the void** variable, at the beginning points to chunk first slot - * @iter: the struct radix_tree_iter pointer - * @flags: RADIX_TREE_ITER_*, should be constant - * - * This macro is designed to be nested inside radix_tree_for_each_chunk(). - * @slot points to the radix tree slot, @iter->index contains its index. - */ -#define radix_tree_for_each_chunk_slot(slot, iter, flags) \ - for (; slot ; slot = radix_tree_next_slot(slot, iter, flags)) - /** * radix_tree_for_each_slot - iterate over non-empty slots * -- cgit v1.2.3 From 21ef533931f73a8e963a6107aa5ec51b192f28be Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:02:26 -0700 Subject: radix-tree: add support for multi-order iterating This enables the macros radix_tree_for_each_slot() and friends to be used with multi-order entries. The way that this works is that we treat all entries in a given slots[] array as a single chunk. If the index given to radix_tree_next_chunk() happens to point us to a sibling entry, we will back up iter->index so that it points to the canonical entry, and that will be the place where we start our iteration. As we're processing a chunk in radix_tree_next_slot(), we process canonical entries, skip over sibling entries, and restart the chunk lookup if we find a non-sibling indirect pointer. This drops back to the radix_tree_next_chunk() code, which will re-walk the tree and look for another chunk. This allows us to properly handle multi-order entries mixed with other entries that are at various heights in the radix tree. Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 69 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index e1512a607709..8558d52e1c7b 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -330,8 +330,9 @@ static inline void radix_tree_preload_end(void) * struct radix_tree_iter - radix tree iterator state * * @index: index of current slot - * @next_index: next-to-last index for this chunk + * @next_index: one beyond the last index for this chunk * @tags: bit-mask for tag-iterating + * @shift: shift for the node that holds our slots * * This radix tree iterator works in terms of "chunks" of slots. A chunk is a * subinterval of slots contained within one radix tree leaf node. It is @@ -344,8 +345,20 @@ struct radix_tree_iter { unsigned long index; unsigned long next_index; unsigned long tags; +#ifdef CONFIG_RADIX_TREE_MULTIORDER + unsigned int shift; +#endif }; +static inline unsigned int iter_shift(struct radix_tree_iter *iter) +{ +#ifdef CONFIG_RADIX_TREE_MULTIORDER + return iter->shift; +#else + return 0; +#endif +} + #define RADIX_TREE_ITER_TAG_MASK 0x00FF /* tag index in lower byte */ #define RADIX_TREE_ITER_TAGGED 0x0100 /* lookup tagged slots */ #define RADIX_TREE_ITER_CONTIG 0x0200 /* stop at first hole */ @@ -405,6 +418,12 @@ void **radix_tree_iter_retry(struct radix_tree_iter *iter) return NULL; } +static inline unsigned long +__radix_tree_iter_add(struct radix_tree_iter *iter, unsigned long slots) +{ + return iter->index + (slots << iter_shift(iter)); +} + /** * radix_tree_iter_next - resume iterating when the chunk may be invalid * @iter: iterator state @@ -416,7 +435,7 @@ void **radix_tree_iter_retry(struct radix_tree_iter *iter) static inline __must_check void **radix_tree_iter_next(struct radix_tree_iter *iter) { - iter->next_index = iter->index + 1; + iter->next_index = __radix_tree_iter_add(iter, 1); iter->tags = 0; return NULL; } @@ -430,7 +449,12 @@ void **radix_tree_iter_next(struct radix_tree_iter *iter) static __always_inline long radix_tree_chunk_size(struct radix_tree_iter *iter) { - return iter->next_index - iter->index; + return (iter->next_index - iter->index) >> iter_shift(iter); +} + +static inline void *indirect_to_ptr(void *ptr) +{ + return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); } /** @@ -448,24 +472,51 @@ static __always_inline void ** radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) { if (flags & RADIX_TREE_ITER_TAGGED) { + void *canon = slot; + iter->tags >>= 1; + if (unlikely(!iter->tags)) + return NULL; + while (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && + radix_tree_is_indirect_ptr(slot[1])) { + if (indirect_to_ptr(slot[1]) == canon) { + iter->tags >>= 1; + iter->index = __radix_tree_iter_add(iter, 1); + slot++; + continue; + } + iter->next_index = __radix_tree_iter_add(iter, 1); + return NULL; + } if (likely(iter->tags & 1ul)) { - iter->index++; + iter->index = __radix_tree_iter_add(iter, 1); return slot + 1; } - if (!(flags & RADIX_TREE_ITER_CONTIG) && likely(iter->tags)) { + if (!(flags & RADIX_TREE_ITER_CONTIG)) { unsigned offset = __ffs(iter->tags); iter->tags >>= offset; - iter->index += offset + 1; + iter->index = __radix_tree_iter_add(iter, offset + 1); return slot + offset + 1; } } else { - long size = radix_tree_chunk_size(iter); + long count = radix_tree_chunk_size(iter); + void *canon = slot; - while (--size > 0) { + while (--count > 0) { slot++; - iter->index++; + iter->index = __radix_tree_iter_add(iter, 1); + + if (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && + radix_tree_is_indirect_ptr(*slot)) { + if (indirect_to_ptr(*slot) == canon) + continue; + else { + iter->next_index = iter->index; + break; + } + } + if (likely(*slot)) return slot; if (flags & RADIX_TREE_ITER_CONTIG) { -- cgit v1.2.3 From 0c7fa0a8418cbe0e8963fe36db9575d03b8589f7 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:07 -0700 Subject: radix-tree: split node->path into offset and height Neither piece of information we're storing in node->path can be larger than 64, so store each in its own unsigned char instead of shifting and masking to store them both in an unsigned int. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 8558d52e1c7b..2d2ad9d685a3 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -84,16 +84,13 @@ static inline int radix_tree_is_indirect_ptr(void *ptr) #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ RADIX_TREE_MAP_SHIFT)) -/* Height component in node->path */ -#define RADIX_TREE_HEIGHT_SHIFT (RADIX_TREE_MAX_PATH + 1) -#define RADIX_TREE_HEIGHT_MASK ((1UL << RADIX_TREE_HEIGHT_SHIFT) - 1) - /* Internally used bits of node->count */ #define RADIX_TREE_COUNT_SHIFT (RADIX_TREE_MAP_SHIFT + 1) #define RADIX_TREE_COUNT_MASK ((1UL << RADIX_TREE_COUNT_SHIFT) - 1) struct radix_tree_node { - unsigned int path; /* Offset in parent & height from the bottom */ + unsigned char height; /* From the bottom */ + unsigned char offset; /* Slot offset in parent */ unsigned int count; union { struct { -- cgit v1.2.3 From c12e51b07b3ac4c188fd91a82f96840fdb9cca6f Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:10 -0700 Subject: radix-tree: replace node->height with node->shift node->shift represents the shift necessary for looking in the slots array at this level. It is equal to the old (node->height - 1) * RADIX_TREE_MAP_SHIFT. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 2d2ad9d685a3..037458257e12 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -89,7 +89,7 @@ static inline int radix_tree_is_indirect_ptr(void *ptr) #define RADIX_TREE_COUNT_MASK ((1UL << RADIX_TREE_COUNT_SHIFT) - 1) struct radix_tree_node { - unsigned char height; /* From the bottom */ + unsigned char shift; /* Bits remaining in each slot */ unsigned char offset; /* Slot offset in parent */ unsigned int count; union { -- cgit v1.2.3 From d0891265bbc988dc91ed8580b38eb3dac128581b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:19 -0700 Subject: radix-tree: remove root->height The only remaining references to root->height were in extend and shrink, where it was updated. Now we can remove it entirely. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 037458257e12..c0d223cfac00 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -110,13 +110,11 @@ struct radix_tree_node { /* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */ struct radix_tree_root { - unsigned int height; gfp_t gfp_mask; struct radix_tree_node __rcu *rnode; }; #define RADIX_TREE_INIT(mask) { \ - .height = 0, \ .gfp_mask = (mask), \ .rnode = NULL, \ } @@ -126,7 +124,6 @@ struct radix_tree_root { #define INIT_RADIX_TREE(root, mask) \ do { \ - (root)->height = 0; \ (root)->gfp_mask = (mask); \ (root)->rnode = NULL; \ } while (0) -- cgit v1.2.3 From 30ff46ccb303fb6f6c28b9aa9f2cdc4ba900ed3f Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:22 -0700 Subject: radix-tree: rename INDIRECT_PTR to INTERNAL_NODE The name RADIX_TREE_INDIRECT_PTR doesn't really match the meaning. RADIX_TREE_INTERNAL_NODE is a better name. Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index c0d223cfac00..c8cc879046c7 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -29,20 +29,16 @@ #include /* - * An indirect pointer (root->rnode pointing to a radix_tree_node, rather - * than a data item) is signalled by the low bit set in the root->rnode - * pointer. - * - * In this case root->height is > 0, but the indirect pointer tests are - * needed for RCU lookups (because root->height is unreliable). The only - * time callers need worry about this is when doing a lookup_slot under - * RCU. - * - * Indirect pointer in fact is also used to tag the last pointer of a node - * when it is shrunk, before we rcu free the node. See shrink code for - * details. + * Entries in the radix tree have the low bit set if they refer to a + * radix_tree_node. If the low bit is clear then the entry is user data. + * + * We also use the low bit to indicate that the slot will be freed in the + * next RCU idle period, and users need to re-walk the tree to find the + * new slot for the index that they were looking for. See the comment in + * radix_tree_shrink() for details. */ -#define RADIX_TREE_INDIRECT_PTR 1 +#define RADIX_TREE_INTERNAL_NODE 1 + /* * A common use of the radix tree is to store pointers to struct pages; * but shmem/tmpfs needs also to store swap entries in the same tree: @@ -63,7 +59,7 @@ static inline int radix_tree_is_indirect_ptr(void *ptr) { - return (int)((unsigned long)ptr & RADIX_TREE_INDIRECT_PTR); + return (int)((unsigned long)ptr & RADIX_TREE_INTERNAL_NODE); } /*** radix-tree API starts here ***/ @@ -228,7 +224,7 @@ static inline void *radix_tree_deref_slot_protected(void **pslot, */ static inline int radix_tree_deref_retry(void *arg) { - return unlikely((unsigned long)arg & RADIX_TREE_INDIRECT_PTR); + return unlikely(radix_tree_is_indirect_ptr(arg)); } /** @@ -250,7 +246,7 @@ static inline int radix_tree_exceptional_entry(void *arg) static inline int radix_tree_exception(void *arg) { return unlikely((unsigned long)arg & - (RADIX_TREE_INDIRECT_PTR | RADIX_TREE_EXCEPTIONAL_ENTRY)); + (RADIX_TREE_INTERNAL_NODE | RADIX_TREE_EXCEPTIONAL_ENTRY)); } /** @@ -448,7 +444,7 @@ radix_tree_chunk_size(struct radix_tree_iter *iter) static inline void *indirect_to_ptr(void *ptr) { - return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); + return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE); } /** -- cgit v1.2.3 From 4dd6c0987ca43d6544f4f0a3f86f6ea3bfc60fc1 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:27 -0700 Subject: radix-tree: rename indirect_to_ptr() to entry_to_node() Mirrors the earlier commit introducing node_to_entry(). Also change the type returned to be a struct radix_tree_node pointer. That lets us simplify a couple of places in the radix tree shrink & extend paths where we could convert an entry into a pointer, modify the node, then convert the pointer back into an entry. Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index c8cc879046c7..b94aa198dd6b 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -442,7 +442,7 @@ radix_tree_chunk_size(struct radix_tree_iter *iter) return (iter->next_index - iter->index) >> iter_shift(iter); } -static inline void *indirect_to_ptr(void *ptr) +static inline struct radix_tree_node *entry_to_node(void *ptr) { return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE); } @@ -469,7 +469,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) return NULL; while (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && radix_tree_is_indirect_ptr(slot[1])) { - if (indirect_to_ptr(slot[1]) == canon) { + if (entry_to_node(slot[1]) == canon) { iter->tags >>= 1; iter->index = __radix_tree_iter_add(iter, 1); slot++; @@ -499,12 +499,10 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) if (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && radix_tree_is_indirect_ptr(*slot)) { - if (indirect_to_ptr(*slot) == canon) + if (entry_to_node(*slot) == canon) continue; - else { - iter->next_index = iter->index; - break; - } + iter->next_index = iter->index; + break; } if (likely(*slot)) -- cgit v1.2.3 From b194d16c27af905d6e3552f4851bc7d9fee4e90f Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:30 -0700 Subject: radix-tree: rename radix_tree_is_indirect_ptr() As with indirect_to_ptr(), ptr_to_indirect() and RADIX_TREE_INDIRECT_PTR, change radix_tree_is_indirect_ptr() to radix_tree_is_internal_node(). Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index b94aa198dd6b..bad63105e37e 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -57,7 +57,7 @@ #define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)sector << \ RADIX_DAX_SHIFT | (pmd ? RADIX_DAX_PMD : RADIX_DAX_PTE))) -static inline int radix_tree_is_indirect_ptr(void *ptr) +static inline int radix_tree_is_internal_node(void *ptr) { return (int)((unsigned long)ptr & RADIX_TREE_INTERNAL_NODE); } @@ -224,7 +224,7 @@ static inline void *radix_tree_deref_slot_protected(void **pslot, */ static inline int radix_tree_deref_retry(void *arg) { - return unlikely(radix_tree_is_indirect_ptr(arg)); + return unlikely(radix_tree_is_internal_node(arg)); } /** @@ -259,7 +259,7 @@ static inline int radix_tree_exception(void *arg) */ static inline void radix_tree_replace_slot(void **pslot, void *item) { - BUG_ON(radix_tree_is_indirect_ptr(item)); + BUG_ON(radix_tree_is_internal_node(item)); rcu_assign_pointer(*pslot, item); } @@ -468,7 +468,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) if (unlikely(!iter->tags)) return NULL; while (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && - radix_tree_is_indirect_ptr(slot[1])) { + radix_tree_is_internal_node(slot[1])) { if (entry_to_node(slot[1]) == canon) { iter->tags >>= 1; iter->index = __radix_tree_iter_add(iter, 1); @@ -498,7 +498,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) iter->index = __radix_tree_iter_add(iter, 1); if (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && - radix_tree_is_indirect_ptr(*slot)) { + radix_tree_is_internal_node(*slot)) { if (entry_to_node(*slot) == canon) continue; iter->next_index = iter->index; -- cgit v1.2.3 From d604c324524bf61c68182bb27db64656a78fe911 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:45 -0700 Subject: radix-tree: introduce radix_tree_replace_clear_tags() In addition to replacing the entry, we also clear all associated tags. This is really a one-off special for page_cache_tree_delete() which had far too much detailed knowledge about how the radix tree works. For efficiency, factor node_tag_clear() out of radix_tree_tag_clear() It can be used by radix_tree_delete_item() as well as radix_tree_replace_clear_tags(). Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index bad63105e37e..11c8e7cc3920 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -281,9 +281,12 @@ bool __radix_tree_delete_node(struct radix_tree_root *root, struct radix_tree_node *node); void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); void *radix_tree_delete(struct radix_tree_root *, unsigned long); -unsigned int -radix_tree_gang_lookup(struct radix_tree_root *root, void **results, - unsigned long first_index, unsigned int max_items); +struct radix_tree_node *radix_tree_replace_clear_tags( + struct radix_tree_root *root, + unsigned long index, void *entry); +unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, + void **results, unsigned long first_index, + unsigned int max_items); unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, unsigned long *indices, unsigned long first_index, unsigned int max_items); -- cgit v1.2.3 From 78a9be0a0a3367b94af242632c525d22b26f1a87 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 20 May 2016 17:03:51 -0700 Subject: dax: move RADIX_DAX_ definitions to dax.c These don't belong in radix-tree.h any more than PAGECACHE_TAG_* do. Let's try to maintain the idea that radix-tree simply implements an abstract data type. Signed-off-by: NeilBrown Reviewed-by: Ross Zwisler Reviewed-by: Jan Kara Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 11c8e7cc3920..c2f69e25ba86 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -48,15 +48,6 @@ #define RADIX_TREE_EXCEPTIONAL_ENTRY 2 #define RADIX_TREE_EXCEPTIONAL_SHIFT 2 -#define RADIX_DAX_MASK 0xf -#define RADIX_DAX_SHIFT 4 -#define RADIX_DAX_PTE (0x4 | RADIX_TREE_EXCEPTIONAL_ENTRY) -#define RADIX_DAX_PMD (0x8 | RADIX_TREE_EXCEPTIONAL_ENTRY) -#define RADIX_DAX_TYPE(entry) ((unsigned long)entry & RADIX_DAX_MASK) -#define RADIX_DAX_SECTOR(entry) (((unsigned long)entry >> RADIX_DAX_SHIFT)) -#define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)sector << \ - RADIX_DAX_SHIFT | (pmd ? RADIX_DAX_PMD : RADIX_DAX_PTE))) - static inline int radix_tree_is_internal_node(void *ptr) { return (int)((unsigned long)ptr & RADIX_TREE_INTERNAL_NODE); -- cgit v1.2.3 From 3bcadd6fa6c4fd07ace3626357c824eb532488a6 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:54 -0700 Subject: radix-tree: free up the bottom bit of exceptional entries for reuse We are guaranteed that pointers to radix_tree_nodes always have the bottom two bits clear (because they come from a slab cache, and slab caches have a minimum alignment of sizeof(void *)), so we can redefine 'radix_tree_is_internal_node' to only return true if the bottom two bits have value '01'. This frees up one quarter of the potential values for use by the user. Idea from Neil Brown. Signed-off-by: Matthew Wilcox Suggested-by: Neil Brown Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index c2f69e25ba86..cb4b7e8cee81 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -29,28 +29,37 @@ #include /* - * Entries in the radix tree have the low bit set if they refer to a - * radix_tree_node. If the low bit is clear then the entry is user data. - * - * We also use the low bit to indicate that the slot will be freed in the - * next RCU idle period, and users need to re-walk the tree to find the - * new slot for the index that they were looking for. See the comment in - * radix_tree_shrink() for details. + * The bottom two bits of the slot determine how the remaining bits in the + * slot are interpreted: + * + * 00 - data pointer + * 01 - internal entry + * 10 - exceptional entry + * 11 - locked exceptional entry + * + * The internal entry may be a pointer to the next level in the tree, a + * sibling entry, or an indicator that the entry in this slot has been moved + * to another location in the tree and the lookup should be restarted. While + * NULL fits the 'data pointer' pattern, it means that there is no entry in + * the tree for this index (no matter what level of the tree it is found at). + * This means that you cannot store NULL in the tree as a value for the index. */ -#define RADIX_TREE_INTERNAL_NODE 1 +#define RADIX_TREE_ENTRY_MASK 3UL +#define RADIX_TREE_INTERNAL_NODE 1UL /* - * A common use of the radix tree is to store pointers to struct pages; - * but shmem/tmpfs needs also to store swap entries in the same tree: - * those are marked as exceptional entries to distinguish them. + * Most users of the radix tree store pointers but shmem/tmpfs stores swap + * entries in the same tree. They are marked as exceptional entries to + * distinguish them from pointers to struct page. * EXCEPTIONAL_ENTRY tests the bit, EXCEPTIONAL_SHIFT shifts content past it. */ #define RADIX_TREE_EXCEPTIONAL_ENTRY 2 #define RADIX_TREE_EXCEPTIONAL_SHIFT 2 -static inline int radix_tree_is_internal_node(void *ptr) +static inline bool radix_tree_is_internal_node(void *ptr) { - return (int)((unsigned long)ptr & RADIX_TREE_INTERNAL_NODE); + return ((unsigned long)ptr & RADIX_TREE_ENTRY_MASK) == + RADIX_TREE_INTERNAL_NODE; } /*** radix-tree API starts here ***/ @@ -236,8 +245,7 @@ static inline int radix_tree_exceptional_entry(void *arg) */ static inline int radix_tree_exception(void *arg) { - return unlikely((unsigned long)arg & - (RADIX_TREE_INTERNAL_NODE | RADIX_TREE_EXCEPTIONAL_ENTRY)); + return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK); } /** -- cgit v1.2.3 From acc93d30d7d43f428272c20a047389c4cbca82ba Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 7 May 2016 11:40:28 -0700 Subject: Revert "block: enable dax for raw block devices" This reverts commit 5a023cdba50c5f5f2bc351783b3131699deb3937. The functionality is superseded by the new "Device DAX" facility. Cc: Jeff Moyer Cc: Christoph Hellwig Cc: Dave Chinner Cc: Andrew Morton Cc: Ross Zwisler Cc: Jan Kara Signed-off-by: Dan Williams --- include/linux/fs.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 70e61b58baaf..8363a10660f6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2320,14 +2320,6 @@ extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); -#ifdef CONFIG_FS_DAX -extern bool blkdev_dax_capable(struct block_device *bdev); -#else -static inline bool blkdev_dax_capable(struct block_device *bdev) -{ - return false; -} -#endif extern struct super_block *blockdev_superblock; -- cgit v1.2.3 From 5726d0b454614a47e641a04c8106392d67a8e1ad Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 23 May 2016 16:23:06 -0700 Subject: nilfs2: remove FSF mailing address from GPL notices This removes the extra paragraph which mentions FSF address in GPL notices from source code of nilfs2 and avoids the checkpatch.pl error related to it. Link: http://lkml.kernel.org/r/1461935747-10380-4-git-send-email-konishi.ryusuke@lab.ntt.co.jp Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nilfs2_fs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index e9fcf90b270d..0d7f8141382a 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -13,10 +13,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Written by Koji Sato * Ryusuke Konishi */ -- cgit v1.2.3 From 4b420ab4eedc7a816ad0f2278871019de1a8ccef Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 23 May 2016 16:23:09 -0700 Subject: nilfs2: clean up old e-mail addresses E-mail addresses of osrg.net domain are no longer available. This removes them from authorship notices and prevents reporters from being confused. Link: http://lkml.kernel.org/r/1461935747-10380-5-git-send-email-konishi.ryusuke@lab.ntt.co.jp Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nilfs2_fs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index 0d7f8141382a..823d63d61081 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -13,8 +13,7 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * - * Written by Koji Sato - * Ryusuke Konishi + * Written by Koji Sato and Ryusuke Konishi. */ /* * linux/include/linux/ext2_fs.h -- cgit v1.2.3 From 0c6c44cb9f93f7c0ad803b41ae7c0b08cf6942e2 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 23 May 2016 16:23:39 -0700 Subject: nilfs2: avoid bare use of 'unsigned' This fixes checkpatch.pl warning "WARNING: Prefer 'unsigned int' to bare use of 'unsigned'". Link: http://lkml.kernel.org/r/1462886671-3521-5-git-send-email-konishi.ryusuke@lab.ntt.co.jp Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nilfs2_fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index 823d63d61081..3b584925d0e8 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -322,9 +322,9 @@ enum { ~NILFS_DIR_ROUND) #define NILFS_MAX_REC_LEN ((1<<16)-1) -static inline unsigned nilfs_rec_len_from_disk(__le16 dlen) +static inline unsigned int nilfs_rec_len_from_disk(__le16 dlen) { - unsigned len = le16_to_cpu(dlen); + unsigned int len = le16_to_cpu(dlen); #if !defined(__KERNEL__) || (PAGE_SIZE >= 65536) if (len == NILFS_MAX_REC_LEN) @@ -333,7 +333,7 @@ static inline unsigned nilfs_rec_len_from_disk(__le16 dlen) return len; } -static inline __le16 nilfs_rec_len_to_disk(unsigned len) +static inline __le16 nilfs_rec_len_to_disk(unsigned int len) { #if !defined(__KERNEL__) || (PAGE_SIZE >= 65536) if (len == (1 << 16)) -- cgit v1.2.3 From 076a378ba6e6b6ddd5f2336aa0876349b7d36409 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 23 May 2016 16:23:48 -0700 Subject: nilfs2: fix block comments This fixes block comments with proper formatting to eliminate the following checkpatch.pl warnings: "WARNING: Block comments use * on subsequent lines" "WARNING: Block comments use a trailing */ on a separate line" Link: http://lkml.kernel.org/r/1462886671-3521-8-git-send-email-konishi.ryusuke@lab.ntt.co.jp Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nilfs2_fs.h | 66 ++++++++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index 3b584925d0e8..5988dd57ba66 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -127,10 +127,14 @@ struct nilfs_super_root { #define NILFS_MOUNT_ERRORS_RO 0x0020 /* Remount fs ro on errors */ #define NILFS_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ #define NILFS_MOUNT_BARRIER 0x1000 /* Use block barriers */ -#define NILFS_MOUNT_STRICT_ORDER 0x2000 /* Apply strict in-order - semantics also for data */ -#define NILFS_MOUNT_NORECOVERY 0x4000 /* Disable write access during - mount-time recovery */ +#define NILFS_MOUNT_STRICT_ORDER 0x2000 /* + * Apply strict in-order + * semantics also for data + */ +#define NILFS_MOUNT_NORECOVERY 0x4000 /* + * Disable write access during + * mount-time recovery + */ #define NILFS_MOUNT_DISCARD 0x8000 /* Issue DISCARD requests */ @@ -142,16 +146,20 @@ struct nilfs_super_block { __le16 s_minor_rev_level; /* minor revision level */ __le16 s_magic; /* Magic signature */ - __le16 s_bytes; /* Bytes count of CRC calculation - for this structure. s_reserved - is excluded. */ + __le16 s_bytes; /* + * Bytes count of CRC calculation + * for this structure. s_reserved + * is excluded. + */ __le16 s_flags; /* flags */ __le32 s_crc_seed; /* Seed value of CRC calculation */ /*10*/ __le32 s_sum; /* Check sum of super block */ - __le32 s_log_block_size; /* Block size represented as follows - blocksize = - 1 << (s_log_block_size + 10) */ + __le32 s_log_block_size; /* + * Block size represented as follows + * blocksize = + * 1 << (s_log_block_size + 10) + */ __le64 s_nsegments; /* Number of segments in filesystem */ /*20*/ __le64 s_dev_size; /* block device size in bytes */ __le64 s_first_data_block; /* 1st seg disk block number */ @@ -163,8 +171,10 @@ struct nilfs_super_block { __le64 s_last_seq; /* seq. number of seg written last */ /*50*/ __le64 s_free_blocks_count; /* Free blocks count */ - __le64 s_ctime; /* Creation time (execution time of - newfs) */ + __le64 s_ctime; /* + * Creation time (execution time of + * newfs) + */ /*60*/ __le64 s_mtime; /* Mount time */ __le64 s_wtime; /* Write time */ /*70*/ __le16 s_mnt_count; /* Mount count */ @@ -188,8 +198,10 @@ struct nilfs_super_block { /*A8*/ char s_volume_name[80]; /* volume name */ /*F8*/ __le32 s_c_interval; /* Commit interval of segment */ - __le32 s_c_block_max; /* Threshold of data amount for - the segment construction */ + __le32 s_c_block_max; /* + * Threshold of data amount for + * the segment construction + */ /*100*/ __le64 s_feature_compat; /* Compatible feature set */ __le64 s_feature_compat_ro; /* Read-only compatible feature set */ __le64 s_feature_incompat; /* Incompatible feature set */ @@ -242,12 +254,18 @@ struct nilfs_super_block { #define NILFS_SB_OFFSET_BYTES 1024 /* byte offset of nilfs superblock */ -#define NILFS_SEG_MIN_BLOCKS 16 /* Minimum number of blocks in - a full segment */ -#define NILFS_PSEG_MIN_BLOCKS 2 /* Minimum number of blocks in - a partial segment */ -#define NILFS_MIN_NRSVSEGS 8 /* Minimum number of reserved - segments */ +#define NILFS_SEG_MIN_BLOCKS 16 /* + * Minimum number of blocks in + * a full segment + */ +#define NILFS_PSEG_MIN_BLOCKS 2 /* + * Minimum number of blocks in + * a partial segment + */ +#define NILFS_MIN_NRSVSEGS 8 /* + * Minimum number of reserved + * segments + */ /* * We call DAT, cpfile, and sufile root metadata files. Inodes of @@ -513,9 +531,11 @@ struct nilfs_checkpoint { __le64 cp_inodes_count; __le64 cp_blocks_count; - /* Do not change the byte offset of ifile inode. - To keep the compatibility of the disk format, - additional fields should be added behind cp_ifile_inode. */ + /* + * Do not change the byte offset of ifile inode. + * To keep the compatibility of the disk format, + * additional fields should be added behind cp_ifile_inode. + */ struct nilfs_inode cp_ifile_inode; }; -- cgit v1.2.3 From c96fc2d85f4a827e3bb2abe7de2394a1fb8a0fe7 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 23 May 2016 16:23:57 -0700 Subject: signal: make oom_flags a bool Currently the size of "struct signal_struct"->oom_flags member is sizeof(unsigned) bytes, but only one flag OOM_FLAG_ORIGIN which is updated by current thread is defined. We can convert OOM_FLAG_ORIGIN into a bool, and reuse the saved bytes for updating from the OOM killer and/or the OOM reaper thread. By the way, do we care about a race window between run_store() and swapoff() because it would be theoretically possible that two threads sharing the "struct signal_struct" concurrently call respective functions? If we care, we can make oom_flags an atomic_t. Signed-off-by: Tetsuo Handa Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 9 +++------ include/linux/sched.h | 6 +++++- include/linux/types.h | 1 - 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index d3f533f2f481..83469522690a 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -50,24 +50,21 @@ enum oom_scan_t { OOM_SCAN_SELECT, /* always select this thread first */ }; -/* Thread is the potential origin of an oom condition; kill first on oom */ -#define OOM_FLAG_ORIGIN ((__force oom_flags_t)0x1) - extern struct mutex oom_lock; static inline void set_current_oom_origin(void) { - current->signal->oom_flags |= OOM_FLAG_ORIGIN; + current->signal->oom_flag_origin = true; } static inline void clear_current_oom_origin(void) { - current->signal->oom_flags &= ~OOM_FLAG_ORIGIN; + current->signal->oom_flag_origin = false; } static inline bool oom_task_origin(const struct task_struct *p) { - return !!(p->signal->oom_flags & OOM_FLAG_ORIGIN); + return p->signal->oom_flag_origin; } extern void mark_oom_victim(struct task_struct *tsk); diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c036de6c1ee..21c26e78aec5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -794,7 +794,11 @@ struct signal_struct { struct tty_audit_buf *tty_audit_buf; #endif - oom_flags_t oom_flags; + /* + * Thread is the potential origin of an oom condition; kill first on + * oom + */ + bool oom_flag_origin; short oom_score_adj; /* OOM kill score adjustment */ short oom_score_adj_min; /* OOM kill score adjustment min value. * Only settable by CAP_SYS_RESOURCE. */ diff --git a/include/linux/types.h b/include/linux/types.h index 70dd3dfde631..baf718324f4a 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -156,7 +156,6 @@ typedef u32 dma_addr_t; typedef unsigned __bitwise__ gfp_t; typedef unsigned __bitwise__ fmode_t; -typedef unsigned __bitwise__ oom_flags_t; #ifdef CONFIG_PHYS_ADDR_T_64BIT typedef u64 phys_addr_t; -- cgit v1.2.3 From 5c8ccefdf46c5f87d87b694c7fbc04941c2c99a5 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 23 May 2016 16:24:02 -0700 Subject: signal: move the "sig < SIGRTMIN" check into siginmask(sig) All the users of siginmask() must ensure that sig < SIGRTMIN. sig_fatal() doesn't and this is wrong: UBSAN: Undefined behaviour in kernel/signal.c:911:6 shift exponent 32 is too large for 32-bit type 'long unsigned int' the patch doesn't add the neccesary check to sig_fatal(), it moves the check into siginmask() and updates other callers. Link: http://lkml.kernel.org/r/20160517195052.GA15187@redhat.com Reported-by: Meelis Roos Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/signal.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index 639be264f5f9..b63f63eaa39c 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -400,7 +400,9 @@ int unhandled_signal(struct task_struct *tsk, int sig); #else #define rt_sigmask(sig) sigmask(sig) #endif -#define siginmask(sig, mask) (rt_sigmask(sig) & (mask)) + +#define siginmask(sig, mask) \ + ((sig) < SIGRTMIN && (rt_sigmask(sig) & (mask))) #define SIG_KERNEL_ONLY_MASK (\ rt_sigmask(SIGKILL) | rt_sigmask(SIGSTOP)) @@ -421,14 +423,10 @@ int unhandled_signal(struct task_struct *tsk, int sig); rt_sigmask(SIGCONT) | rt_sigmask(SIGCHLD) | \ rt_sigmask(SIGWINCH) | rt_sigmask(SIGURG) ) -#define sig_kernel_only(sig) \ - (((sig) < SIGRTMIN) && siginmask(sig, SIG_KERNEL_ONLY_MASK)) -#define sig_kernel_coredump(sig) \ - (((sig) < SIGRTMIN) && siginmask(sig, SIG_KERNEL_COREDUMP_MASK)) -#define sig_kernel_ignore(sig) \ - (((sig) < SIGRTMIN) && siginmask(sig, SIG_KERNEL_IGNORE_MASK)) -#define sig_kernel_stop(sig) \ - (((sig) < SIGRTMIN) && siginmask(sig, SIG_KERNEL_STOP_MASK)) +#define sig_kernel_only(sig) siginmask(sig, SIG_KERNEL_ONLY_MASK) +#define sig_kernel_coredump(sig) siginmask(sig, SIG_KERNEL_COREDUMP_MASK) +#define sig_kernel_ignore(sig) siginmask(sig, SIG_KERNEL_IGNORE_MASK) +#define sig_kernel_stop(sig) siginmask(sig, SIG_KERNEL_STOP_MASK) #define sig_user_defined(t, signr) \ (((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_DFL) && \ -- cgit v1.2.3 From 9b492cf58077a0254eb4b9574029ac6e79add9f9 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Mon, 23 May 2016 16:24:10 -0700 Subject: kexec: introduce a protection mechanism for the crashkernel reserved memory For the cases that some kernel (module) path stamps the crash reserved memory(already mapped by the kernel) where has been loaded the second kernel data, the kdump kernel will probably fail to boot when panic happens (or even not happens) leaving the culprit at large, this is unacceptable. The patch introduces a mechanism for detecting such cases: 1) After each crash kexec loading, it simply marks the reserved memory regions readonly since we no longer access it after that. When someone stamps the region, the first kernel will panic and trigger the kdump. The weak arch_kexec_protect_crashkres() is introduced to do the actual protection. 2) To allow multiple loading, once 1) was done we also need to remark the reserved memory to readwrite each time a system call related to kdump is made. The weak arch_kexec_unprotect_crashkres() is introduced to do the actual protection. The architecture can make its specific implementation by overriding arch_kexec_protect_crashkres() and arch_kexec_unprotect_crashkres(). Signed-off-by: Xunlei Pang Cc: Eric Biederman Cc: Dave Young Cc: Minfei Huang Cc: Vivek Goyal Cc: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 2cc643c6e870..643ff4a3fbf6 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -317,6 +317,8 @@ int __weak arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, unsigned int relsec); int __weak arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, unsigned int relsec); +void arch_kexec_protect_crashkres(void); +void arch_kexec_unprotect_crashkres(void); #else /* !CONFIG_KEXEC_CORE */ struct pt_regs; -- cgit v1.2.3 From 7a0058ec78602da02b34fa2ae3afc523e90d1ab2 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Mon, 23 May 2016 16:24:22 -0700 Subject: s390/kexec: consolidate crash_map/unmap_reserved_pages() and arch_kexec_protect(unprotect)_crashkres() Commit 3f625002581b ("kexec: introduce a protection mechanism for the crashkernel reserved memory") is a similar mechanism for protecting the crash kernel reserved memory to previous crash_map/unmap_reserved_pages() implementation, the new one is more generic in name and cleaner in code (besides, some arch may not be allowed to unmap the pgtable). Therefore, this patch consolidates them, and uses the new arch_kexec_protect(unprotect)_crashkres() to replace former crash_map/unmap_reserved_pages() which by now has been only used by S390. The consolidation work needs the crash memory to be mapped initially, this is done in machine_kdump_pm_init() which is after reserve_crashkernel(). Once kdump kernel is loaded, the new arch_kexec_protect_crashkres() implemented for S390 will actually unmap the pgtable like before. Signed-off-by: Xunlei Pang Signed-off-by: Michael Holzheu Acked-by: Michael Holzheu Cc: Heiko Carstens Cc: "Eric W. Biederman" Cc: Minfei Huang Cc: Vivek Goyal Cc: Dave Young Cc: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 643ff4a3fbf6..e8acb2b43dd9 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -230,8 +230,6 @@ extern void crash_kexec(struct pt_regs *); int kexec_should_crash(struct task_struct *); void crash_save_cpu(struct pt_regs *regs, int cpu); void crash_save_vmcoreinfo(void); -void crash_map_reserved_pages(void); -void crash_unmap_reserved_pages(void); void arch_crash_save_vmcoreinfo(void); __printf(1, 2) void vmcoreinfo_append_str(const char *fmt, ...); -- cgit v1.2.3 From 9fbeb5ab59a2b2a09cca2eb68283e7a090d4b98d Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 23 May 2016 16:25:30 -0700 Subject: mm: make vm_mmap killable All the callers of vm_mmap seem to check for the failure already and bail out in one way or another on the error which means that we can change it to use killable version of vm_mmap_pgoff and return -EINTR if the current task gets killed while waiting for mmap_sem. This also means that vm_mmap_pgoff can be killable by default and drop the additional parameter. This will help in the OOM conditions when the oom victim might be stuck waiting for the mmap_sem for write which in turn can block oom_reaper which relies on the mmap_sem for read to make a forward progress and reclaim the address space of the victim. Please note that load_elf_binary is ignoring vm_mmap error for current->personality & MMAP_PAGE_ZERO case but that shouldn't be a problem because the address is not used anywhere and we never return to the userspace if we got killed. Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: "Kirill A. Shutemov" Cc: Mel Gorman Cc: Oleg Nesterov Cc: Andrea Arcangeli Cc: Al Viro Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index b530c99e8e81..d5eb8dddd7c0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2013,7 +2013,7 @@ static inline void mm_populate(unsigned long addr, unsigned long len) {} /* These take the mm semaphore themselves */ extern unsigned long vm_brk(unsigned long, unsigned long); extern int vm_munmap(unsigned long, size_t); -extern unsigned long vm_mmap(struct file *, unsigned long, +extern unsigned long __must_check vm_mmap(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); -- cgit v1.2.3 From 2d6c928241add2848e4eebfce407e95164229976 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 23 May 2016 16:25:42 -0700 Subject: mm: make vm_brk killable Now that all the callers handle vm_brk failure we can change it wait for mmap_sem killable to help oom_reaper to not get blocked just because vm_brk gets blocked behind mmap_sem readers. Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: "Kirill A. Shutemov" Cc: Oleg Nesterov Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index d5eb8dddd7c0..2835d598d258 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2011,7 +2011,7 @@ static inline void mm_populate(unsigned long addr, unsigned long len) {} #endif /* These take the mm semaphore themselves */ -extern unsigned long vm_brk(unsigned long, unsigned long); +extern unsigned long __must_check vm_brk(unsigned long, unsigned long); extern int vm_munmap(unsigned long, size_t); extern unsigned long __must_check vm_mmap(struct file *, unsigned long, unsigned long, unsigned long, -- cgit v1.2.3 From b7e7ade34e6188bee2e3b0d42b51d25137d9e2a5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 May 2016 11:19:07 +0200 Subject: sched/core: Fix remote wakeups Commit: b5179ac70de8 ("sched/fair: Prepare to fix fairness problems on migration") ... introduced a bug: Mike Galbraith found that it introduced a performance regression, while Paul E. McKenney reported lost wakeups and bisected it to this commit. The reason is that I mis-read ttwu_queue() such that I assumed any wakeup that got a remote queue must have had the task migrated. Since this is not so; we need to transfer this information between queueing the wakeup and actually doing the wakeup. Use a new task_struct::sched_flag for this, we already write to sched_contributes_to_load in the wakeup path so this is a hot and modified cacheline. Reported-by: Paul E. McKenney Reported-by: Mike Galbraith Tested-by: Mike Galbraith Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Hunter Cc: Andy Lutomirski Cc: Ben Segall Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Matt Fleming Cc: Morten Rasmussen Cc: Oleg Nesterov Cc: Paul Turner Cc: Pavan Kondeti Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Thomas Gleixner Cc: byungchul.park@lge.com Fixes: b5179ac70de8 ("sched/fair: Prepare to fix fairness problems on migration") Link: http://lkml.kernel.org/r/20160523091907.GD15728@worktop.ger.corp.intel.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6cc0df970f1a..e053517a88b6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1533,6 +1533,7 @@ struct task_struct { unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; unsigned sched_migrated:1; + unsigned sched_remote_wakeup:1; unsigned :0; /* force alignment to the next boundary */ /* unserialized, strictly 'current' */ -- cgit v1.2.3 From 536a6f88c49dd739961ffd53774775afed852c83 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Wed, 18 May 2016 13:26:23 +0200 Subject: KVM: Create debugfs dir and stat files for each VM This patch adds a kvm debugfs subdirectory for each VM, which is named after its pid and file descriptor. The directories contain the same kind of files that are already in the kvm debugfs directory, but the data exported through them is now VM specific. This makes the debugfs kvm data a convenient alternative to the tracepoints which already have per VM data. The debugfs data is easy to read and low overhead. CC: Dan Carpenter [includes fixes by Dan Carpenter] Signed-off-by: Janosch Frank Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b1fa8f11c95b..1c9c973a7dd9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -412,6 +412,8 @@ struct kvm { #endif long tlbs_dirty; struct list_head devices; + struct dentry *debugfs_dentry; + struct kvm_stat_data **debugfs_stat_data; }; #define kvm_err(fmt, ...) \ @@ -991,6 +993,11 @@ enum kvm_stat_kind { KVM_STAT_VCPU, }; +struct kvm_stat_data { + int offset; + struct kvm *kvm; +}; + struct kvm_stats_debugfs_item { const char *name; int offset; -- cgit v1.2.3 From 13d1ad16d05eebb4db977eb955716b9da2c19fbd Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 27 Apr 2016 14:15:51 +0200 Subject: libceph: move message allocation out of ceph_osdc_alloc_request() The size of ->r_request and ->r_reply messages depends on the size of the object name (ceph_object_id), while the size of ceph_osd_request is fixed. Move message allocation into a separate function that would have to be called after ceph_object_id and ceph_object_locator (which is also going to become variable in size with RADOS namespaces) have been filled in: req = ceph_osdc_alloc_request(...); r_base_oid> r_base_oloc> ceph_osdc_alloc_messages(req); Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index cbf460927c42..66a1fcd5bff7 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -322,6 +322,7 @@ extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client * unsigned int num_ops, bool use_mempool, gfp_t gfp_flags); +int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp); extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, struct ceph_snap_context *snapc, -- cgit v1.2.3 From d30291b985d1854565d7f2c82a4457869d5265e8 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 29 Apr 2016 19:54:20 +0200 Subject: libceph: variable-sized ceph_object_id Currently ceph_object_id can hold object names of up to 100 (CEPH_MAX_OID_NAME_LEN) characters. This is enough for all use cases, expect one - long rbd image names: - a format 1 header is named ".rbd" - an object that points to a format 2 header is named "rbd_id." We operate on these potentially long-named objects during rbd map, and, for format 1 images, during header refresh. (A format 2 header name is a small system-generated string.) Lift this 100 character limit by making ceph_object_id be able to point to an externally-allocated string. Apart from being able to work with almost arbitrarily-long named objects, this allows us to reduce the size of ceph_object_id from >100 bytes to 64 bytes. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 62 +++++++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index e55c08bc3a96..777a29412706 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -64,11 +64,47 @@ struct ceph_object_locator { */ #define CEPH_MAX_OID_NAME_LEN 100 +/* + * 51-char inline_name is long enough for all cephfs and all but one + * rbd requests: in ".rbd"/"rbd_id." can be + * arbitrarily long (~PAGE_SIZE). It's done once during rbd map; all + * other rbd requests fit into inline_name. + * + * Makes ceph_object_id 64 bytes on 64-bit. + */ +#define CEPH_OID_INLINE_LEN 52 + +/* + * Both inline and external buffers have space for a NUL-terminator, + * which is carried around. It's not required though - RADOS object + * names don't have to be NUL-terminated and may contain NULs. + */ struct ceph_object_id { - char name[CEPH_MAX_OID_NAME_LEN]; + char *name; + char inline_name[CEPH_OID_INLINE_LEN]; int name_len; }; +static inline void ceph_oid_init(struct ceph_object_id *oid) +{ + oid->name = oid->inline_name; + oid->name_len = 0; +} + +static inline bool ceph_oid_empty(const struct ceph_object_id *oid) +{ + return oid->name == oid->inline_name && !oid->name_len; +} + +void ceph_oid_copy(struct ceph_object_id *dest, + const struct ceph_object_id *src); +__printf(2, 3) +void ceph_oid_printf(struct ceph_object_id *oid, const char *fmt, ...); +__printf(3, 4) +int ceph_oid_aprintf(struct ceph_object_id *oid, gfp_t gfp, + const char *fmt, ...); +void ceph_oid_destroy(struct ceph_object_id *oid); + struct ceph_pg_mapping { struct rb_node node; struct ceph_pg pgid; @@ -113,30 +149,6 @@ struct ceph_osdmap { int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3]; }; -static inline void ceph_oid_set_name(struct ceph_object_id *oid, - const char *name) -{ - int len; - - len = strlen(name); - if (len > sizeof(oid->name)) { - WARN(1, "ceph_oid_set_name '%s' len %d vs %zu, truncating\n", - name, len, sizeof(oid->name)); - len = sizeof(oid->name); - } - - memcpy(oid->name, name, len); - oid->name_len = len; -} - -static inline void ceph_oid_copy(struct ceph_object_id *dest, - struct ceph_object_id *src) -{ - BUG_ON(src->name_len > sizeof(dest->name)); - memcpy(dest->name, src->name, src->name_len); - dest->name_len = src->name_len; -} - static inline int ceph_osd_exists(struct ceph_osdmap *map, int osd) { return osd >= 0 && osd < map->max_osd && -- cgit v1.2.3 From 0c0a8de13f9612a663b050afa955e6668858d1eb Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:21 +0200 Subject: libceph: nuke unused fields and functions Either unused or useless: osdmap->mkfs_epoch osd->o_marked_for_keepalive monc->num_generic_requests osdc->map_waiters osdc->last_requested_map osdc->timeout_tid osd_req_op_cls_response_data() osdmap_apply_incremental() @msgr arg Signed-off-by: Ilya Dryomov --- include/linux/ceph/mon_client.h | 1 - include/linux/ceph/osd_client.h | 8 -------- include/linux/ceph/osdmap.h | 6 ++---- 3 files changed, 2 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index e230e7ed60d3..330d045e4092 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -77,7 +77,6 @@ struct ceph_mon_client { /* pending generic requests */ struct rb_root generic_request_tree; - int num_generic_requests; u64 last_tid; /* subs, indexed with CEPH_SUB_* */ diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 66a1fcd5bff7..63854a8df183 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -37,11 +37,9 @@ struct ceph_osd { struct list_head o_osd_lru; struct ceph_auth_handshake o_auth; unsigned long lru_ttl; - int o_marked_for_keepalive; struct list_head o_keepalive_item; }; - #define CEPH_OSD_SLAB_OPS 2 #define CEPH_OSD_MAX_OPS 16 @@ -206,13 +204,10 @@ struct ceph_osd_client { struct ceph_osdmap *osdmap; /* current map */ struct rw_semaphore map_sem; - struct completion map_waiters; - u64 last_requested_map; struct mutex request_mutex; struct rb_root osds; /* osds */ struct list_head osd_lru; /* idle osds */ - u64 timeout_tid; /* tid of timeout triggering rq */ u64 last_tid; /* tid of last request */ struct rb_root requests; /* pending requests */ struct list_head req_lru; /* in-flight lru */ @@ -271,9 +266,6 @@ extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req, extern struct ceph_osd_data *osd_req_op_extent_osd_data( struct ceph_osd_request *osd_req, unsigned int which); -extern struct ceph_osd_data *osd_req_op_cls_response_data( - struct ceph_osd_request *osd_req, - unsigned int which); extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *, unsigned int which, diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 777a29412706..ce7a41a182d4 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -123,7 +123,6 @@ struct ceph_pg_mapping { struct ceph_osdmap { struct ceph_fsid fsid; u32 epoch; - u32 mkfs_epoch; struct ceph_timespec created, modified; u32 flags; /* CEPH_OSDMAP_* */ @@ -205,9 +204,8 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid) } extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end); -extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, - struct ceph_osdmap *map, - struct ceph_messenger *msgr); +struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, + struct ceph_osdmap *map); extern void ceph_osdmap_destroy(struct ceph_osdmap *map); /* calculate mapping of a file extent to an object */ -- cgit v1.2.3 From fcd00b68bbe2bf5606cb45c2cd4a250a390bcc1f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:22 +0200 Subject: libceph: DEFINE_RB_FUNCS macro Given struct foo { u64 id; struct rb_node bar_node; }; generate insert_bar(), erase_bar() and lookup_bar() functions with DEFINE_RB_FUNCS(bar, struct foo, id, bar_node) The key is assumed to be an integer (u64, int, etc), compared with < and >. nodefld has to be initialized with RB_CLEAR_NODE(). Start using it for MDS, MON and OSD requests and OSD sessions. Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 57 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index db92a8d4926e..690985daad1c 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -180,6 +180,63 @@ static inline int calc_pages_for(u64 off, u64 len) (off >> PAGE_SHIFT); } +/* + * These are not meant to be generic - an integer key is assumed. + */ +#define DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld) \ +static void insert_##name(struct rb_root *root, type *t) \ +{ \ + struct rb_node **n = &root->rb_node; \ + struct rb_node *parent = NULL; \ + \ + BUG_ON(!RB_EMPTY_NODE(&t->nodefld)); \ + \ + while (*n) { \ + type *cur = rb_entry(*n, type, nodefld); \ + \ + parent = *n; \ + if (t->keyfld < cur->keyfld) \ + n = &(*n)->rb_left; \ + else if (t->keyfld > cur->keyfld) \ + n = &(*n)->rb_right; \ + else \ + BUG(); \ + } \ + \ + rb_link_node(&t->nodefld, parent, n); \ + rb_insert_color(&t->nodefld, root); \ +} \ +static void erase_##name(struct rb_root *root, type *t) \ +{ \ + BUG_ON(RB_EMPTY_NODE(&t->nodefld)); \ + rb_erase(&t->nodefld, root); \ + RB_CLEAR_NODE(&t->nodefld); \ +} + +#define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) \ +static type *lookup_##name(struct rb_root *root, \ + typeof(((type *)0)->keyfld) key) \ +{ \ + struct rb_node *n = root->rb_node; \ + \ + while (n) { \ + type *cur = rb_entry(n, type, nodefld); \ + \ + if (key < cur->keyfld) \ + n = n->rb_left; \ + else if (key > cur->keyfld) \ + n = n->rb_right; \ + else \ + return cur; \ + } \ + \ + return NULL; \ +} + +#define DEFINE_RB_FUNCS(name, type, keyfld, nodefld) \ +DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld) \ +DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) + extern struct kmem_cache *ceph_inode_cachep; extern struct kmem_cache *ceph_cap_cachep; extern struct kmem_cache *ceph_cap_flush_cachep; -- cgit v1.2.3 From 985c1673885b77b2e0167c6478a833817d1e2fe5 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:22 +0200 Subject: libceph: fix ceph_eversion encoding eversion_t is version+epoch in userspace and is encoded in that order. ceph_eversion is defined as epoch+version in rados.h, yet we memcpy it in __send_request(). Reoder ceph_eversion fields. Signed-off-by: Ilya Dryomov --- include/linux/ceph/rados.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 2f822dca1046..913c87c26d33 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -114,8 +114,8 @@ struct ceph_object_layout { * compound epoch+version, used by storage layer to serialize mutations */ struct ceph_eversion { - __le32 epoch; __le64 version; + __le32 epoch; } __attribute__ ((packed)); /* -- cgit v1.2.3 From d9591f5e28686277d9312d3c7422faf1368b305e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:22 +0200 Subject: libceph: rename ceph_oloc_oid_to_pg() Rename ceph_oloc_oid_to_pg() to ceph_object_locator_to_pg(). Emphasise that returned is raw PG and return -ENOENT instead of -EIO if the pool doesn't exist. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index ce7a41a182d4..b70440c05b49 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -213,11 +213,10 @@ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, u64 off, u64 len, u64 *bno, u64 *oxoff, u64 *oxlen); -/* calculate mapping of object to a placement group */ -extern int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap, - struct ceph_object_locator *oloc, - struct ceph_object_id *oid, - struct ceph_pg *pg_out); +int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + struct ceph_pg *raw_pgid); extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, -- cgit v1.2.3 From 6f3bfd45cd233eea0b07e3cabc0386b5de9321d2 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:22 +0200 Subject: libceph: ceph_osds, ceph_pg_to_up_acting_osds() Knowning just acting set isn't enough, we need to be able to record up set as well to detect interval changes. This means returning (up[], up_len, up_primary, acting[], acting_len, acting_primary) and passing it around. Introduce and switch to ceph_osds to help with that. Rename ceph_calc_pg_acting() to ceph_pg_to_up_acting_osds() and return both up and acting sets from it. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index b70440c05b49..942189d311e0 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -208,6 +208,20 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, struct ceph_osdmap *map); extern void ceph_osdmap_destroy(struct ceph_osdmap *map); +struct ceph_osds { + int osds[CEPH_PG_MAX_SIZE]; + int size; + int primary; /* id, NOT index */ +}; + +static inline void ceph_osds_init(struct ceph_osds *set) +{ + set->size = 0; + set->primary = -1; +} + +void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src); + /* calculate mapping of a file extent to an object */ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, u64 off, u64 len, @@ -218,9 +232,10 @@ int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap, struct ceph_object_locator *oloc, struct ceph_pg *raw_pgid); -extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, - struct ceph_pg pgid, - int *osds, int *primary); +void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap, + const struct ceph_pg *raw_pgid, + struct ceph_osds *up, + struct ceph_osds *acting); extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid); -- cgit v1.2.3 From f81f16339a05775df600b2ff75a79be1864975c1 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:23 +0200 Subject: libceph: rename ceph_calc_pg_primary() Rename ceph_calc_pg_primary() to ceph_pg_to_acting_primary() to emphasise that it returns acting primary. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 942189d311e0..3fd978a1639b 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -236,8 +236,8 @@ void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap, const struct ceph_pg *raw_pgid, struct ceph_osds *up, struct ceph_osds *acting); -extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, - struct ceph_pg pgid); +int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap, + const struct ceph_pg *raw_pgid); extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map, u64 id); -- cgit v1.2.3 From f984cb76cc5fb9fc76d6abb6c4694a5412e3f49b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:23 +0200 Subject: libceph: make pgid_cmp() global calc_target() code is going to need to know how to compare PGs. Take lhs and rhs pgid by const * while at it. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 3fd978a1639b..7783237ab06c 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -24,6 +24,8 @@ struct ceph_pg { uint32_t seed; }; +int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs); + #define CEPH_POOL_FLAG_HASHPSPOOL 1 struct ceph_pg_pool_info { -- cgit v1.2.3 From 04812acf572ef41fd51c11e0bf3385f34c0e1b5b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:23 +0200 Subject: libceph: pi->min_size, pi->last_force_request_resend Add and decode pi->min_size and pi->last_force_request_resend. These are going to be used by calc_target(). Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 7783237ab06c..989294d0b8d2 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -26,20 +26,23 @@ struct ceph_pg { int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs); -#define CEPH_POOL_FLAG_HASHPSPOOL 1 +#define CEPH_POOL_FLAG_HASHPSPOOL (1ULL << 0) /* hash pg seed and pool id + together */ struct ceph_pg_pool_info { struct rb_node node; s64 id; - u8 type; + u8 type; /* CEPH_POOL_TYPE_* */ u8 size; + u8 min_size; u8 crush_ruleset; u8 object_hash; + u32 last_force_request_resend; u32 pg_num, pgp_num; int pg_num_mask, pgp_num_mask; s64 read_tier; s64 write_tier; /* wins for read+write ops */ - u64 flags; + u64 flags; /* CEPH_POOL_FLAG_* */ char *name; }; -- cgit v1.2.3 From 63244fa123a755e4bbaee03022b68613c71d1332 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:23 +0200 Subject: libceph: introduce ceph_osd_request_target, calc_target() Introduce ceph_osd_request_target, containing all mapping-related fields of ceph_osd_request and calc_target() for calculating mappings and populating it. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 23 +++++++++++++++++++++++ include/linux/ceph/osdmap.h | 34 ++++++++++++++++++++++++++++++++++ include/linux/ceph/rados.h | 5 +++++ 3 files changed, 62 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 63854a8df183..48806ee4488d 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -24,6 +24,8 @@ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *, struct ceph_msg *); typedef void (*ceph_osdc_unsafe_callback_t)(struct ceph_osd_request *, bool); +#define CEPH_HOMELESS_OSD -1 + /* a given osd we're communicating with */ struct ceph_osd { atomic_t o_ref; @@ -118,6 +120,27 @@ struct ceph_osd_req_op { }; }; +struct ceph_osd_request_target { + struct ceph_object_id base_oid; + struct ceph_object_locator base_oloc; + struct ceph_object_id target_oid; + struct ceph_object_locator target_oloc; + + struct ceph_pg pgid; + u32 pg_num; + u32 pg_num_mask; + struct ceph_osds acting; + struct ceph_osds up; + int size; + int min_size; + bool sort_bitwise; + + unsigned int flags; /* CEPH_OSD_FLAG_* */ + bool paused; + + int osd; +}; + /* an in-flight request */ struct ceph_osd_request { u64 r_tid; /* unique for this client */ diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 989294d0b8d2..420bb7968b25 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -28,6 +28,7 @@ int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs); #define CEPH_POOL_FLAG_HASHPSPOOL (1ULL << 0) /* hash pg seed and pool id together */ +#define CEPH_POOL_FLAG_FULL (1ULL << 1) /* pool is full */ struct ceph_pg_pool_info { struct rb_node node; @@ -62,6 +63,22 @@ struct ceph_object_locator { s64 pool; }; +static inline void ceph_oloc_init(struct ceph_object_locator *oloc) +{ + oloc->pool = -1; +} + +static inline bool ceph_oloc_empty(const struct ceph_object_locator *oloc) +{ + return oloc->pool == -1; +} + +static inline void ceph_oloc_copy(struct ceph_object_locator *dest, + const struct ceph_object_locator *src) +{ + dest->pool = src->pool; +} + /* * Maximum supported by kernel client object name length * @@ -227,6 +244,23 @@ static inline void ceph_osds_init(struct ceph_osds *set) void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src); +bool ceph_is_new_interval(const struct ceph_osds *old_acting, + const struct ceph_osds *new_acting, + const struct ceph_osds *old_up, + const struct ceph_osds *new_up, + int old_size, + int new_size, + int old_min_size, + int new_min_size, + u32 old_pg_num, + u32 new_pg_num, + bool old_sort_bitwise, + bool new_sort_bitwise, + const struct ceph_pg *pgid); +bool ceph_osds_changed(const struct ceph_osds *old_acting, + const struct ceph_osds *new_acting, + bool any_change); + /* calculate mapping of a file extent to an object */ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, u64 off, u64 len, diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 913c87c26d33..f28ed864e682 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -153,6 +153,11 @@ extern const char *ceph_osd_state_name(int s); #define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */ #define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */ #define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */ +#define CEPH_OSDMAP_NOSCRUB (1<<11) /* block periodic scrub */ +#define CEPH_OSDMAP_NODEEP_SCRUB (1<<12) /* block periodic deep-scrub */ +#define CEPH_OSDMAP_NOTIERAGENT (1<<13) /* disable tiering agent */ +#define CEPH_OSDMAP_NOREBALANCE (1<<14) /* block osd backfill unless pg is degraded */ +#define CEPH_OSDMAP_SORTBITWISE (1<<15) /* use bitwise hobject_t sort */ /* * The error code to return when an OSD can't handle a write -- cgit v1.2.3 From a66dd38309f5d9c66ec9bc7911ff8da8cc37bb9f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:23 +0200 Subject: libceph: switch to calc_target(), part 1 Replace __calc_request_pg() and most of __map_request() with calc_target() and start using req->r_t. ceph_osdc_build_request() however still encodes base_oid, because it's called before calc_target() is and target_oid is empty at that point in time; a printf in osdc_show() also shows base_oid. This is fixed in "libceph: switch to calc_target(), part 2". Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 48806ee4488d..03bf9d9e1517 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -150,12 +150,13 @@ struct ceph_osd_request { struct list_head r_linger_item; struct list_head r_linger_osd_item; struct ceph_osd *r_osd; - struct ceph_pg r_pgid; - int r_pg_osds[CEPH_PG_MAX_SIZE]; - int r_num_pg_osds; + + struct ceph_osd_request_target r_t; +#define r_base_oid r_t.base_oid +#define r_base_oloc r_t.base_oloc +#define r_flags r_t.flags struct ceph_msg *r_request, *r_reply; - int r_flags; /* any additional flags for the osd */ u32 r_sent; /* >0 if r_request is sending/sent */ /* request osd ops array */ @@ -167,7 +168,6 @@ struct ceph_osd_request { __le64 *r_request_pool; void *r_request_pgid; __le32 *r_request_attempts; - bool r_paused; struct ceph_eversion *r_request_reassert_version; int r_result; @@ -186,11 +186,6 @@ struct ceph_osd_request { struct inode *r_inode; /* for use by callbacks */ void *r_priv; /* ditto */ - struct ceph_object_locator r_base_oloc; - struct ceph_object_id r_base_oid; - struct ceph_object_locator r_target_oloc; - struct ceph_object_id r_target_oid; - u64 r_snapid; unsigned long r_stamp; /* send OR check time */ -- cgit v1.2.3 From bb873b539154ab51893430b4ad6ba4051775276a Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 26 May 2016 00:29:52 +0200 Subject: libceph: switch to calc_target(), part 2 The crux of this is getting rid of ceph_osdc_build_request(), so that MOSDOp can be encoded not before but after calc_target() calculates the actual target. Encoding now happens within ceph_osdc_start_request(). Also nuked is the accompanying bunch of pointers into the encoded buffer that was used to update fields on each send - instead, the entire front is re-encoded. If we want to support target->name_len != base->name_len in the future, there is no other way, because oid is surrounded by other fields in the encoded buffer. Encoding OSD ops and adding data items to the request message were mixed together in osd_req_encode_op(). While we want to re-encode OSD ops, we don't want to add duplicate data items to the message when resending, so all call to ceph_osdc_msg_data_add() are factored out into a new setup_request_data(). Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 29 +++++++++++------------------ include/linux/ceph/rados.h | 7 +++++++ 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 03bf9d9e1517..67a37d98e0ca 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -104,7 +104,7 @@ struct ceph_osd_req_op { struct ceph_osd_data response_data; __u8 class_len; __u8 method_len; - __u8 argc; + u32 indata_len; } cls; struct { u64 cookie; @@ -162,14 +162,6 @@ struct ceph_osd_request { /* request osd ops array */ unsigned int r_num_ops; - /* these are updated on each send */ - __le32 *r_request_osdmap_epoch; - __le32 *r_request_flags; - __le64 *r_request_pool; - void *r_request_pgid; - __le32 *r_request_attempts; - struct ceph_eversion *r_request_reassert_version; - int r_result; int r_got_reply; int r_linger; @@ -180,16 +172,22 @@ struct ceph_osd_request { struct completion r_completion, r_safe_completion; ceph_osdc_callback_t r_callback; ceph_osdc_unsafe_callback_t r_unsafe_callback; - struct ceph_eversion r_reassert_version; struct list_head r_unsafe_item; struct inode *r_inode; /* for use by callbacks */ void *r_priv; /* ditto */ - u64 r_snapid; - unsigned long r_stamp; /* send OR check time */ + /* set by submitter */ + u64 r_snapid; /* for reads, CEPH_NOSNAP o/w */ + struct ceph_snap_context *r_snapc; /* for writes */ + struct timespec r_mtime; /* ditto */ + u64 r_data_offset; /* ditto */ - struct ceph_snap_context *r_snapc; /* snap context for writes */ + /* internal */ + unsigned long r_stamp; /* jiffies, send or check time */ + int r_attempts; + struct ceph_eversion r_replay_version; /* aka reassert_version */ + u32 r_last_force_resend; struct ceph_osd_req_op r_ops[]; }; @@ -334,11 +332,6 @@ extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client * gfp_t gfp_flags); int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp); -extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, - struct ceph_snap_context *snapc, - u64 snap_id, - struct timespec *mtime); - extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, struct ceph_file_layout *layout, struct ceph_vino vino, diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index f28ed864e682..28740a58f32c 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -394,6 +394,13 @@ enum { CEPH_OSD_FLAG_SKIPRWLOCKS = 0x10000, /* skip rw locks */ CEPH_OSD_FLAG_IGNORE_OVERLAY = 0x20000, /* ignore pool overlay */ CEPH_OSD_FLAG_FLUSH = 0x40000, /* this is part of flush */ + CEPH_OSD_FLAG_MAP_SNAP_CLONE = 0x80000, /* map snap direct to clone id */ + CEPH_OSD_FLAG_ENFORCE_SNAPC = 0x100000, /* use snapc provided even if + pool uses pool snaps */ + CEPH_OSD_FLAG_REDIRECTED = 0x200000, /* op has been redirected */ + CEPH_OSD_FLAG_KNOWN_REDIR = 0x400000, /* redirect bit is authoritative */ + CEPH_OSD_FLAG_FULL_TRY = 0x800000, /* try op despite full flag */ + CEPH_OSD_FLAG_FULL_FORCE = 0x1000000, /* force op despite full flag */ }; enum { -- cgit v1.2.3 From 85e084feb47349d62989efe1713a8723af95f4ea Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:24 +0200 Subject: libceph: drop msg argument from ceph_osdc_callback_t finish_read(), its only user, uses it to get to hdr.data_len, which is what ->r_result is set to on success. This gains us the ability to safely call callbacks from contexts other than reply, e.g. map check. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 67a37d98e0ca..3bebd60e7f9f 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -20,8 +20,7 @@ struct ceph_osd_client; /* * completion callback for async writepages */ -typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *, - struct ceph_msg *); +typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *); typedef void (*ceph_osdc_unsafe_callback_t)(struct ceph_osd_request *, bool); #define CEPH_HOMELESS_OSD -1 -- cgit v1.2.3 From fe5da05e979830b43b115d8a18ead521d507c783 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:24 +0200 Subject: libceph: redo callbacks and factor out MOSDOpReply decoding If you specify ACK | ONDISK and set ->r_unsafe_callback, both ->r_callback and ->r_unsafe_callback(true) are called on ack. This is very confusing. Redo this so that only one of them is called: ->r_unsafe_callback(true), on ack ->r_unsafe_callback(false), on commit or ->r_callback, on ack|commit Decode everything in decode_MOSDOpReply() to reduce clutter. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 3bebd60e7f9f..2415dc0cb008 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -162,13 +162,14 @@ struct ceph_osd_request { unsigned int r_num_ops; int r_result; - int r_got_reply; + bool r_got_reply; int r_linger; struct ceph_osd_client *r_osdc; struct kref r_kref; bool r_mempool; - struct completion r_completion, r_safe_completion; + struct completion r_completion; + struct completion r_safe_completion; /* fsync waiter */ ceph_osdc_callback_t r_callback; ceph_osdc_unsafe_callback_t r_unsafe_callback; struct list_head r_unsafe_item; -- cgit v1.2.3 From e5253a7bde13788d9dc75f42eb47ea119af5609f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:25 +0200 Subject: libceph: allocate dummy osdmap in ceph_osdc_init() This leads to a simpler osdmap handling code, particularly when dealing with pi->was_full, which is introduced in a later commit. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 420bb7968b25..8468c734d712 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -225,6 +225,7 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid) return 0; } +struct ceph_osdmap *ceph_osdmap_alloc(void); extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end); struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, struct ceph_osdmap *map); -- cgit v1.2.3 From 42c1b1240326cbea86f15f5d4ce565d8b54be31f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:25 +0200 Subject: libceph: handle_one_map() Separate osdmap handling from decoding and iterating over a bag of maps in a fresh MOSDMap message. This sets up the scene for the updated OSD client. Of particular importance here is the addition of pi->was_full, which can be used to answer "did this pool go full -> not-full in this map?". This is the key bit for supporting pool quotas. We won't be able to downgrade map_sem for much longer, so drop downgrade_write(). Signed-off-by: Ilya Dryomov --- include/linux/ceph/mon_client.h | 1 + include/linux/ceph/osdmap.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index 330d045e4092..c14e9d861cda 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -115,6 +115,7 @@ extern const char *ceph_sub_str[]; bool ceph_monc_want_map(struct ceph_mon_client *monc, int sub, u32 epoch, bool continuous); void ceph_monc_got_map(struct ceph_mon_client *monc, int sub, u32 epoch); +void ceph_monc_renew_subs(struct ceph_mon_client *monc); extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc); extern int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 8468c734d712..821e16fff39a 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -45,6 +45,8 @@ struct ceph_pg_pool_info { s64 write_tier; /* wins for read+write ops */ u64 flags; /* CEPH_POOL_FLAG_* */ char *name; + + bool was_full; /* for handle_one_map() */ }; static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool) -- cgit v1.2.3 From 9dd2845ccb40452d4ac943231ea34aade4a02c68 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:26 +0200 Subject: libceph: protect osdc->osd_lru list with a spinlock OSD client is getting moved from the big per-client lock to a set of per-session locks. The big rwlock would only be held for read most of the time, so a global osdc->osd_lru needs additional protection. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 2415dc0cb008..486d681694c4 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -224,6 +224,7 @@ struct ceph_osd_client { struct mutex request_mutex; struct rb_root osds; /* osds */ struct list_head osd_lru; /* idle osds */ + spinlock_t osd_lru_lock; u64 last_tid; /* tid of last request */ struct rb_root requests; /* pending requests */ struct list_head req_lru; /* in-flight lru */ -- cgit v1.2.3 From 5aea3dcd50215fa9563270251ad7323e2f2490ee Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:26 +0200 Subject: libceph: a major OSD client update This is a major sync up, up to ~Jewel. The highlights are: - per-session request trees (vs a global per-client tree) - per-session locking (vs a global per-client rwlock) - homeless OSD session - no ad-hoc global per-client lists - support for pool quotas - foundation for watch/notify v2 support - foundation for map check (pool deletion detection) support The switchover is incomplete: lingering requests can be setup and teared down but aren't ever reestablished. This functionality is restored with the introduction of the new lingering infrastructure (ceph_osd_linger_request, linger_work, etc) in a later commit. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 486d681694c4..342f22f1f040 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -33,12 +33,13 @@ struct ceph_osd { int o_incarnation; struct rb_node o_node; struct ceph_connection o_con; - struct list_head o_requests; + struct rb_root o_requests; struct list_head o_linger_requests; struct list_head o_osd_lru; struct ceph_auth_handshake o_auth; unsigned long lru_ttl; struct list_head o_keepalive_item; + struct mutex lock; }; #define CEPH_OSD_SLAB_OPS 2 @@ -144,8 +145,6 @@ struct ceph_osd_request_target { struct ceph_osd_request { u64 r_tid; /* unique for this client */ struct rb_node r_node; - struct list_head r_req_lru_item; - struct list_head r_osd_item; struct list_head r_linger_item; struct list_head r_linger_osd_item; struct ceph_osd *r_osd; @@ -219,19 +218,16 @@ struct ceph_osd_client { struct ceph_client *client; struct ceph_osdmap *osdmap; /* current map */ - struct rw_semaphore map_sem; + struct rw_semaphore lock; - struct mutex request_mutex; struct rb_root osds; /* osds */ struct list_head osd_lru; /* idle osds */ spinlock_t osd_lru_lock; - u64 last_tid; /* tid of last request */ - struct rb_root requests; /* pending requests */ - struct list_head req_lru; /* in-flight lru */ - struct list_head req_unsent; /* unsent/need-resend queue */ - struct list_head req_notarget; /* map to no osd */ struct list_head req_linger; /* lingering requests */ - int num_requests; + struct ceph_osd homeless_osd; + atomic64_t last_tid; /* tid of last request */ + atomic_t num_requests; + atomic_t num_homeless; struct delayed_work timeout_work; struct delayed_work osds_timeout_work; #ifdef CONFIG_DEBUG_FS -- cgit v1.2.3 From 922dab6134178cae317ae00de86376cba59f3147 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 26 May 2016 01:15:02 +0200 Subject: libceph, rbd: ceph_osd_linger_request, watch/notify v2 This adds support and switches rbd to a new, more reliable version of watch/notify protocol. As with the OSD client update, this is mostly about getting the right structures linked into the right places so that reconnects are properly sent when needed. watch/notify v2 also requires sending regular pings to the OSDs - send_linger_ping(). A major change from the old watch/notify implementation is the introduction of ceph_osd_linger_request - linger requests no longer piggy back on ceph_osd_request. ceph_osd_event has been merged into ceph_osd_linger_request. All the details are now hidden within libceph, the interface consists of a simple pair of watch/unwatch functions and ceph_osdc_notify_ack(). ceph_osdc_watch() does return ceph_osd_linger_request, but only to keep the lifetime management simple. ceph_osdc_notify_ack() accepts an optional data payload, which is relayed back to the notifier. Portions of this patch are loosely based on work by Douglas Fuller and Mike Christie . Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 5 ++- include/linux/ceph/osd_client.h | 97 ++++++++++++++++++++++++----------------- include/linux/ceph/rados.h | 17 ++++++-- 3 files changed, 75 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 37f28bf55ce4..3b911ff889dd 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -153,8 +153,9 @@ struct ceph_dir_layout { /* watch-notify operations */ enum { - WATCH_NOTIFY = 1, /* notifying watcher */ - WATCH_NOTIFY_COMPLETE = 2, /* notifier notified when done */ + CEPH_WATCH_EVENT_NOTIFY = 1, /* notifying watcher */ + CEPH_WATCH_EVENT_NOTIFY_COMPLETE = 2, /* notifier notified when done */ + CEPH_WATCH_EVENT_DISCONNECT = 3, /* we were disconnected */ }; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 342f22f1f040..cd2dcb8939de 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -34,7 +34,7 @@ struct ceph_osd { struct rb_node o_node; struct ceph_connection o_con; struct rb_root o_requests; - struct list_head o_linger_requests; + struct rb_root o_linger_requests; struct list_head o_osd_lru; struct ceph_auth_handshake o_auth; unsigned long lru_ttl; @@ -108,11 +108,12 @@ struct ceph_osd_req_op { } cls; struct { u64 cookie; - u64 ver; - u32 prot_ver; - u32 timeout; - __u8 flag; + __u8 op; /* CEPH_OSD_WATCH_OP_ */ + u32 gen; } watch; + struct { + struct ceph_osd_data request_data; + } notify_ack; struct { u64 expected_object_size; u64 expected_write_size; @@ -145,8 +146,6 @@ struct ceph_osd_request_target { struct ceph_osd_request { u64 r_tid; /* unique for this client */ struct rb_node r_node; - struct list_head r_linger_item; - struct list_head r_linger_osd_item; struct ceph_osd *r_osd; struct ceph_osd_request_target r_t; @@ -162,7 +161,6 @@ struct ceph_osd_request { int r_result; bool r_got_reply; - int r_linger; struct ceph_osd_client *r_osdc; struct kref r_kref; @@ -181,6 +179,7 @@ struct ceph_osd_request { struct ceph_snap_context *r_snapc; /* for writes */ struct timespec r_mtime; /* ditto */ u64 r_data_offset; /* ditto */ + bool r_linger; /* don't resend on failure */ /* internal */ unsigned long r_stamp; /* jiffies, send or check time */ @@ -195,23 +194,40 @@ struct ceph_request_redirect { struct ceph_object_locator oloc; }; -struct ceph_osd_event { - u64 cookie; - int one_shot; +typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie, + u64 notifier_id, void *data, size_t data_len); +typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err); + +struct ceph_osd_linger_request { struct ceph_osd_client *osdc; - void (*cb)(u64, u64, u8, void *); - void *data; - struct rb_node node; - struct list_head osd_node; + u64 linger_id; + bool committed; + + struct ceph_osd *osd; + struct ceph_osd_request *reg_req; + struct ceph_osd_request *ping_req; + unsigned long ping_sent; + + struct ceph_osd_request_target t; + u32 last_force_resend; + + struct timespec mtime; + struct kref kref; -}; + struct mutex lock; + struct rb_node node; /* osd */ + struct rb_node osdc_node; /* osdc */ + struct list_head scan_item; + + struct completion reg_commit_wait; + int reg_commit_error; + int last_error; + + u32 register_gen; -struct ceph_osd_event_work { - struct work_struct work; - struct ceph_osd_event *event; - u64 ver; - u64 notify_id; - u8 opcode; + rados_watchcb2_t wcb; + rados_watcherrcb_t errcb; + void *data; }; struct ceph_osd_client { @@ -223,9 +239,10 @@ struct ceph_osd_client { struct rb_root osds; /* osds */ struct list_head osd_lru; /* idle osds */ spinlock_t osd_lru_lock; - struct list_head req_linger; /* lingering requests */ struct ceph_osd homeless_osd; atomic64_t last_tid; /* tid of last request */ + u64 last_linger_id; + struct rb_root linger_requests; /* lingering requests */ atomic_t num_requests; atomic_t num_homeless; struct delayed_work timeout_work; @@ -239,10 +256,6 @@ struct ceph_osd_client { struct ceph_msgpool msgpool_op; struct ceph_msgpool msgpool_op_reply; - spinlock_t event_lock; - struct rb_root event_tree; - u64 event_count; - struct workqueue_struct *notify_wq; }; @@ -314,9 +327,6 @@ extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req, extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, const char *name, const void *value, size_t size, u8 cmp_op, u8 cmp_mode); -extern void osd_req_op_watch_init(struct ceph_osd_request *osd_req, - unsigned int which, u16 opcode, - u64 cookie, u64 version, int flag); extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, unsigned int which, u64 expected_object_size, @@ -339,9 +349,6 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, u32 truncate_seq, u64 truncate_size, bool use_mempool); -extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, - struct ceph_osd_request *req); - extern void ceph_osdc_get_request(struct ceph_osd_request *req); extern void ceph_osdc_put_request(struct ceph_osd_request *req); @@ -372,11 +379,23 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct timespec *mtime, struct page **pages, int nr_pages); -/* watch/notify events */ -extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, - void (*event_cb)(u64, u64, u8, void *), - void *data, struct ceph_osd_event **pevent); -extern void ceph_osdc_cancel_event(struct ceph_osd_event *event); -extern void ceph_osdc_put_event(struct ceph_osd_event *event); +/* watch/notify */ +struct ceph_osd_linger_request * +ceph_osdc_watch(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + rados_watchcb2_t wcb, + rados_watcherrcb_t errcb, + void *data); +int ceph_osdc_unwatch(struct ceph_osd_client *osdc, + struct ceph_osd_linger_request *lreq); + +int ceph_osdc_notify_ack(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + u64 notify_id, + u64 cookie, + void *payload, + size_t payload_len); #endif diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 28740a58f32c..204c8c944703 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -427,7 +427,17 @@ enum { CEPH_OSD_CMPXATTR_MODE_U64 = 2 }; -#define RADOS_NOTIFY_VER 1 +enum { + CEPH_OSD_WATCH_OP_UNWATCH = 0, + CEPH_OSD_WATCH_OP_LEGACY_WATCH = 1, + /* note: use only ODD ids to prevent pre-giant code from + interpreting the op as UNWATCH */ + CEPH_OSD_WATCH_OP_WATCH = 3, + CEPH_OSD_WATCH_OP_RECONNECT = 5, + CEPH_OSD_WATCH_OP_PING = 7, +}; + +const char *ceph_osd_watch_op_name(int o); /* * an individual object operation. each may be accompanied by some data @@ -462,8 +472,9 @@ struct ceph_osd_op { } __attribute__ ((packed)) snap; struct { __le64 cookie; - __le64 ver; - __u8 flag; /* 0 = unwatch, 1 = watch */ + __le64 ver; /* no longer used */ + __u8 op; /* CEPH_OSD_WATCH_OP_* */ + __le32 gen; /* registration generation */ } __attribute__ ((packed)) watch; struct { __le64 offset, length; -- cgit v1.2.3 From 1907920324f1f3ebb6618344417c03a2863bba01 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:27 +0200 Subject: libceph: support for sending notifies Implement ceph_osdc_notify() for sending notifies. Due to the fact that the current messenger can't do read-in into pagelists (it can only do write-out from them), I had to go with a page vector for a NOTIFY_COMPLETE payload, for now. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 20 ++++++++++++++++++++ include/linux/ceph/rados.h | 3 +++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index cd2dcb8939de..63054fae4f15 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -114,6 +114,11 @@ struct ceph_osd_req_op { struct { struct ceph_osd_data request_data; } notify_ack; + struct { + u64 cookie; + struct ceph_osd_data request_data; + struct ceph_osd_data response_data; + } notify; struct { u64 expected_object_size; u64 expected_write_size; @@ -202,6 +207,7 @@ struct ceph_osd_linger_request { struct ceph_osd_client *osdc; u64 linger_id; bool committed; + bool is_watch; /* watch or notify */ struct ceph_osd *osd; struct ceph_osd_request *reg_req; @@ -220,14 +226,20 @@ struct ceph_osd_linger_request { struct list_head scan_item; struct completion reg_commit_wait; + struct completion notify_finish_wait; int reg_commit_error; + int notify_finish_error; int last_error; u32 register_gen; + u64 notify_id; rados_watchcb2_t wcb; rados_watcherrcb_t errcb; void *data; + + struct page ***preply_pages; + size_t *preply_len; }; struct ceph_osd_client { @@ -397,5 +409,13 @@ int ceph_osdc_notify_ack(struct ceph_osd_client *osdc, u64 cookie, void *payload, size_t payload_len); +int ceph_osdc_notify(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + void *payload, + size_t payload_len, + u32 timeout, + struct page ***preply_pages, + size_t *preply_len); #endif diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 204c8c944703..5c0da61cb763 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -476,6 +476,9 @@ struct ceph_osd_op { __u8 op; /* CEPH_OSD_WATCH_OP_* */ __le32 gen; /* registration generation */ } __attribute__ ((packed)) watch; + struct { + __le64 cookie; + } __attribute__ ((packed)) notify; struct { __le64 offset, length; __le64 src_offset; -- cgit v1.2.3 From b07d3c4bd7270c74e2b6803af8ac8a00cb3e5ed2 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:27 +0200 Subject: libceph: support for checking on status of watch Implement ceph_osdc_watch_check() to be able to check on status of watch. Note that the time it takes for a watch/notify event to get delivered through the notify_wq is taken into account. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 63054fae4f15..2ae7cfd82ec9 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -213,6 +213,8 @@ struct ceph_osd_linger_request { struct ceph_osd_request *reg_req; struct ceph_osd_request *ping_req; unsigned long ping_sent; + unsigned long watch_valid_thru; + struct list_head pending_lworks; struct ceph_osd_request_target t; u32 last_force_resend; @@ -417,5 +419,7 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, u32 timeout, struct page ***preply_pages, size_t *preply_len); +int ceph_osdc_watch_check(struct ceph_osd_client *osdc, + struct ceph_osd_linger_request *lreq); #endif -- cgit v1.2.3 From d0b19705e99939f5ae5aa9b57bfe41dd4777d951 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:27 +0200 Subject: libceph: async MON client generic requests For map check, we are going to need to send CEPH_MSG_MON_GET_VERSION messages asynchronously and get a callback on completion. Refactor MON client to allow firing off generic requests asynchronously and add an async variant of ceph_monc_get_version(). ceph_monc_do_statfs() is switched over and remains sync. Signed-off-by: Ilya Dryomov --- include/linux/ceph/mon_client.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index c14e9d861cda..19800d9b45f3 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -39,20 +39,31 @@ struct ceph_mon_request { ceph_monc_request_func_t do_request; }; +typedef void (*ceph_monc_callback_t)(struct ceph_mon_generic_request *); + /* * ceph_mon_generic_request is being used for the statfs and * mon_get_version requests which are being done a bit differently * because we need to get data back to the caller */ struct ceph_mon_generic_request { + struct ceph_mon_client *monc; struct kref kref; u64 tid; struct rb_node node; int result; - void *buf; + struct completion completion; + ceph_monc_callback_t complete_cb; + u64 private_data; /* r_tid/linger_id */ + struct ceph_msg *request; /* original request */ struct ceph_msg *reply; /* and reply */ + + union { + struct ceph_statfs *st; + u64 newest; + } u; }; struct ceph_mon_client { @@ -124,8 +135,10 @@ extern int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, extern int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf); -extern int ceph_monc_do_get_version(struct ceph_mon_client *monc, - const char *what, u64 *newest); +int ceph_monc_get_version(struct ceph_mon_client *monc, const char *what, + u64 *newest); +int ceph_monc_get_version_async(struct ceph_mon_client *monc, const char *what, + ceph_monc_callback_t cb, u64 private_data); extern int ceph_monc_open_session(struct ceph_mon_client *monc); -- cgit v1.2.3 From 4609245e2670e3698b083bcd9cc69a65b2b6f9a6 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:27 +0200 Subject: libceph: pool deletion detection This adds the "map check" infrastructure for sending osdmap version checks on CALC_TARGET_POOL_DNE and completing in-flight requests with -ENOENT if the target pool doesn't exist or has just been deleted. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 2ae7cfd82ec9..3e7bf72e4078 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -151,6 +151,7 @@ struct ceph_osd_request_target { struct ceph_osd_request { u64 r_tid; /* unique for this client */ struct rb_node r_node; + struct rb_node r_mc_node; /* map check */ struct ceph_osd *r_osd; struct ceph_osd_request_target r_t; @@ -191,6 +192,7 @@ struct ceph_osd_request { int r_attempts; struct ceph_eversion r_replay_version; /* aka reassert_version */ u32 r_last_force_resend; + u32 r_map_dne_bound; struct ceph_osd_req_op r_ops[]; }; @@ -218,6 +220,7 @@ struct ceph_osd_linger_request { struct ceph_osd_request_target t; u32 last_force_resend; + u32 map_dne_bound; struct timespec mtime; @@ -225,6 +228,7 @@ struct ceph_osd_linger_request { struct mutex lock; struct rb_node node; /* osd */ struct rb_node osdc_node; /* osdc */ + struct rb_node mc_node; /* map check */ struct list_head scan_item; struct completion reg_commit_wait; @@ -257,6 +261,8 @@ struct ceph_osd_client { atomic64_t last_tid; /* tid of last request */ u64 last_linger_id; struct rb_root linger_requests; /* lingering requests */ + struct rb_root map_checks; + struct rb_root linger_map_checks; atomic_t num_requests; atomic_t num_homeless; struct delayed_work timeout_work; -- cgit v1.2.3 From 7cca78c9dcd1afa243e46edc31896730df85d2b5 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:28 +0200 Subject: libceph: replace ceph_monc_request_next_osdmap() ... with a wrapper around maybe_request_map() - no need for two osdmap-specific functions. Signed-off-by: Ilya Dryomov --- include/linux/ceph/mon_client.h | 1 - include/linux/ceph/osd_client.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index 19800d9b45f3..1d730993c3f8 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -128,7 +128,6 @@ bool ceph_monc_want_map(struct ceph_mon_client *monc, int sub, u32 epoch, void ceph_monc_got_map(struct ceph_mon_client *monc, int sub, u32 epoch); void ceph_monc_renew_subs(struct ceph_mon_client *monc); -extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc); extern int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, unsigned long timeout); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 3e7bf72e4078..19b14862d3e0 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -381,6 +381,7 @@ extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc, extern void ceph_osdc_sync(struct ceph_osd_client *osdc); extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc); +void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc); extern int ceph_osdc_readpages(struct ceph_osd_client *osdc, struct ceph_vino vino, -- cgit v1.2.3 From 737cc81ead34bcef0b1f6ea8322228e4378cf21a Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 26 May 2016 00:05:01 +0200 Subject: libceph: support for subscribing to "mdsmap." maps Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 2 ++ include/linux/ceph/mon_client.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 3b911ff889dd..bae833d0d055 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -208,6 +208,8 @@ struct ceph_mon_subscribe_ack { struct ceph_fsid fsid; } __attribute__ ((packed)); +#define CEPH_FS_CLUSTER_ID_NONE -1 + /* * mdsmap flags */ diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index 1d730993c3f8..e2a92df08b47 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -96,6 +96,7 @@ struct ceph_mon_client { bool want; u32 have; /* epoch */ } subs[3]; + int fs_cluster_id; /* "mdsmap." sub */ #ifdef CONFIG_DEBUG_FS struct dentry *debugfs_file; -- cgit v1.2.3 From 956d39d631dbcf7b57854873a24e309047f2a7f5 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 27 Apr 2016 17:48:30 +0800 Subject: ceph: define 'end/complete' in readdir reply as bit flags Set a flag in readdir request, which indicates that client interprets 'end/complete' as bit flags. So that mds can reply additional flags in readdir reply. Signed-off-by: Yan, Zheng --- include/linux/ceph/ceph_fs.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index bae833d0d055..a811c5e98bfa 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -347,6 +347,17 @@ extern const char *ceph_mds_op_name(int op); #define CEPH_XATTR_REPLACE (1 << 1) #define CEPH_XATTR_REMOVE (1 << 31) +/* + * readdir request flags; + */ +#define CEPH_READDIR_REPLY_BITFLAGS (1<<0) + +/* + * readdir reply flags. + */ +#define CEPH_READDIR_FRAG_END (1<<0) +#define CEPH_READDIR_FRAG_COMPLETE (1<<8) + union ceph_mds_request_args { struct { __le32 mask; /* CEPH_CAP_* */ @@ -364,6 +375,7 @@ union ceph_mds_request_args { __le32 frag; /* which dir fragment */ __le32 max_entries; /* how many dentries to grab */ __le32 max_bytes; + __le16 flags; } __attribute__ ((packed)) readdir; struct { __le32 mode; -- cgit v1.2.3 From f3c4ebe65ea149ec892f94474233cfebe9cbe299 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 29 Apr 2016 11:27:30 +0800 Subject: ceph: using hash value to compose dentry offset If MDS sorts dentries in dirfrag in hash order, we use hash value to compose dentry offset. dentry offset is: (0xff << 52) | ((24 bits hash) << 28) | (the nth entry hash hash collision) This offset is stable across directory fragmentation. This alos means there is no need to reset readdir offset if directory get fragmented in the middle of readdir. Signed-off-by: Yan, Zheng --- include/linux/ceph/ceph_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index a811c5e98bfa..dfce616002ad 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -357,6 +357,7 @@ extern const char *ceph_mds_op_name(int op); */ #define CEPH_READDIR_FRAG_END (1<<0) #define CEPH_READDIR_FRAG_COMPLETE (1<<8) +#define CEPH_READDIR_HASH_ORDER (1<<9) union ceph_mds_request_args { struct { -- cgit v1.2.3 From 3b33f692c84c28cc8178aaeeb9264d82b48787f1 Mon Sep 17 00:00:00 2001 From: Zhang Zhuoyu Date: Fri, 25 Mar 2016 05:18:39 -0400 Subject: ceph: make logical calculation functions return bool This patch makes serverl logical caculation functions return bool to improve readability due to these particular functions only using 0/1 as their return value. No functional change. Signed-off-by: Zhang Zhuoyu --- include/linux/ceph/ceph_frag.h | 4 ++-- include/linux/ceph/decode.h | 2 +- include/linux/ceph/osdmap.h | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_frag.h b/include/linux/ceph/ceph_frag.h index b827e066e55a..146507df8650 100644 --- a/include/linux/ceph/ceph_frag.h +++ b/include/linux/ceph/ceph_frag.h @@ -51,11 +51,11 @@ static inline __u32 ceph_frag_make_child(__u32 f, int by, int i) return ceph_frag_make(newbits, ceph_frag_value(f) | (i << (24 - newbits))); } -static inline int ceph_frag_is_leftmost(__u32 f) +static inline bool ceph_frag_is_leftmost(__u32 f) { return ceph_frag_value(f) == 0; } -static inline int ceph_frag_is_rightmost(__u32 f) +static inline bool ceph_frag_is_rightmost(__u32 f) { return ceph_frag_value(f) == ceph_frag_mask(f); } diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index a6ef9cc267ec..19e9932f3e77 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -47,7 +47,7 @@ static inline void ceph_decode_copy(void **p, void *pv, size_t n) /* * bounds check input. */ -static inline int ceph_has_room(void **p, void *end, size_t n) +static inline bool ceph_has_room(void **p, void *end, size_t n) { return end >= *p && n <= end - *p; } diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 821e16fff39a..ddc426b22d81 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -172,19 +172,19 @@ struct ceph_osdmap { int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3]; }; -static inline int ceph_osd_exists(struct ceph_osdmap *map, int osd) +static inline bool ceph_osd_exists(struct ceph_osdmap *map, int osd) { return osd >= 0 && osd < map->max_osd && (map->osd_state[osd] & CEPH_OSD_EXISTS); } -static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd) +static inline bool ceph_osd_is_up(struct ceph_osdmap *map, int osd) { return ceph_osd_exists(map, osd) && (map->osd_state[osd] & CEPH_OSD_UP); } -static inline int ceph_osd_is_down(struct ceph_osdmap *map, int osd) +static inline bool ceph_osd_is_down(struct ceph_osdmap *map, int osd) { return !ceph_osd_is_up(map, osd); } -- cgit v1.2.3 From 887bddfa90c79957d61067cd54a10087be0c8b23 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 26 May 2016 00:04:58 -0400 Subject: add down_write_killable_nested() Signed-off-by: Al Viro --- include/linux/rwsem.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index d1c12d160ace..d37fbb34d06f 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -156,6 +156,7 @@ extern void downgrade_write(struct rw_semaphore *sem); */ extern void down_read_nested(struct rw_semaphore *sem, int subclass); extern void down_write_nested(struct rw_semaphore *sem, int subclass); +extern int down_write_killable_nested(struct rw_semaphore *sem, int subclass); extern void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest_lock); # define down_write_nest_lock(sem, nest_lock) \ @@ -176,6 +177,7 @@ extern void up_read_non_owner(struct rw_semaphore *sem); # define down_read_nested(sem, subclass) down_read(sem) # define down_write_nest_lock(sem, nest_lock) down_write(sem) # define down_write_nested(sem, subclass) down_write(sem) +# define down_write_killable_nested(sem, subclass) down_write_killable(sem) # define down_read_non_owner(sem) down_read(sem) # define up_read_non_owner(sem) up_read(sem) #endif -- cgit v1.2.3 From 868b2072f09c8a698df8066ca72d30411dcc57d6 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Mon, 23 May 2016 11:44:39 -0700 Subject: misc: at24: Fix typo in at24 header file This commit fixes a simple typo s/mvmem/nvmem in the example. Signed-off-by: Moritz Fischer Signed-off-by: Wolfram Sang --- include/linux/platform_data/at24.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/at24.h b/include/linux/platform_data/at24.h index dc9a13e5acda..be830b141d83 100644 --- a/include/linux/platform_data/at24.h +++ b/include/linux/platform_data/at24.h @@ -26,7 +26,7 @@ * * An example in pseudo code for a setup() callback: * - * void get_mac_addr(struct mvmem_device *nvmem, void *context) + * void get_mac_addr(struct nvmem_device *nvmem, void *context) * { * u8 *mac_addr = ethernet_pdata->mac_addr; * off_t offset = context; -- cgit v1.2.3 From 50755bc1c305340660bbfa65fdae3ed113d8fe0e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 26 May 2016 15:16:06 -0700 Subject: seqlock: fix raw_read_seqcount_latch() lockless_dereference() is supposed to take pointer not integer. Link: http://lkml.kernel.org/r/20160521201448.GA7429@p183.telecom.by Signed-off-by: Alexey Dobriyan Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/seqlock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index e0582106ef4f..7973a821ac58 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -277,7 +277,7 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s) static inline int raw_read_seqcount_latch(seqcount_t *s) { - return lockless_dereference(s->sequence); + return lockless_dereference(s)->sequence; } /** @@ -331,7 +331,7 @@ static inline int raw_read_seqcount_latch(seqcount_t *s) * unsigned seq, idx; * * do { - * seq = lockless_dereference(latch->seq); + * seq = lockless_dereference(latch)->seq; * * idx = seq & 0x01; * entry = data_query(latch->data[idx], ...); -- cgit v1.2.3 From d96c84f8d27ce57ff08f12b9654d9f505a8cce6e Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 26 May 2016 15:16:14 -0700 Subject: mm: slub: remove unused virt_to_obj() It's unused since commit 7ed2f9e66385 ("mm, kasan: SLAB support") Link: http://lkml.kernel.org/r/1464020961-2242-1-git-send-email-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Cc: Joonsoo Kim Cc: David Rientjes Cc: Pekka Enberg Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 665cd0cd18b8..d1faa019c02a 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -111,22 +111,6 @@ static inline void sysfs_slab_remove(struct kmem_cache *s) } #endif - -/** - * virt_to_obj - returns address of the beginning of object. - * @s: object's kmem_cache - * @slab_page: address of slab page - * @x: address within object memory range - * - * Returns address of the beginning of object - */ -static inline void *virt_to_obj(struct kmem_cache *s, - const void *slab_page, - const void *x) -{ - return (void *)x - ((x - slab_page) % s->size); -} - void object_err(struct kmem_cache *s, struct page *page, u8 *object, char *reason); -- cgit v1.2.3 From 7ef949d77f95f0d129f0d404b336459a34a00101 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 26 May 2016 15:16:22 -0700 Subject: mm: oom_reaper: remove some bloat mmput_async is currently used only from the oom_reaper which is defined only for CONFIG_MMU. We can save work_struct in mm_struct for !CONFIG_MMU. [akpm@linux-foundation.org: fix typo, per Minchan] Link: http://lkml.kernel.org/r/20160520061658.GB19172@dhcp22.suse.cz Reported-by: Minchan Kim Signed-off-by: Michal Hocko Acked-by: Minchan Kim Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 ++ include/linux/sched.h | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d553855503e6..ca3e517980a0 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -514,7 +514,9 @@ struct mm_struct { #ifdef CONFIG_HUGETLB_PAGE atomic_long_t hugetlb_usage; #endif +#ifdef CONFIG_MMU struct work_struct async_put_work; +#endif }; static inline void mm_init_cpumask(struct mm_struct *mm) diff --git a/include/linux/sched.h b/include/linux/sched.h index 23e075dcdfe4..6e42ada26345 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2745,10 +2745,12 @@ static inline bool mmget_not_zero(struct mm_struct *mm) /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); -/* same as above but performs the slow path from the async kontext. Can +#ifdef CONFIG_MMU +/* same as above but performs the slow path from the async context. Can * be called from the atomic context as well */ extern void mmput_async(struct mm_struct *); +#endif /* Grab a reference to a task's mm, if it is not already going away */ extern struct mm_struct *get_task_mm(struct task_struct *task); -- cgit v1.2.3 From 5930122683dff58f0846b0f0405b4bd598a3ba6a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 27 May 2016 10:19:30 -0400 Subject: switch xattr_handler->set() to passing dentry and inode separately preparation for similar switch in ->setxattr() (see the next commit for rationale). Signed-off-by: Al Viro --- include/linux/xattr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 1cc4c578deb9..76beb206741a 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -33,8 +33,8 @@ struct xattr_handler { struct inode *inode, const char *name, void *buffer, size_t size); int (*set)(const struct xattr_handler *, struct dentry *dentry, - const char *name, const void *buffer, size_t size, - int flags); + struct inode *inode, const char *name, const void *buffer, + size_t size, int flags); }; const char *xattr_full_name(const struct xattr_handler *, const char *); -- cgit v1.2.3 From 7ded384a12688c2a86b618da16bc87713404dfcc Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 27 May 2016 15:23:32 -0700 Subject: mm: fix section mismatch warning The register_page_bootmem_info_node() function needs to be marked __init in order to avoid a new warning introduced by commit f65e91df25aa ("mm: use early_pfn_to_nid in register_page_bootmem_info_node"). Otherwise you'll get a warning about how a non-init function calls early_pfn_to_nid (which is __meminit) Cc: Yang Shi Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 20d8a5d4d133..5145620ba48a 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -182,7 +182,7 @@ static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) #endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ #ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE -extern void register_page_bootmem_info_node(struct pglist_data *pgdat); +extern void __init register_page_bootmem_info_node(struct pglist_data *pgdat); #else static inline void register_page_bootmem_info_node(struct pglist_data *pgdat) { -- cgit v1.2.3 From 5d22fc25d4fc8096d2d7df27ea1893d4e055e764 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 27 May 2016 15:57:31 -0700 Subject: mm: remove more IS_ERR_VALUE abuses The do_brk() and vm_brk() return value was "unsigned long" and returned the starting address on success, and an error value on failure. The reasons are entirely historical, and go back to it basically behaving like the mmap() interface does. However, nobody actually wanted that interface, and it causes totally pointless IS_ERR_VALUE() confusion. What every single caller actually wants is just the simpler integer return of zero for success and negative error number on failure. So just convert to that much clearer and more common calling convention, and get rid of all the IS_ERR_VALUE() uses wrt vm_brk(). Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index a00ec816233a..5df5feb49575 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2018,7 +2018,7 @@ static inline void mm_populate(unsigned long addr, unsigned long len) {} #endif /* These take the mm semaphore themselves */ -extern unsigned long __must_check vm_brk(unsigned long, unsigned long); +extern int __must_check vm_brk(unsigned long, unsigned long); extern int vm_munmap(unsigned long, size_t); extern unsigned long __must_check vm_mmap(struct file *, unsigned long, unsigned long, unsigned long, -- cgit v1.2.3 From aa00edc1287a693eadc7bc67a3d73555d969b35d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 27 May 2016 16:03:22 -0700 Subject: make IS_ERR_VALUE() complain about non-pointer-sized arguments Now that the allmodconfig x86-64 build is clean wrt IS_ERR_VALUE() uses on integers, add a cast to a pointer and back to the argument, so that any new mis-uses of IS_ERR_VALUE() will cause warnings like warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] so that we don't re-introduce any bogus uses. Signed-off-by: Linus Torvalds --- include/linux/err.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/err.h b/include/linux/err.h index 56762ab41713..1e3558845e4c 100644 --- a/include/linux/err.h +++ b/include/linux/err.h @@ -18,7 +18,7 @@ #ifndef __ASSEMBLY__ -#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO) +#define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO) static inline void * __must_check ERR_PTR(long error) { -- cgit v1.2.3 From 3767e255b390d72f9a33c08d9e86c5f21f25860f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 27 May 2016 11:06:05 -0400 Subject: switch ->setxattr() to passing dentry and inode separately smack ->d_instantiate() uses ->setxattr(), so to be able to call it before we'd hashed the new dentry and attached it to inode, we need ->setxattr() instances getting the inode as an explicit argument rather than obtaining it from dentry. Similar change for ->getxattr() had been done in commit ce23e64. Unlike ->getxattr() (which is used by both selinux and smack instances of ->d_instantiate()) ->setxattr() is used only by smack one and unfortunately it got missed back then. Reported-by: Seung-Woo Kim Tested-by: Casey Schaufler Signed-off-by: Al Viro --- include/linux/fs.h | 3 ++- include/linux/xattr.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5f61431d8673..62bdb0a6cf2d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1730,7 +1730,8 @@ struct inode_operations { struct inode *, struct dentry *, unsigned int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); - int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); + int (*setxattr) (struct dentry *, struct inode *, + const char *, const void *, size_t, int); ssize_t (*getxattr) (struct dentry *, struct inode *, const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 76beb206741a..94079bab9243 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -54,7 +54,8 @@ int vfs_removexattr(struct dentry *, const char *); ssize_t generic_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size); ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); -int generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); +int generic_setxattr(struct dentry *dentry, struct inode *inode, + const char *name, const void *value, size_t size, int flags); int generic_removexattr(struct dentry *dentry, const char *name); ssize_t vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value, size_t size, gfp_t flags); -- cgit v1.2.3 From f4bcbe792b8f434e32487cff9d9e30ab45a3ce02 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Fri, 20 May 2016 07:26:00 -0400 Subject: Pull out string hash to ... so they can be used without the rest of The hashlen_* macros will make sense next patch. Signed-off-by: George Spelvin --- include/linux/dcache.h | 27 +---------------- include/linux/stringhash.h | 72 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 26 deletions(-) create mode 100644 include/linux/stringhash.h (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 7e9422cb5989..0f9a977c334f 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -10,6 +10,7 @@ #include #include #include +#include struct path; struct vfsmount; @@ -52,9 +53,6 @@ struct qstr { }; #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } -#define hashlen_hash(hashlen) ((u32) (hashlen)) -#define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) -#define hashlen_create(hash,len) (((u64)(len)<<32)|(u32)(hash)) struct dentry_stat_t { long nr_dentry; @@ -65,29 +63,6 @@ struct dentry_stat_t { }; extern struct dentry_stat_t dentry_stat; -/* Name hashing routines. Initial hash value */ -/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ -#define init_name_hash() 0 - -/* partial hash update function. Assume roughly 4 bits per character */ -static inline unsigned long -partial_name_hash(unsigned long c, unsigned long prevhash) -{ - return (prevhash + (c << 4) + (c >> 4)) * 11; -} - -/* - * Finally: cut down the number of bits to a int value (and try to avoid - * losing bits) - */ -static inline unsigned long end_name_hash(unsigned long hash) -{ - return (unsigned int) hash; -} - -/* Compute the hash for a name string. */ -extern unsigned int full_name_hash(const unsigned char *, unsigned int); - /* * Try to keep struct dentry aligned on 64 byte cachelines (this will * give reasonable cacheline footprint with larger lines without the diff --git a/include/linux/stringhash.h b/include/linux/stringhash.h new file mode 100644 index 000000000000..2eaaaf6d2776 --- /dev/null +++ b/include/linux/stringhash.h @@ -0,0 +1,72 @@ +#ifndef __LINUX_STRINGHASH_H +#define __LINUX_STRINGHASH_H + +#include + +/* + * Routines for hashing strings of bytes to a 32-bit hash value. + * + * These hash functions are NOT GUARANTEED STABLE between kernel + * versions, architectures, or even repeated boots of the same kernel. + * (E.g. they may depend on boot-time hardware detection or be + * deliberately randomized.) + * + * They are also not intended to be secure against collisions caused by + * malicious inputs; much slower hash functions are required for that. + * + * They are optimized for pathname components, meaning short strings. + * Even if a majority of files have longer names, the dynamic profile of + * pathname components skews short due to short directory names. + * (E.g. /usr/lib/libsesquipedalianism.so.3.141.) + */ + +/* + * Version 1: one byte at a time. Example of use: + * + * unsigned long hash = init_name_hash; + * while (*p) + * hash = partial_name_hash(tolower(*p++), hash); + * hash = end_name_hash(hash); + * + * Although this is designed for bytes, fs/hfsplus/unicode.c + * abuses it to hash 16-bit values. + */ + +/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ +#define init_name_hash() 0 + +/* partial hash update function. Assume roughly 4 bits per character */ +static inline unsigned long +partial_name_hash(unsigned long c, unsigned long prevhash) +{ + return (prevhash + (c << 4) + (c >> 4)) * 11; +} + +/* + * Finally: cut down the number of bits to a int value (and try to avoid + * losing bits) + */ +static inline unsigned long end_name_hash(unsigned long hash) +{ + return (unsigned int)hash; +} + +/* + * Version 2: One word (32 or 64 bits) at a time. + * If CONFIG_DCACHE_WORD_ACCESS is defined (meaning + * exists, which describes major Linux platforms like x86 and ARM), then + * this computes a different hash function much faster. + * + * If not set, this falls back to a wrapper around the preceding. + */ +extern unsigned int full_name_hash(const unsigned char *, unsigned int); + +/* + * A hash_len is a u64 with the hash of a string in the low + * half and the length in the high half. + */ +#define hashlen_hash(hashlen) ((u32)(hashlen)) +#define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) +#define hashlen_create(hash, len) ((u64)(len)<<32 | (u32)(hash)) + +#endif /* __LINUX_STRINGHASH_H */ -- cgit v1.2.3 From fcfd2fbf22d2587196890103d41e3d554c47da0e Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Fri, 20 May 2016 08:41:37 -0400 Subject: fs/namei.c: Add hashlen_string() function We'd like to make more use of the highly-optimized dcache hash functions throughout the kernel, rather than have every subsystem create its own, and a function that hashes basic null-terminated strings is required for that. (The name is to emphasize that it returns both hash and length.) It's actually useful in the dcache itself, specifically d_alloc_name(). Other uses in the next patch. full_name_hash() is also tweaked to make it more generally useful: 1) Take a "char *" rather than "unsigned char *" argument, to be consistent with hash_name(). 2) Handle zero-length inputs. If we want more callers, we don't want to make them worry about corner cases. Signed-off-by: George Spelvin --- include/linux/stringhash.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stringhash.h b/include/linux/stringhash.h index 2eaaaf6d2776..451771d9b9c0 100644 --- a/include/linux/stringhash.h +++ b/include/linux/stringhash.h @@ -1,7 +1,8 @@ #ifndef __LINUX_STRINGHASH_H #define __LINUX_STRINGHASH_H -#include +#include /* For __pure */ +#include /* For u32, u64 */ /* * Routines for hashing strings of bytes to a 32-bit hash value. @@ -59,7 +60,7 @@ static inline unsigned long end_name_hash(unsigned long hash) * * If not set, this falls back to a wrapper around the preceding. */ -extern unsigned int full_name_hash(const unsigned char *, unsigned int); +extern unsigned int __pure full_name_hash(const char *, unsigned int); /* * A hash_len is a u64 with the hash of a string in the low @@ -69,4 +70,7 @@ extern unsigned int full_name_hash(const unsigned char *, unsigned int); #define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) #define hashlen_create(hash, len) ((u64)(len)<<32 | (u32)(hash)) +/* Return the "hash_len" (hash and length) of a null-terminated string */ +extern u64 __pure hashlen_string(const char *name); + #endif /* __LINUX_STRINGHASH_H */ -- cgit v1.2.3 From 917ea166f4672ec085f2cccc135c7c0eec72282c Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Fri, 20 May 2016 13:31:33 -0400 Subject: : Define hash_str() in terms of hashlen_string() Finally, the first use of previous two patches: eliminate the separate ad-hoc string hash functions in the sunrpc code. Now hash_str() is a wrapper around hash_string(), and hash_mem() is likewise a wrapper around full_name_hash(). Note that sunrpc code *does* call hash_mem() with a zero length, which is why the previous patch needed to handle that in full_name_hash(). (Thanks, Bruce, for finding that!) This also eliminates the only caller of hash_long which asks for more than 32 bits of output. The comment about the quality of hashlen_string() and full_name_hash() is jumping the gun by a few patches; they aren't very impressive now, but will be improved greatly later in the series. Signed-off-by: George Spelvin Tested-by: J. Bruce Fields Acked-by: J. Bruce Fields Cc: Jeff Layton Cc: linux-nfs@vger.kernel.org --- include/linux/sunrpc/svcauth.h | 40 +++++++++------------------------------- 1 file changed, 9 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index c00f53a4ccdd..91d5a5d6f52b 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -16,6 +16,7 @@ #include #include #include +#include #include struct svc_cred { @@ -165,41 +166,18 @@ extern int svcauth_unix_set_client(struct svc_rqst *rqstp); extern int unix_gid_cache_create(struct net *net); extern void unix_gid_cache_destroy(struct net *net); -static inline unsigned long hash_str(char *name, int bits) +/* + * The functions are good enough that we don't need to + * use hash_32() on them; just extracting the high bits is enough. + */ +static inline unsigned long hash_str(char const *name, int bits) { - unsigned long hash = 0; - unsigned long l = 0; - int len = 0; - unsigned char c; - do { - if (unlikely(!(c = *name++))) { - c = (char)len; len = -1; - } - l = (l << 8) | c; - len++; - if ((len & (BITS_PER_LONG/8-1))==0) - hash = hash_long(hash^l, BITS_PER_LONG); - } while (len); - return hash >> (BITS_PER_LONG - bits); + return hashlen_hash(hashlen_string(name)) >> (32 - bits); } -static inline unsigned long hash_mem(char *buf, int length, int bits) +static inline unsigned long hash_mem(char const *buf, int length, int bits) { - unsigned long hash = 0; - unsigned long l = 0; - int len = 0; - unsigned char c; - do { - if (len == length) { - c = (char)len; len = -1; - } else - c = *buf++; - l = (l << 8) | c; - len++; - if ((len & (BITS_PER_LONG/8-1))==0) - hash = hash_long(hash^l, BITS_PER_LONG); - } while (len); - return hash >> (BITS_PER_LONG - bits); + return full_name_hash(buf, length) >> (32 - bits); } #endif /* __KERNEL__ */ -- cgit v1.2.3 From 92d567740f2ab5937b2c23bee94ea4b284bb1f98 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Thu, 26 May 2016 22:22:01 -0400 Subject: Change hash_64() return value to 32 bits That's all that's ever asked for, and it makes the return type of hash_long() consistent. It also allows (upcoming patch) an optimized implementation of hash_64 on 32-bit machines. I tried adding a BUILD_BUG_ON to ensure the number of bits requested was never more than 32 (most callers use a compile-time constant), but adding to breaks the tools/perf compiler unless tools/perf/MANIFEST is updated, and understanding that code base well enough to update it is too much trouble. I did the rest of an allyesconfig build with such a check, and nothing tripped. Signed-off-by: George Spelvin --- include/linux/hash.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hash.h b/include/linux/hash.h index 79c52fa81cac..f967dedb10e2 100644 --- a/include/linux/hash.h +++ b/include/linux/hash.h @@ -48,7 +48,7 @@ #define GOLDEN_RATIO_32 0x61C88647 #define GOLDEN_RATIO_64 0x61C8864680B583EBull -static __always_inline u64 hash_64(u64 val, unsigned int bits) +static __always_inline u32 hash_64(u64 val, unsigned int bits) { u64 hash = val; @@ -72,7 +72,7 @@ static __always_inline u64 hash_64(u64 val, unsigned int bits) #endif /* High bits are more random, so use them. */ - return hash >> (64 - bits); + return (u32)(hash >> (64 - bits)); } static inline u32 hash_32(u32 val, unsigned int bits) @@ -84,7 +84,7 @@ static inline u32 hash_32(u32 val, unsigned int bits) return hash >> (32 - bits); } -static inline unsigned long hash_ptr(const void *ptr, unsigned int bits) +static inline u32 hash_ptr(const void *ptr, unsigned int bits) { return hash_long((unsigned long)ptr, bits); } -- cgit v1.2.3 From ef703f49a6c5b909a85149bb6625c4ed0d697186 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Thu, 26 May 2016 23:00:23 -0400 Subject: Eliminate bad hash multipliers from hash_32() and hash_64() The "simplified" prime multipliers made very bad hash functions, so get rid of them. This completes the work of 689de1d6ca. To avoid the inefficiency which was the motivation for the "simplified" multipliers, hash_64() on 32-bit systems is changed to use a different algorithm. It makes two calls to hash_32() instead. drivers/media/usb/dvb-usb-v2/af9015.c uses the old GOLDEN_RATIO_PRIME_32 for some horrible reason, so it inherits a copy of the old definition. Signed-off-by: George Spelvin Cc: Antti Palosaari Cc: Mauro Carvalho Chehab --- include/linux/hash.h | 87 ++++++++++++++++++++-------------------------------- 1 file changed, 34 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hash.h b/include/linux/hash.h index f967dedb10e2..613cfde3a1e0 100644 --- a/include/linux/hash.h +++ b/include/linux/hash.h @@ -3,85 +3,65 @@ /* Fast hashing routine for ints, longs and pointers. (C) 2002 Nadia Yvette Chambers, IBM */ -/* - * Knuth recommends primes in approximately golden ratio to the maximum - * integer representable by a machine word for multiplicative hashing. - * Chuck Lever verified the effectiveness of this technique: - * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf - * - * These primes are chosen to be bit-sparse, that is operations on - * them can use shifts and additions instead of multiplications for - * machines where multiplications are slow. - */ - #include #include -/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ -#define GOLDEN_RATIO_PRIME_32 0x9e370001UL -/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ -#define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001UL - +/* + * The "GOLDEN_RATIO_PRIME" is used in ifs/btrfs/brtfs_inode.h and + * fs/inode.c. It's not actually prime any more (the previous primes + * were actively bad for hashing), but the name remains. + */ #if BITS_PER_LONG == 32 -#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_32 +#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_32 #define hash_long(val, bits) hash_32(val, bits) #elif BITS_PER_LONG == 64 #define hash_long(val, bits) hash_64(val, bits) -#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_64 +#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_64 #else #error Wordsize not 32 or 64 #endif /* - * The above primes are actively bad for hashing, since they are - * too sparse. The 32-bit one is mostly ok, the 64-bit one causes - * real problems. Besides, the "prime" part is pointless for the - * multiplicative hash. + * This hash multiplies the input by a large odd number and takes the + * high bits. Since multiplication propagates changes to the most + * significant end only, it is essential that the high bits of the + * product be used for the hash value. + * + * Chuck Lever verified the effectiveness of this technique: + * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf * * Although a random odd number will do, it turns out that the golden * ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice - * properties. + * properties. (See Knuth vol 3, section 6.4, exercise 9.) * - * These are the negative, (1 - phi) = (phi^2) = (3 - sqrt(5))/2. - * (See Knuth vol 3, section 6.4, exercise 9.) + * These are the negative, (1 - phi) = phi**2 = (3 - sqrt(5))/2, + * which is very slightly easier to multiply by and makes no + * difference to the hash distribution. */ #define GOLDEN_RATIO_32 0x61C88647 #define GOLDEN_RATIO_64 0x61C8864680B583EBull -static __always_inline u32 hash_64(u64 val, unsigned int bits) -{ - u64 hash = val; - -#if BITS_PER_LONG == 64 - hash = hash * GOLDEN_RATIO_64; -#else - /* Sigh, gcc can't optimise this alone like it does for 32 bits. */ - u64 n = hash; - n <<= 18; - hash -= n; - n <<= 33; - hash -= n; - n <<= 3; - hash += n; - n <<= 3; - hash -= n; - n <<= 4; - hash += n; - n <<= 2; - hash += n; -#endif - /* High bits are more random, so use them. */ - return (u32)(hash >> (64 - bits)); +static inline u32 __hash_32(u32 val) +{ + return val * GOLDEN_RATIO_32; } static inline u32 hash_32(u32 val, unsigned int bits) { - /* On some cpus multiply is faster, on others gcc will do shifts */ - u32 hash = val * GOLDEN_RATIO_PRIME_32; - /* High bits are more random, so use them. */ - return hash >> (32 - bits); + return __hash_32(val) >> (32 - bits); +} + +static __always_inline u32 hash_64(u64 val, unsigned int bits) +{ +#if BITS_PER_LONG == 64 + /* 64x64-bit multiply is efficient on all 64-bit processors */ + return val * GOLDEN_RATIO_64 >> (64 - bits); +#else + /* Hash 64 bits using only 32x32-bit multiply. */ + return hash_32((u32)val ^ __hash_32(val >> 32), bits); +#endif } static inline u32 hash_ptr(const void *ptr, unsigned int bits) @@ -89,6 +69,7 @@ static inline u32 hash_ptr(const void *ptr, unsigned int bits) return hash_long((unsigned long)ptr, bits); } +/* This really should be called fold32_ptr; it does no hashing to speak of. */ static inline u32 hash32_ptr(const void *ptr) { unsigned long val = (unsigned long)ptr; -- cgit v1.2.3 From 468a9428521e7d00fb21250af363eb94dc1d6861 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Thu, 26 May 2016 22:11:51 -0400 Subject: : Add support for architecture-specific functions This is just the infrastructure; there are no users yet. This is modelled on CONFIG_ARCH_RANDOM; a CONFIG_ symbol declares the existence of . That file may define its own versions of various functions, and define HAVE_* symbols (no CONFIG_ prefix!) to suppress the generic ones. Included is a self-test (in lib/test_hash.c) that verifies the basics. It is NOT in general required that the arch-specific functions compute the same thing as the generic, but if a HAVE_* symbol is defined with the value 1, then equality is tested. Signed-off-by: George Spelvin Cc: Geert Uytterhoeven Cc: Greg Ungerer Cc: Andreas Schwab Cc: Philippe De Muyter Cc: linux-m68k@lists.linux-m68k.org Cc: Alistair Francis Cc: Michal Simek Cc: Yoshinori Sato Cc: uclinux-h8-devel@lists.sourceforge.jp --- include/linux/hash.h | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hash.h b/include/linux/hash.h index 613cfde3a1e0..ad6fa21d977b 100644 --- a/include/linux/hash.h +++ b/include/linux/hash.h @@ -41,19 +41,40 @@ #define GOLDEN_RATIO_32 0x61C88647 #define GOLDEN_RATIO_64 0x61C8864680B583EBull +#ifdef CONFIG_HAVE_ARCH_HASH +/* This header may use the GOLDEN_RATIO_xx constants */ +#include +#endif -static inline u32 __hash_32(u32 val) +/* + * The _generic versions exist only so lib/test_hash.c can compare + * the arch-optimized versions with the generic. + * + * Note that if you change these, any that aren't updated + * to match need to have their HAVE_ARCH_* define values updated so the + * self-test will not false-positive. + */ +#ifndef HAVE_ARCH__HASH_32 +#define __hash_32 __hash_32_generic +#endif +static inline u32 __hash_32_generic(u32 val) { return val * GOLDEN_RATIO_32; } -static inline u32 hash_32(u32 val, unsigned int bits) +#ifndef HAVE_ARCH_HASH_32 +#define hash_32 hash_32_generic +#endif +static inline u32 hash_32_generic(u32 val, unsigned int bits) { /* High bits are more random, so use them. */ return __hash_32(val) >> (32 - bits); } -static __always_inline u32 hash_64(u64 val, unsigned int bits) +#ifndef HAVE_ARCH_HASH_64 +#define hash_64 hash_64_generic +#endif +static __always_inline u32 hash_64_generic(u64 val, unsigned int bits) { #if BITS_PER_LONG == 64 /* 64x64-bit multiply is efficient on all 64-bit processors */ -- cgit v1.2.3 From b7ec35b304b64af2830027350cc99d31e6e537c2 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:25 +0200 Subject: libceph: change ceph_osdmap_flag() to take osdc For the benefit of every single caller, take osdc instead of map. Also, now that osdc->osdmap can't ever be NULL, drop the check. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 5 +++++ include/linux/ceph/osdmap.h | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 19b14862d3e0..1b3b6e155392 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -279,6 +279,11 @@ struct ceph_osd_client { struct workqueue_struct *notify_wq; }; +static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag) +{ + return osdc->osdmap->flags & flag; +} + extern int ceph_osdc_setup(void); extern void ceph_osdc_cleanup(void); diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index ddc426b22d81..9ccf4dbe55f8 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -189,11 +189,6 @@ static inline bool ceph_osd_is_down(struct ceph_osdmap *map, int osd) return !ceph_osd_is_up(map, osd); } -static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag) -{ - return map && (map->flags & flag); -} - extern char *ceph_osdmap_state_str(char *str, int len, int state); extern u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd); -- cgit v1.2.3 From e2082e3ab801b989d8d5337b2ecbfc61d09781cb Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 31 Mar 2016 16:26:50 -0400 Subject: dma-buf: headerdoc fixes Apparently nobody noticed that dma-buf.h wasn't actually pulled into docbook build. And as a result the headerdoc comments bitrot a bit. Add missing params/fields. Signed-off-by: Rob Clark Signed-off-by: Sumit Semwal --- include/linux/dma-buf.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 3fe90d494edb..4551c6f2a6c4 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -112,19 +112,24 @@ struct dma_buf_ops { * @file: file pointer used for sharing buffers across, and for refcounting. * @attachments: list of dma_buf_attachment that denotes all devices attached. * @ops: dma_buf_ops associated with this buffer object. + * @lock: used internally to serialize list manipulation, attach/detach and vmap/unmap + * @vmapping_counter: used internally to refcnt the vmaps + * @vmap_ptr: the current vmap ptr if vmapping_counter > 0 * @exp_name: name of the exporter; useful for debugging. * @owner: pointer to exporter module; used for refcounting when exporter is a * kernel module. * @list_node: node for dma_buf accounting and debugging. * @priv: exporter specific private data for this buffer object. * @resv: reservation object linked to this dma-buf + * @poll: for userspace poll support + * @cb_excl: for userspace poll support + * @cb_shared: for userspace poll support */ struct dma_buf { size_t size; struct file *file; struct list_head attachments; const struct dma_buf_ops *ops; - /* mutex to serialize list manipulation, attach/detach and vmap/unmap */ struct mutex lock; unsigned vmapping_counter; void *vmap_ptr; @@ -188,9 +193,11 @@ struct dma_buf_export_info { /** * helper macro for exporters; zeros and fills in most common values + * + * @name: export-info name */ -#define DEFINE_DMA_BUF_EXPORT_INFO(a) \ - struct dma_buf_export_info a = { .exp_name = KBUILD_MODNAME, \ +#define DEFINE_DMA_BUF_EXPORT_INFO(name) \ + struct dma_buf_export_info name = { .exp_name = KBUILD_MODNAME, \ .owner = THIS_MODULE } /** -- cgit v1.2.3 From dad6c3945fd25384c2b92306a90ba033e1130428 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 31 Mar 2016 16:26:51 -0400 Subject: reservation: add headerdoc comments Signed-off-by: Rob Clark Signed-off-by: Sumit Semwal --- include/linux/reservation.h | 53 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reservation.h b/include/linux/reservation.h index 49d057655d62..b0f305e77b7f 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -49,12 +49,27 @@ extern struct ww_class reservation_ww_class; extern struct lock_class_key reservation_seqcount_class; extern const char reservation_seqcount_string[]; +/** + * struct reservation_object_list - a list of shared fences + * @rcu: for internal use + * @shared_count: table of shared fences + * @shared_max: for growing shared fence table + * @shared: shared fence table + */ struct reservation_object_list { struct rcu_head rcu; u32 shared_count, shared_max; struct fence __rcu *shared[]; }; +/** + * struct reservation_object - a reservation object manages fences for a buffer + * @lock: update side lock + * @seq: sequence count for managing RCU read-side synchronization + * @fence_excl: the exclusive fence, if there is one currently + * @fence: list of current shared fences + * @staged: staged copy of shared fences for RCU updates + */ struct reservation_object { struct ww_mutex lock; seqcount_t seq; @@ -68,6 +83,10 @@ struct reservation_object { #define reservation_object_assert_held(obj) \ lockdep_assert_held(&(obj)->lock.base) +/** + * reservation_object_init - initialize a reservation object + * @obj: the reservation object + */ static inline void reservation_object_init(struct reservation_object *obj) { @@ -79,6 +98,10 @@ reservation_object_init(struct reservation_object *obj) obj->staged = NULL; } +/** + * reservation_object_fini - destroys a reservation object + * @obj: the reservation object + */ static inline void reservation_object_fini(struct reservation_object *obj) { @@ -106,6 +129,14 @@ reservation_object_fini(struct reservation_object *obj) ww_mutex_destroy(&obj->lock); } +/** + * reservation_object_get_list - get the reservation object's + * shared fence list, with update-side lock held + * @obj: the reservation object + * + * Returns the shared fence list. Does NOT take references to + * the fence. The obj->lock must be held. + */ static inline struct reservation_object_list * reservation_object_get_list(struct reservation_object *obj) { @@ -113,6 +144,17 @@ reservation_object_get_list(struct reservation_object *obj) reservation_object_held(obj)); } +/** + * reservation_object_get_excl - get the reservation object's + * exclusive fence, with update-side lock held + * @obj: the reservation object + * + * Returns the exclusive fence (if any). Does NOT take a + * reference. The obj->lock must be held. + * + * RETURNS + * The exclusive fence or NULL + */ static inline struct fence * reservation_object_get_excl(struct reservation_object *obj) { @@ -120,6 +162,17 @@ reservation_object_get_excl(struct reservation_object *obj) reservation_object_held(obj)); } +/** + * reservation_object_get_excl_rcu - get the reservation object's + * exclusive fence, without lock held. + * @obj: the reservation object + * + * If there is an exclusive fence, this atomically increments it's + * reference count and returns it. + * + * RETURNS + * The exclusive fence or NULL if none + */ static inline struct fence * reservation_object_get_excl_rcu(struct reservation_object *obj) { -- cgit v1.2.3 From 4320c2a22df12f954edd4997f71ca3a4216312b2 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Mon, 11 Apr 2016 12:48:55 +0100 Subject: fence: add missing descriptions for fence The members child_list and active_list were added to the fence struct without descriptions for the Documentation. Adding these. Fixes: b55b54b5db33 ("staging/android: remove struct sync_pt") Signed-off-by: Luis de Bethencourt Reviewed-by: Javier Martinez Canillas Reviewed-by: Gustavo Padovan Signed-off-by: Sumit Semwal --- include/linux/fence.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fence.h b/include/linux/fence.h index 2b17698b60b8..2056e9fd0138 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -49,6 +49,8 @@ struct fence_cb; * @timestamp: Timestamp when the fence was signaled. * @status: Optional, only valid if < 0, must be set before calling * fence_signal, indicates that the fence has completed with an error. + * @child_list: list of children fences + * @active_list: list of active fences * * the flags member must be manipulated and read using the appropriate * atomic ops (bit_*), so taking the spinlock will not be needed most -- cgit v1.2.3 From 40eb90e9ccc3f96f937ea1db79d0f9cb61553ed5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 29 May 2016 17:42:13 +0800 Subject: sctp: sctp_diag should dump sctp socket type Now we cannot distinguish that one sk is a udp or sctp style when we use ss to dump sctp_info. it's necessary to dump it as well. For sctp_diag, ss support is not officially available, thus there are no official users of this yet, so we can add this field in the middle of sctp_info without breaking user API. v1->v2: - move 'sctpi_s_type' field to the end of struct sctp_info, so that it won't cause incompatibility with applications already built. - add __reserved3 in sctp_info to make sure sctp_info is 8-byte alignment. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/linux/sctp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index dacb5e711994..de1f64318fc4 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -765,6 +765,8 @@ struct sctp_info { __u8 sctpi_s_disable_fragments; __u8 sctpi_s_v4mapped; __u8 sctpi_s_frag_interleave; + __u32 sctpi_s_type; + __u32 __reserved3; }; struct sctp_infox { -- cgit v1.2.3 From 480ce08a70e4179f34808a3bdbfe6627f624cf54 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 20 May 2016 18:32:31 +0800 Subject: FS-Cache: make check_consistency callback return int __fscache_check_consistency() calls check_consistency() callback and return the callback's return value. But the return type of check_consistency() is bool. So __fscache_check_consistency() return 1 if the cache is inconsistent. This is inconsistent with the document. Signed-off-by: Yan, Zheng Acked-by: David Howells --- include/linux/fscache-cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 604e1526cd00..13ba552e6c09 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -241,7 +241,7 @@ struct fscache_cache_ops { /* check the consistency between the backing cache and the FS-Cache * cookie */ - bool (*check_consistency)(struct fscache_operation *op); + int (*check_consistency)(struct fscache_operation *op); /* store the updated auxiliary data on an object */ void (*update_object)(struct fscache_object *object); -- cgit v1.2.3 From dfc2507b26af22b0bbc85251b8545b36d8bc5d72 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 1 Jun 2016 11:53:26 -0700 Subject: time: Make settimeofday error checking work again In commit 86d3473224b0 some of the checking for a valid timeval was subtley changed which caused -EINVAL to be returned whenever the timeval was null. However, it is possible to set the timezone data while specifying a NULL timeval, which is usually done to handle systems where the RTC keeps local time instead of UTC. Thus the patch causes such systems to have the time incorrectly set. This patch addresses the issue by handling the error conditionals in the same way as was done previously. Fixes: 86d3473224b0 "time: Introduce do_sys_settimeofday64()" Reported-by: Mika Westerberg Signed-off-by: John Stultz Tested-by: Mika Westerberg Cc: Prarit Bhargava Cc: Arnd Bergmann Cc: Baolin Wang Cc: Richard Cochran Cc: Shuah Khan Link: http://lkml.kernel.org/r/1464807207-16530-2-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- include/linux/timekeeping.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 37dbacf84849..816b7543f81b 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -21,6 +21,9 @@ static inline int do_sys_settimeofday(const struct timespec *tv, struct timespec64 ts64; if (!tv) + return do_sys_settimeofday64(NULL, tz); + + if (!timespec_valid(tv)) return -EINVAL; ts64 = timespec_to_timespec64(*tv); -- cgit v1.2.3 From dd5f1b049dc139876801db3cdd0f20d21fd428cc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 2 Jun 2016 09:00:28 +0100 Subject: irqchip/gic-v3: Fix ICC_SGI1R_EL1.INTID decoding mask The INTID mask is wrong, and is made a signed value, which has nteresting effects in the KVM emulation. Let's sanitize it. Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index bfbd707de390..85a8c2acdef5 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -305,7 +305,7 @@ #define ICC_SGI1R_AFFINITY_1_SHIFT 16 #define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT) #define ICC_SGI1R_SGI_ID_SHIFT 24 -#define ICC_SGI1R_SGI_ID_MASK (0xff << ICC_SGI1R_SGI_ID_SHIFT) +#define ICC_SGI1R_SGI_ID_MASK (0xfULL << ICC_SGI1R_SGI_ID_SHIFT) #define ICC_SGI1R_AFFINITY_2_SHIFT 32 #define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_1_SHIFT) #define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40 -- cgit v1.2.3 From fab0cdc30d81694d2d5524b24e42c43414971719 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 12 May 2016 10:46:34 +0200 Subject: irqchip/gic-v3: Fix copy+paste mistakes in defines ICC_SGI1R_AFFINITY_{2,3}_MASK are unused, which is good because they were defined with the wrong shifts. Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 85a8c2acdef5..dc493e0f0ff7 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -307,10 +307,10 @@ #define ICC_SGI1R_SGI_ID_SHIFT 24 #define ICC_SGI1R_SGI_ID_MASK (0xfULL << ICC_SGI1R_SGI_ID_SHIFT) #define ICC_SGI1R_AFFINITY_2_SHIFT 32 -#define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_1_SHIFT) +#define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT) #define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40 #define ICC_SGI1R_AFFINITY_3_SHIFT 48 -#define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_1_SHIFT) +#define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT) #include -- cgit v1.2.3 From f86e4271978bd93db466d6a95dad4b0fdcdb04f6 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Fri, 3 Jun 2016 14:55:38 -0700 Subject: mm: check the return value of lookup_page_ext for all call sites Per the discussion with Joonsoo Kim [1], we need check the return value of lookup_page_ext() for all call sites since it might return NULL in some cases, although it is unlikely, i.e. memory hotplug. Tested with ltp with "page_owner=0". [1] http://lkml.kernel.org/r/20160519002809.GA10245@js1304-P5Q-DELUXE [akpm@linux-foundation.org: fix build-breaking typos] [arnd@arndb.de: fix build problems from lookup_page_ext] Link: http://lkml.kernel.org/r/6285269.2CksypHdYp@wuerfel [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/1464023768-31025-1-git-send-email-yang.shi@linaro.org Signed-off-by: Yang Shi Signed-off-by: Arnd Bergmann Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_idle.h | 43 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index bf268fa92c5b..fec40271339f 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -46,33 +46,62 @@ extern struct page_ext_operations page_idle_ops; static inline bool page_is_young(struct page *page) { - return test_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags); + struct page_ext *page_ext = lookup_page_ext(page); + + if (unlikely(!page_ext)) + return false; + + return test_bit(PAGE_EXT_YOUNG, &page_ext->flags); } static inline void set_page_young(struct page *page) { - set_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags); + struct page_ext *page_ext = lookup_page_ext(page); + + if (unlikely(!page_ext)) + return; + + set_bit(PAGE_EXT_YOUNG, &page_ext->flags); } static inline bool test_and_clear_page_young(struct page *page) { - return test_and_clear_bit(PAGE_EXT_YOUNG, - &lookup_page_ext(page)->flags); + struct page_ext *page_ext = lookup_page_ext(page); + + if (unlikely(!page_ext)) + return false; + + return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags); } static inline bool page_is_idle(struct page *page) { - return test_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags); + struct page_ext *page_ext = lookup_page_ext(page); + + if (unlikely(!page_ext)) + return false; + + return test_bit(PAGE_EXT_IDLE, &page_ext->flags); } static inline void set_page_idle(struct page *page) { - set_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags); + struct page_ext *page_ext = lookup_page_ext(page); + + if (unlikely(!page_ext)) + return; + + set_bit(PAGE_EXT_IDLE, &page_ext->flags); } static inline void clear_page_idle(struct page *page) { - clear_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags); + struct page_ext *page_ext = lookup_page_ext(page); + + if (unlikely(!page_ext)) + return; + + clear_bit(PAGE_EXT_IDLE, &page_ext->flags); } #endif /* CONFIG_64BIT */ -- cgit v1.2.3 From eedf265aa003b4781de24cfed40a655a664457e6 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 2 Jun 2016 10:29:47 -0500 Subject: devpts: Make each mount of devpts an independent filesystem. The /dev/ptmx device node is changed to lookup the directory entry "pts" in the same directory as the /dev/ptmx device node was opened in. If there is a "pts" entry and that entry is a devpts filesystem /dev/ptmx uses that filesystem. Otherwise the open of /dev/ptmx fails. The DEVPTS_MULTIPLE_INSTANCES configuration option is removed, so that userspace can now safely depend on each mount of devpts creating a new instance of the filesystem. Each mount of devpts is now a separate and equal filesystem. Reserved ttys are now available to all instances of devpts where the mounter is in the initial mount namespace. A new vfs helper path_pts is introduced that finds a directory entry named "pts" in the directory of the passed in path, and changes the passed in path to point to it. The helper path_pts uses a function path_parent_directory that was factored out of follow_dotdot. In the implementation of devpts: - devpts_mnt is killed as it is no longer meaningful if all mounts of devpts are equal. - pts_sb_from_inode is replaced by just inode->i_sb as all cached inodes in the tty layer are now from the devpts filesystem. - devpts_add_ref is rolled into the new function devpts_ptmx. And the unnecessary inode hold is removed. - devpts_del_ref is renamed devpts_release and reduced to just a deacrivate_super. - The newinstance mount option continues to be accepted but is now ignored. In devpts_fs.h definitions for when !CONFIG_UNIX98_PTYS are removed as they are never used. Documentation/filesystems/devices.txt is updated to describe the current situation. This has been verified to work properly on openwrt-15.05, centos5, centos6, centos7, debian-6.0.2, debian-7.9, debian-8.2, ubuntu-14.04.3, ubuntu-15.10, fedora23, magia-5, mint-17.3, opensuse-42.1, slackware-14.1, gentoo-20151225 (13.0?), archlinux-2015-12-01. With the caveat that on centos6 and on slackware-14.1 that there wind up being two instances of the devpts filesystem mounted on /dev/pts, the lower copy does not end up getting used. Signed-off-by: "Eric W. Biederman" Cc: Greg KH Cc: Peter Hurley Cc: Peter Anvin Cc: Andy Lutomirski Cc: Al Viro Cc: Serge Hallyn Cc: Willy Tarreau Cc: Aurelien Jarno Cc: One Thousand Gnomes Cc: Jann Horn Cc: Jiri Slaby Cc: Florian Weimer Cc: Konstantin Khlebnikov Signed-off-by: Linus Torvalds --- include/linux/devpts_fs.h | 9 ++++----- include/linux/namei.h | 2 ++ 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index 5871f292b596..277ab9af9ac2 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -15,13 +15,12 @@ #include -struct pts_fs_info; - #ifdef CONFIG_UNIX98_PTYS -/* Look up a pts fs info and get a ref to it */ -struct pts_fs_info *devpts_get_ref(struct inode *, struct file *); -void devpts_put_ref(struct pts_fs_info *); +struct pts_fs_info; + +struct pts_fs_info *devpts_acquire(struct file *); +void devpts_release(struct pts_fs_info *); int devpts_new_index(struct pts_fs_info *); void devpts_kill_index(struct pts_fs_info *, int); diff --git a/include/linux/namei.h b/include/linux/namei.h index ec5ec2818a28..d3d0398f2a1b 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -45,6 +45,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_ROOT 0x2000 #define LOOKUP_EMPTY 0x4000 +extern int path_pts(struct path *path); + extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty); static inline int user_path_at(int dfd, const char __user *name, unsigned flags, -- cgit v1.2.3